@fanboynz/network-scanner 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/adblock-rust.js +17 -4
- package/lib/adblock.js +92 -15
- package/lib/browserhealth.js +57 -28
- package/lib/cdp.js +68 -34
- package/lib/clear_sitedata.js +68 -20
- package/lib/compress.js +26 -58
- package/lib/curl.js +44 -22
- package/lib/domain-cache.js +8 -57
- package/lib/dry-run.js +9 -4
- package/lib/fingerprint.js +418 -71
- package/lib/interaction.js +262 -26
- package/lib/nettools.js +47 -76
- package/lib/openvpn_vpn.js +116 -35
- package/lib/searchstring.js +15 -237
- package/lib/validate_rules.js +285 -3
- package/lib/wireguard_vpn.js +64 -12
- package/nwss.js +529 -217
- package/package.json +1 -1
- package/regex-tool/index.html +321 -628
package/lib/interaction.js
CHANGED
|
@@ -67,6 +67,97 @@ function fastTimeout(ms) {
|
|
|
67
67
|
return new Promise(resolve => setTimeout(resolve, ms));
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
/**
 * Human-timed click: move -> hover pause -> mousedown -> hold -> mouseup.
 *
 * page.mouse.click() fires mousedown+mouseup ~10-30ms apart, which many
 * ad-network popunder loaders (AdsCore/PropellerAds family) filter as a bot
 * signal; real users hold the button 50-150ms. This helper splits the click
 * into explicit steps with a randomized hold, a hover-before-click pause and
 * a small random offset so clicks don't land on the exact same (x, y).
 *
 * Drop-in replacement for `page.mouse.click(x, y)`. With the default options
 * the deliberate waits add up to roughly 200-600ms per call (hover 150-450ms
 * plus hold 50-150ms), on top of the CDP round-trips.
 *
 * Never rejects: every failure is swallowed (and optionally logged) so a
 * failed click cannot break the surrounding scan. If a failure happens while
 * the button is held down, a best-effort mouseup is issued so the page is not
 * left with a stuck button for the interactions that follow.
 *
 * @param {object} page - Puppeteer page (only page.mouse.move/down/up are used)
 * @param {number} x - target X
 * @param {number} y - target Y
 * @param {object} [options]
 * @param {number} [options.offsetRange=5] - ± px jitter applied to (x, y)
 * @param {number} [options.hoverMin=150] - min hover pause in ms
 * @param {number} [options.hoverMax=450] - max hover pause in ms
 * @param {number} [options.holdMin=50] - min mouse-down hold in ms
 * @param {number} [options.holdMax=150] - max mouse-down hold in ms
 * @param {boolean} [options.forceDebug=false] - log unexpected click failures
 *   (errors that look like page-closed / target-detached are never logged)
 * @param {boolean} [options.realistic=false] - emit hold tremor + mouseup
 *   drift: the hold is split into chunks with a ±CONTENT_CLICK.TREMOR_RANGE_PX
 *   mousemove between chunks, then a final ±CONTENT_CLICK.MOUSEUP_DRIFT_PX move
 *   before mouseup so the down and up coordinates differ. The extra events fit
 *   inside the same hold duration.
 * @returns {Promise<void>}
 */
async function humanClick(page, x, y, options = {}) {
  const {
    offsetRange = 5,
    hoverMin = 150, hoverMax = 450,
    holdMin = 50, holdMax = 150,
    forceDebug = false,
    realistic = false
  } = options;

  // ±offsetRange px scatter around the requested point.
  const jx = x + (Math.random() - 0.5) * 2 * offsetRange;
  const jy = y + (Math.random() - 0.5) * 2 * offsetRange;

  // Tracks whether mousedown was dispatched without a matching mouseup, so the
  // error path can release the button instead of leaving it pressed.
  let buttonDown = false;

  try {
    // Move first so the cursor position matches the click point when
    // mousedown fires, then pause as a hovering user would.
    await page.mouse.move(jx, jy);
    await fastTimeout(hoverMin + Math.random() * (hoverMax - hoverMin));
    await page.mouse.down();
    buttonDown = true;

    const holdMs = holdMin + Math.random() * (holdMax - holdMin);

    if (realistic) {
      // Split the hold into (tremorCount + 1) equal chunks with a micromove
      // between each chunk, so mousemove events occur during the press window.
      const tremorCount = CONTENT_CLICK.TREMOR_COUNT_MIN +
        Math.floor(Math.random() * (CONTENT_CLICK.TREMOR_COUNT_MAX - CONTENT_CLICK.TREMOR_COUNT_MIN + 1));
      const chunkMs = holdMs / (tremorCount + 1);
      for (let i = 0; i < tremorCount; i++) {
        await fastTimeout(chunkMs);
        const tjx = jx + (Math.random() - 0.5) * 2 * CONTENT_CLICK.TREMOR_RANGE_PX;
        const tjy = jy + (Math.random() - 0.5) * 2 * CONTENT_CLICK.TREMOR_RANGE_PX;
        await page.mouse.move(tjx, tjy);
      }
      await fastTimeout(chunkMs);
      // Final drift: mouseup fires at the current position, so move first to
      // make the up event land at slightly different coords than the down.
      const ux = jx + (Math.random() - 0.5) * 2 * CONTENT_CLICK.MOUSEUP_DRIFT_PX;
      const uy = jy + (Math.random() - 0.5) * 2 * CONTENT_CLICK.MOUSEUP_DRIFT_PX;
      await page.mouse.move(ux, uy);
    } else {
      await fastTimeout(holdMs);
    }

    await page.mouse.up();
    buttonDown = false;
  } catch (err) {
    // Something failed between mousedown and mouseup: release the button so
    // later moves are not interpreted as a drag. Best-effort only -- if the
    // page is gone this throws too, and that is fine.
    if (buttonDown) {
      try { await page.mouse.up(); } catch (_) { /* page likely closed */ }
    }

    // Page closed / target detached mid-click is the expected non-fatal path;
    // anything else is surfaced in debug mode so a site silently failing every
    // click is at least visible.
    const message = (err && err.message) || '';
    if (forceDebug && !/closed|detached|Target|Session closed|Protocol error/i.test(message)) {
      try {
        console.log(formatLogMessage('debug', `${INTERACTION_TAG} humanClick failed at (${jx.toFixed(0)}, ${jy.toFixed(0)}): ${message || String(err)}`));
      } catch (_) { /* logging itself shouldn't throw, but belt-and-braces */ }
    }
  }
}
|
|
160
|
+
|
|
70
161
|
// === VIEWPORT AND COORDINATE CONSTANTS ===
|
|
71
162
|
// These control the default viewport assumptions and coordinate generation
|
|
72
163
|
const DEFAULT_VIEWPORT = {
|
|
@@ -138,7 +229,8 @@ const ELEMENT_INTERACTION = {
|
|
|
138
229
|
// NOTE: No preDelay needed — mouse movements + scrolling already provide ~1s
|
|
139
230
|
// of activity before clicks fire, which is enough for async ad script registration
|
|
140
231
|
const CONTENT_CLICK = {
|
|
141
|
-
CLICK_COUNT:
|
|
232
|
+
CLICK_COUNT: 3, // Three attempts (primary + 2 backup; ad SDKs sometimes suppress first OR second click as warmup before firing)
|
|
233
|
+
CLICK_COUNT_MAX: 20, // Hard cap when overridden via siteConfig.interact_click_count — a typo of 500 shouldn't run for minutes
|
|
142
234
|
INTER_CLICK_MIN: 300, // Minimum ms between clicks (above Monetag 250ms cooldown)
|
|
143
235
|
INTER_CLICK_MAX: 500, // Maximum ms between clicks
|
|
144
236
|
// PRE_CLICK_DELAY: most ad scripts register document-level listeners
|
|
@@ -147,7 +239,22 @@ const CONTENT_CLICK = {
|
|
|
147
239
|
// 300ms buffer here was mostly defensive. Reduced to 100ms.
|
|
148
240
|
PRE_CLICK_DELAY: 100,
|
|
149
241
|
VIEWPORT_INSET: 0.2, // Avoid outer 20% of viewport (menus, overlays)
|
|
150
|
-
MOUSE_APPROACH_STEPS: 3
|
|
242
|
+
MOUSE_APPROACH_STEPS: 3, // Minimal steps — just enough for non-instant movement
|
|
243
|
+
// Realistic-mode opt-in (siteConfig.realistic_click). Higher step count
|
|
244
|
+
// raises the mousemove event rate to ~80–125Hz (real mouse minimum is
|
|
245
|
+
// 125Hz USB default) so per-event movementX/Y deltas land in the 5–30px
|
|
246
|
+
// range a real cursor produces — fixes the strongest movement tell.
|
|
247
|
+
// Cost: +~80–120ms per click over the approach. Off by default.
|
|
248
|
+
MOUSE_APPROACH_STEPS_REALISTIC: 15,
|
|
249
|
+
// Realistic-mode hold tremor: 1–3 ±1px micromoves spread across the
|
|
250
|
+
// mousedown→mouseup hold to defeat the "zero events during hold" tell.
|
|
251
|
+
TREMOR_COUNT_MIN: 1,
|
|
252
|
+
TREMOR_COUNT_MAX: 3,
|
|
253
|
+
TREMOR_RANGE_PX: 1,
|
|
254
|
+
// Realistic-mode mouseup drift: real human clicks drift 0–2px between
|
|
255
|
+
// mousedown and mouseup, especially with longer holds. Without this,
|
|
256
|
+
// mousedown.x === mouseup.x is a robotic signal.
|
|
257
|
+
MOUSEUP_DRIFT_PX: 1.5
|
|
151
258
|
};
|
|
152
259
|
|
|
153
260
|
// === INTENSITY SETTINGS ===
|
|
@@ -358,16 +465,19 @@ async function humanLikeMouseMove(page, fromX, fromY, toX, toY, options = {}) {
|
|
|
358
465
|
minDelay = MOUSE_MOVEMENT.MIN_DELAY,
|
|
359
466
|
maxDelay = MOUSE_MOVEMENT.MAX_DELAY,
|
|
360
467
|
curve = MOUSE_MOVEMENT.DEFAULT_CURVE,
|
|
361
|
-
jitter = MOUSE_MOVEMENT.DEFAULT_JITTER
|
|
468
|
+
jitter = MOUSE_MOVEMENT.DEFAULT_JITTER,
|
|
469
|
+
realistic = false // bypass MAX_STEPS / MAX_TOTAL_MS caps for high-cadence approach
|
|
362
470
|
} = options;
|
|
363
471
|
|
|
364
472
|
const distance = Math.sqrt((toX - fromX) ** 2 + (toY - fromY) ** 2);
|
|
365
473
|
|
|
366
474
|
// Step count: caller-provided value capped at MAX_STEPS, otherwise derived
|
|
367
|
-
// from distance and clamped to [MIN_STEPS, DEFAULT_STEPS].
|
|
475
|
+
// from distance and clamped to [MIN_STEPS, DEFAULT_STEPS]. Realistic mode
|
|
476
|
+
// skips the MAX_STEPS cap so callers can push 12–15 steps to match real
|
|
477
|
+
// mouse hardware event rates (~125Hz vs the default's ~30–60Hz).
|
|
368
478
|
let actualSteps;
|
|
369
479
|
if (options.steps) {
|
|
370
|
-
actualSteps = Math.min(options.steps, MOUSE_MOVEMENT.MAX_STEPS);
|
|
480
|
+
actualSteps = realistic ? options.steps : Math.min(options.steps, MOUSE_MOVEMENT.MAX_STEPS);
|
|
371
481
|
} else {
|
|
372
482
|
const calculatedSteps = Math.floor(distance / MOUSE_MOVEMENT.DISTANCE_STEP_RATIO);
|
|
373
483
|
actualSteps = Math.max(
|
|
@@ -377,10 +487,16 @@ async function humanLikeMouseMove(page, fromX, fromY, toX, toY, options = {}) {
|
|
|
377
487
|
}
|
|
378
488
|
|
|
379
489
|
// Emergency cap on total movement time — if step count × max-per-step delay
|
|
380
|
-
// would exceed the budget, reduce step count to fit.
|
|
490
|
+
// would exceed the budget, reduce step count to fit. Realistic mode raises
|
|
491
|
+
// the cap to 600ms so the higher step count survives the trim.
|
|
492
|
+
// Floor-clamp to MIN_STEPS: if a caller passes a maxDelay larger than
|
|
493
|
+
// totalMsLimit (e.g. maxDelay: 1000), the floor division yields 0, and the
|
|
494
|
+
// i=0 iteration then computes progress = 0/0 = NaN, propagating into
|
|
495
|
+
// page.mouse.move(NaN, NaN). Clamping preserves at least MIN_STEPS moves.
|
|
496
|
+
const totalMsLimit = realistic ? 600 : MOUSE_MOVEMENT.MAX_TOTAL_MS;
|
|
381
497
|
const estimatedTime = actualSteps * maxDelay;
|
|
382
|
-
if (estimatedTime >
|
|
383
|
-
actualSteps = Math.
|
|
498
|
+
if (estimatedTime > totalMsLimit) {
|
|
499
|
+
actualSteps = Math.max(MOUSE_MOVEMENT.MIN_STEPS, Math.floor(totalMsLimit / maxDelay));
|
|
384
500
|
}
|
|
385
501
|
|
|
386
502
|
for (let i = 0; i <= actualSteps; i++) {
|
|
@@ -398,8 +514,12 @@ async function humanLikeMouseMove(page, fromX, fromY, toX, toY, options = {}) {
|
|
|
398
514
|
let currentX = fromX + (toX - fromX) * easedProgress;
|
|
399
515
|
let currentY = fromY + (toY - fromY) * easedProgress;
|
|
400
516
|
|
|
401
|
-
// Add slight curve to movement (more human-like)
|
|
402
|
-
|
|
517
|
+
// Add slight curve to movement (more human-like).
|
|
518
|
+
// distance > 0 guard: when fromX === toX AND fromY === toY (integer-quantized
|
|
519
|
+
// random targets in performContentClicks can collide; or external caller passes
|
|
520
|
+
// from === to deliberately) the perpX/perpY divisions become -0/0 = NaN and
|
|
521
|
+
// poison currentX/currentY, causing page.mouse.move(NaN, NaN) to reject via CDP.
|
|
522
|
+
if (curve > 0 && distance > 0 && i > 0 && i < actualSteps) {
|
|
403
523
|
const curveIntensity = Math.sin((i / actualSteps) * Math.PI) * curve * distance * MOUSE_MOVEMENT.CURVE_INTENSITY_RATIO;
|
|
404
524
|
const perpX = -(toY - fromY) / distance;
|
|
405
525
|
const perpY = (toX - fromX) / distance;
|
|
@@ -547,7 +667,9 @@ async function interactWithElements(page, options = {}) {
|
|
|
547
667
|
maxAttempts = ELEMENT_INTERACTION.MAX_ATTEMPTS,
|
|
548
668
|
elementTypes = ['button', 'a', '[role="button"]'],
|
|
549
669
|
avoidDestructive = true,
|
|
550
|
-
timeout = ELEMENT_INTERACTION.TIMEOUT
|
|
670
|
+
timeout = ELEMENT_INTERACTION.TIMEOUT,
|
|
671
|
+
forceDebug = false,
|
|
672
|
+
realistic = false
|
|
551
673
|
} = options;
|
|
552
674
|
|
|
553
675
|
try {
|
|
@@ -555,22 +677,23 @@ async function interactWithElements(page, options = {}) {
|
|
|
555
677
|
try {
|
|
556
678
|
// Check if page is closed before attempting interaction
|
|
557
679
|
if (page.isClosed()) {
|
|
558
|
-
if (
|
|
680
|
+
if (forceDebug) {
|
|
559
681
|
console.log(formatLogMessage('debug', `${INTERACTION_TAG} Page is closed, skipping element interaction`));
|
|
560
682
|
}
|
|
561
683
|
return;
|
|
562
684
|
}
|
|
563
685
|
|
|
564
|
-
//
|
|
565
|
-
//
|
|
566
|
-
//
|
|
567
|
-
//
|
|
568
|
-
|
|
686
|
+
// Body wait honors the caller-provided timeout option (default 2000ms
|
|
687
|
+
// via ELEMENT_INTERACTION.TIMEOUT) -- was previously hardcoded to 1000
|
|
688
|
+
// and silently ignored the option. Explicitly dispose the returned handle
|
|
689
|
+
// rather than relying on Puppeteer's FinalizationRegistry -- matches the
|
|
690
|
+
// dispose pattern already used in performPageInteraction's final-hover block.
|
|
691
|
+
const bodyHandle = await page.waitForSelector('body', { timeout });
|
|
569
692
|
if (bodyHandle) { try { await bodyHandle.dispose(); } catch (_) {} }
|
|
570
693
|
// Re-check after async wait — page may have closed during selector wait
|
|
571
694
|
if (page.isClosed()) return;
|
|
572
695
|
} catch (bodyWaitErr) {
|
|
573
|
-
if (
|
|
696
|
+
if (forceDebug) {
|
|
574
697
|
console.log(formatLogMessage('debug', `${INTERACTION_TAG} Page not ready for element interaction: ${bodyWaitErr.message}`));
|
|
575
698
|
}
|
|
576
699
|
return;
|
|
@@ -598,7 +721,12 @@ async function interactWithElements(page, options = {}) {
|
|
|
598
721
|
|
|
599
722
|
if (isVisible) {
|
|
600
723
|
const text = (el.textContent || el.alt || el.title || '').toLowerCase();
|
|
601
|
-
|
|
724
|
+
// Word-boundary regex match -- prior `text.includes(word)`
|
|
725
|
+
// produced false positives like 'submit' matching
|
|
726
|
+
// 'resubmit'/'submitter', filtering out legitimate
|
|
727
|
+
// clickables. \b ensures whole-word matches only.
|
|
728
|
+
const shouldAvoid = avoidWords && avoidWords.length > 0 &&
|
|
729
|
+
new RegExp('\\b(' + avoidWords.join('|') + ')\\b').test(text);
|
|
602
730
|
|
|
603
731
|
if (!shouldAvoid) {
|
|
604
732
|
clickableElements.push({
|
|
@@ -627,7 +755,7 @@ async function interactWithElements(page, options = {}) {
|
|
|
627
755
|
// Brief pause before clicking
|
|
628
756
|
await fastTimeout(TIMING.CLICK_PAUSE_MIN + Math.random() * (TIMING.CLICK_PAUSE_MAX - TIMING.CLICK_PAUSE_MIN));
|
|
629
757
|
|
|
630
|
-
await page
|
|
758
|
+
await humanClick(page, element.x, element.y, { forceDebug, realistic });
|
|
631
759
|
|
|
632
760
|
// Brief pause after clicking
|
|
633
761
|
await fastTimeout(TIMING.POST_CLICK_MIN + Math.random() * (TIMING.POST_CLICK_MAX - TIMING.POST_CLICK_MIN));
|
|
@@ -679,8 +807,12 @@ async function performContentClicks(page, options = {}) {
|
|
|
679
807
|
preDelay = CONTENT_CLICK.PRE_CLICK_DELAY,
|
|
680
808
|
interClickMin = CONTENT_CLICK.INTER_CLICK_MIN,
|
|
681
809
|
interClickMax = CONTENT_CLICK.INTER_CLICK_MAX,
|
|
682
|
-
forceDebug = false
|
|
810
|
+
forceDebug = false,
|
|
811
|
+
realistic = false // siteConfig.realistic_click — denser approach + hold tremor + mouseup drift
|
|
683
812
|
} = options;
|
|
813
|
+
const approachSteps = realistic
|
|
814
|
+
? CONTENT_CLICK.MOUSE_APPROACH_STEPS_REALISTIC
|
|
815
|
+
: CONTENT_CLICK.MOUSE_APPROACH_STEPS;
|
|
684
816
|
|
|
685
817
|
try {
|
|
686
818
|
if (page.isClosed()) return;
|
|
@@ -707,14 +839,15 @@ async function performContentClicks(page, options = {}) {
|
|
|
707
839
|
|
|
708
840
|
// Natural mouse approach (few steps, no need for elaborate curves)
|
|
709
841
|
await humanLikeMouseMove(page, lastX, lastY, targetX, targetY, {
|
|
710
|
-
steps:
|
|
842
|
+
steps: approachSteps,
|
|
711
843
|
curve: 0.03 + Math.random() * 0.04,
|
|
712
|
-
jitter: 1
|
|
844
|
+
jitter: 1,
|
|
845
|
+
realistic
|
|
713
846
|
});
|
|
714
847
|
|
|
715
848
|
// Brief human-like pause, then click
|
|
716
849
|
await fastTimeout(TIMING.CLICK_PAUSE_MIN + Math.random() * (TIMING.CLICK_PAUSE_MAX - TIMING.CLICK_PAUSE_MIN));
|
|
717
|
-
await page
|
|
850
|
+
await humanClick(page, targetX, targetY, { forceDebug, realistic });
|
|
718
851
|
|
|
719
852
|
if (forceDebug) {
|
|
720
853
|
console.log(formatLogMessage('debug', `${INTERACTION_TAG} Content click ${i + 1}/${clicks} at (${targetX}, ${targetY})`));
|
|
@@ -792,7 +925,82 @@ async function performContentClicks(page, options = {}) {
|
|
|
792
925
|
* includeElementClicks: false
|
|
793
926
|
* });
|
|
794
927
|
*/
|
|
928
|
+
/**
 * Work-aware ceiling (ms) for a single interaction pass.
 *
 * Interaction is a sequence of awaited steps (mouse moves, scrolls, content
 * clicks). A flat ceiling either truncates configs with a high click count /
 * realistic clicks or sits loosely over light runs, so the ceiling is scaled
 * by the amount of work requested instead:
 *
 *   BASE + movements * PER_MOVE + scrolls * PER_SCROLL + clicks * PER_CLICK
 *
 * The result is a SAFETY ceiling, not a target.
 *
 * Inputs are sanitised so the result is always a finite number usable as a
 * setTimeout delay: a non-numeric mouseMovements / clickCount falls back to
 * the intensity / CONTENT_CLICK default (otherwise the sum would be NaN and
 * Math.max(15000, NaN) would return NaN, defeating the floor), negative counts
 * are treated as 0, and the result is clamped to the largest delay setTimeout
 * honours.
 *
 * @param {Object} [options] - same shape performPageInteraction receives
 * @param {string} [options.intensity='medium'] - key into INTENSITY_SETTINGS
 *   (case-insensitive; unknown values use MEDIUM)
 * @param {number} [options.mouseMovements] - overrides the intensity's movement count
 * @param {boolean} [options.includeScrolling=true] - count the intensity's scrolls
 * @param {boolean} [options.includeElementClicks=false] - count content clicks
 * @param {number} [options.clickCount] - click override, capped at
 *   CONTENT_CLICK.CLICK_COUNT_MAX; falsy -> CONTENT_CLICK.CLICK_COUNT
 * @param {boolean} [options.realistic=false] - use the larger per-click allowance
 * @returns {number} ceiling in ms (floored at 15000, the prior flat budget)
 */
function computeInteractionCeilingMs(options = {}) {
  const {
    intensity = 'medium',
    mouseMovements,
    includeScrolling = true,
    includeElementClicks = false,
    clickCount,
    realistic = false
  } = options;

  const FLOOR_MS = 15000; // the prior flat budget, so light/default configs are unchanged
  const MAX_TIMER_MS = 2147483647; // setTimeout treats larger delays as 1ms
  const BASE_MS = 6000; // setup, viewport, final move, slack
  const PER_MOVE_MS = 700;
  const PER_SCROLL_MS = 800;
  const PER_CLICK_MS = realistic ? 7000 : 4000; // realistic clicks are denser (15-step approach + tremor)

  // Coerce to a finite, non-negative count; anything else uses the fallback.
  const toCount = (value, fallback) => {
    const n = Number(value);
    return Number.isFinite(n) ? Math.max(0, n) : fallback;
  };

  const settings = INTENSITY_SETTINGS[String(intensity).toUpperCase()] || INTENSITY_SETTINGS.MEDIUM;
  const movements = mouseMovements !== undefined
    ? toCount(mouseMovements, settings.movements)
    : settings.movements;
  const scrolls = includeScrolling ? settings.scrolls : 0;

  let clicks = 0;
  if (includeElementClicks) {
    const requested = clickCount ? Math.floor(Number(clickCount)) : NaN;
    clicks = Number.isNaN(requested)
      ? CONTENT_CLICK.CLICK_COUNT
      : Math.min(Math.max(0, requested), CONTENT_CLICK.CLICK_COUNT_MAX);
  }

  const workMs = BASE_MS + movements * PER_MOVE_MS + scrolls * PER_SCROLL_MS + clicks * PER_CLICK_MS;
  if (!Number.isFinite(workMs)) return FLOOR_MS;
  return Math.min(MAX_TIMER_MS, Math.max(FLOOR_MS, workMs));
}
|
|
973
|
+
|
|
795
974
|
/**
 * Runs one interaction pass under a hard wall-clock ceiling.
 *
 * The implementation's own timeout check is cooperative (evaluated only
 * between steps), so a single slow await can run well past the soft budget.
 * Racing the work against a real timer enforces the ceiling regardless of
 * where the time goes.
 *
 * Never rejects for interaction failures: the timer resolves (never rejects)
 * and the implementation promise is .catch()'d, so an orphaned run cannot
 * surface an unhandled rejection after the race settles. Note that when the
 * cap wins, the implementation is not cancelled -- it keeps running in the
 * background until it finishes or fails.
 *
 * @param {object} page - Puppeteer page, forwarded to the implementation
 * @param {string} currentUrl - URL being processed (used in the debug log)
 * @param {Object} [options] - interaction options; also drives the ceiling
 * @param {boolean} [forceDebug=false] - log when the hard cap fires
 * @returns {Promise<void>}
 */
async function performPageInteraction(page, currentUrl, options = {}, forceDebug = false) {
  const FALLBACK_CAP_MS = 15000; // prior flat budget
  const MAX_TIMER_DELAY_MS = 2147483647; // setTimeout coerces larger delays to 1ms

  // Work-aware ceiling: scales with clicks/realistic/intensity. Validate it
  // before handing it to setTimeout -- a NaN, non-positive or oversized delay
  // would fire after ~1ms and cap the interaction before it does anything.
  const computedCapMs = computeInteractionCeilingMs(options);
  const HARD_CAP_MS = Number.isFinite(computedCapMs) && computedCapMs > 0
    ? Math.min(computedCapMs, MAX_TIMER_DELAY_MS)
    : FALLBACK_CAP_MS;

  let capTimer;
  let capped = false;
  const work = _performPageInteractionImpl(page, currentUrl, options, forceDebug).catch(() => {});
  try {
    await Promise.race([
      work,
      new Promise(resolve => { capTimer = setTimeout(() => { capped = true; resolve(); }, HARD_CAP_MS); })
    ]);
  } finally {
    // Always release the timer so a fast pass doesn't keep it pending.
    if (capTimer) clearTimeout(capTimer);
  }
  if (capped && forceDebug) {
    console.log(formatLogMessage('debug', `${INTERACTION_TAG} Interaction hard-capped at ${HARD_CAP_MS}ms for ${currentUrl} (event-loop/CDP contention)`));
  }
}
|
|
1002
|
+
|
|
1003
|
+
async function _performPageInteractionImpl(page, currentUrl, options = {}, forceDebug = false) {
|
|
796
1004
|
// mouseMovements deliberately has no default in the destructure: we want
|
|
797
1005
|
// to distinguish 'caller didn't pass it' from 'caller explicitly passed 3'
|
|
798
1006
|
// so the actualMovements calculation below can let intensity drive the
|
|
@@ -803,7 +1011,9 @@ async function performPageInteraction(page, currentUrl, options = {}, forceDebug
|
|
|
803
1011
|
includeScrolling = true,
|
|
804
1012
|
includeElementClicks = false,
|
|
805
1013
|
duration = TIMING.DEFAULT_INTERACTION_DURATION,
|
|
806
|
-
intensity = 'medium'
|
|
1014
|
+
intensity = 'medium',
|
|
1015
|
+
clickCount, // optional override; undefined -> performContentClicks uses CONTENT_CLICK.CLICK_COUNT default
|
|
1016
|
+
realistic = false // siteConfig.realistic_click — propagated to performContentClicks
|
|
807
1017
|
} = options;
|
|
808
1018
|
|
|
809
1019
|
try {
|
|
@@ -910,7 +1120,13 @@ async function performPageInteraction(page, currentUrl, options = {}, forceDebug
|
|
|
910
1120
|
// interactWithElements is still exported for callers that want it.
|
|
911
1121
|
if (includeElementClicks) {
|
|
912
1122
|
if (checkTimeout()) return; // Emergency timeout check
|
|
913
|
-
|
|
1123
|
+
// Pass clickCount only when caller set it (via siteConfig.interact_click_count)
|
|
1124
|
+
// -- omit otherwise so performContentClicks's default destructure
|
|
1125
|
+
// falls through to CONTENT_CLICK.CLICK_COUNT. realistic is always
|
|
1126
|
+
// forwarded (defaults to false at every layer).
|
|
1127
|
+
const ccOpts = { forceDebug, realistic };
|
|
1128
|
+
if (clickCount) ccOpts.clicks = clickCount;
|
|
1129
|
+
await performContentClicks(page, ccOpts);
|
|
914
1130
|
}
|
|
915
1131
|
|
|
916
1132
|
// Final resting position — single mouse.move instead of the previous
|
|
@@ -1037,6 +1253,25 @@ function createInteractionConfig(url, siteConfig = {}) {
|
|
|
1037
1253
|
if (siteConfig.interact_clicks !== undefined) {
|
|
1038
1254
|
config.includeElementClicks = siteConfig.interact_clicks;
|
|
1039
1255
|
}
|
|
1256
|
+
// interact_click_count: per-site override of how many random
|
|
1257
|
+
// content-zone clicks performContentClicks fires. Cap at
|
|
1258
|
+
// CLICK_COUNT_MAX to prevent runaway from typos. Coerce to integer
|
|
1259
|
+
// and clamp >= 1 (count of 0 should be expressed via
|
|
1260
|
+
// interact_clicks: false, not interact_click_count: 0).
|
|
1261
|
+
if (typeof siteConfig.interact_click_count === 'number' && siteConfig.interact_click_count > 0) {
|
|
1262
|
+
config.clickCount = Math.min(
|
|
1263
|
+
Math.floor(siteConfig.interact_click_count),
|
|
1264
|
+
CONTENT_CLICK.CLICK_COUNT_MAX
|
|
1265
|
+
);
|
|
1266
|
+
}
|
|
1267
|
+
// realistic_click: opt-in for sites that score click realism
|
|
1268
|
+
// (DataDome, Akamai BM, PerimeterX). Adds ~80–120ms per click for the
|
|
1269
|
+
// denser approach; hold-tremor and mouseup-drift fit inside the
|
|
1270
|
+
// existing hold window so they're free. Off by default since ad-network
|
|
1271
|
+
// popunder discovery doesn't need it and we'd rather keep scans fast.
|
|
1272
|
+
if (siteConfig.realistic_click === true) {
|
|
1273
|
+
config.realistic = true;
|
|
1274
|
+
}
|
|
1040
1275
|
|
|
1041
1276
|
return config;
|
|
1042
1277
|
} catch (urlErr) {
|
|
@@ -1091,6 +1326,7 @@ module.exports = {
|
|
|
1091
1326
|
// Main interaction functions
|
|
1092
1327
|
performPageInteraction,
|
|
1093
1328
|
createInteractionConfig,
|
|
1329
|
+
computeInteractionCeilingMs,
|
|
1094
1330
|
getViewport,
|
|
1095
1331
|
// Component functions for custom implementations
|
|
1096
1332
|
humanLikeMouseMove,
|
package/lib/nettools.js
CHANGED
|
@@ -561,8 +561,19 @@ async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debug
|
|
|
561
561
|
|
|
562
562
|
const { stdout, stderr } = await execFileWithTimeout('whois', whoisArgs, timeout);
|
|
563
563
|
const duration = Date.now() - startTime;
|
|
564
|
-
|
|
565
|
-
|
|
564
|
+
|
|
565
|
+
// Treat stderr as failure ONLY when there's no usable stdout. Many whois
|
|
566
|
+
// servers/clients emit non-fatal notices to stderr — referral/redirect
|
|
567
|
+
// lines, rate-limit and GDPR "data redacted" banners, registry
|
|
568
|
+
// disclaimers — while returning the real record on stdout. The old "any
|
|
569
|
+
// stderr -> fail" path discarded those valid records (and triggered
|
|
570
|
+
// needless fallback retries in whoisLookupWithRetry). When stdout has
|
|
571
|
+
// content we prefer it and treat the lookup as successful; the downstream
|
|
572
|
+
// term match is the real gate, so this can't manufacture a match. stderr
|
|
573
|
+
// is still surfaced in debug logging below for visibility.
|
|
574
|
+
const hasUsableStdout = !!(stdout && stdout.trim());
|
|
575
|
+
|
|
576
|
+
if (!hasUsableStdout && stderr && stderr.trim()) {
|
|
566
577
|
if (debugMode) {
|
|
567
578
|
if (logFunc) {
|
|
568
579
|
logFunc(`${messageColors.highlight('[whois]')} Lookup failed for ${cleanDomain} after ${duration}ms`);
|
|
@@ -604,6 +615,14 @@ async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debug
|
|
|
604
615
|
console.log(formatLogMessage('debug', `${messageColors.highlight('[whois]')} Server: ${selectedServer || 'default'}`));
|
|
605
616
|
console.log(formatLogMessage('debug', `${messageColors.highlight('[whois]')} Output length: ${stdout.length} characters`));
|
|
606
617
|
}
|
|
618
|
+
// Non-fatal stderr alongside usable stdout — kept visible so a real
|
|
619
|
+
// problem (truncation, partial referral) is still diagnosable even
|
|
620
|
+
// though we're treating the lookup as a success.
|
|
621
|
+
if (stderr && stderr.trim()) {
|
|
622
|
+
const note = `${messageColors.highlight('[whois]')} Non-fatal stderr (stdout used): ${stderr.trim()}`;
|
|
623
|
+
if (logFunc) logFunc(note);
|
|
624
|
+
else console.log(formatLogMessage('debug', note));
|
|
625
|
+
}
|
|
607
626
|
}
|
|
608
627
|
|
|
609
628
|
return {
|
|
@@ -1021,66 +1040,6 @@ async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
|
|
|
1021
1040
|
}
|
|
1022
1041
|
}
|
|
1023
1042
|
|
|
1024
|
-
/**
|
|
1025
|
-
* Checks if whois output contains all specified search terms (AND logic)
|
|
1026
|
-
* @param {string} whoisOutput - The whois lookup output
|
|
1027
|
-
* @param {Array<string>} searchTerms - Array of terms that must all be present
|
|
1028
|
-
* @returns {boolean} True if all terms are found
|
|
1029
|
-
*/
|
|
1030
|
-
function checkWhoisTerms(whoisOutput, searchTerms) {
|
|
1031
|
-
if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
|
|
1032
|
-
return false;
|
|
1033
|
-
}
|
|
1034
|
-
|
|
1035
|
-
const lowerOutput = whoisOutput.toLowerCase();
|
|
1036
|
-
return searchTerms.every(term => lowerOutput.includes(term.toLowerCase()));
|
|
1037
|
-
}
|
|
1038
|
-
|
|
1039
|
-
/**
|
|
1040
|
-
* Checks if whois output contains any of the specified search terms (OR logic)
|
|
1041
|
-
* @param {string} whoisOutput - The whois lookup output
|
|
1042
|
-
* @param {Array<string>} searchTerms - Array of terms where at least one must be present
|
|
1043
|
-
* @returns {boolean} True if any term is found
|
|
1044
|
-
*/
|
|
1045
|
-
function checkWhoisTermsOr(whoisOutput, searchTerms) {
|
|
1046
|
-
if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
|
|
1047
|
-
return false;
|
|
1048
|
-
}
|
|
1049
|
-
|
|
1050
|
-
const lowerOutput = whoisOutput.toLowerCase();
|
|
1051
|
-
return searchTerms.some(term => lowerOutput.includes(term.toLowerCase()));
|
|
1052
|
-
}
|
|
1053
|
-
|
|
1054
|
-
/**
|
|
1055
|
-
* Checks if dig output contains all specified search terms (AND logic)
|
|
1056
|
-
* @param {string} digOutput - The dig lookup output
|
|
1057
|
-
* @param {Array<string>} searchTerms - Array of terms that must all be present
|
|
1058
|
-
* @returns {boolean} True if all terms are found
|
|
1059
|
-
*/
|
|
1060
|
-
function checkDigTerms(digOutput, searchTerms) {
|
|
1061
|
-
if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
|
|
1062
|
-
return false;
|
|
1063
|
-
}
|
|
1064
|
-
|
|
1065
|
-
const lowerOutput = digOutput.toLowerCase();
|
|
1066
|
-
return searchTerms.every(term => lowerOutput.includes(term.toLowerCase()));
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
/**
|
|
1070
|
-
* Checks if dig output contains any of the specified search terms (OR logic)
|
|
1071
|
-
* @param {string} digOutput - The dig lookup output
|
|
1072
|
-
* @param {Array<string>} searchTerms - Array of terms where at least one must be present
|
|
1073
|
-
* @returns {boolean} True if any term is found
|
|
1074
|
-
*/
|
|
1075
|
-
function checkDigTermsOr(digOutput, searchTerms) {
|
|
1076
|
-
if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
|
|
1077
|
-
return false;
|
|
1078
|
-
}
|
|
1079
|
-
|
|
1080
|
-
const lowerOutput = digOutput.toLowerCase();
|
|
1081
|
-
return searchTerms.some(term => lowerOutput.includes(term.toLowerCase()));
|
|
1082
|
-
}
|
|
1083
|
-
|
|
1084
1043
|
/**
|
|
1085
1044
|
* Enhanced dry run callback factory for better nettools reporting
|
|
1086
1045
|
* @param {Map} matchedDomains - The matched domains collection
|
|
@@ -1221,7 +1180,20 @@ function createNetToolsHandler(config) {
|
|
|
1221
1180
|
const digDedupeKey = `${digDomain}:${digConfigKey}`;
|
|
1222
1181
|
const needsWhoisLookup = (hasWhois || hasWhoisOr) && !processedWhoisDomains.has(whoisDedupeKey);
|
|
1223
1182
|
const needsDigLookup = (hasDig || hasDigOr) && !processedDigDomains.has(digDedupeKey);
|
|
1224
|
-
|
|
1183
|
+
|
|
1184
|
+
// Claim the dedupe keys NOW, synchronously, before executeNetToolsLookup
|
|
1185
|
+
// hits its first await. These Sets are shared across all concurrent URL
|
|
1186
|
+
// handlers, so the .has() checks above and these .add() claims must sit in
|
|
1187
|
+
// the same uninterrupted synchronous span to be atomic. The whois claim
|
|
1188
|
+
// was already safe (its add ran before any await), but the dig claim used
|
|
1189
|
+
// to live AFTER the whois lookup's await — opening a window where a second
|
|
1190
|
+
// handler for the same domain passed the dig check before the first
|
|
1191
|
+
// claimed it, running dig twice. Claiming both here closes that window.
|
|
1192
|
+
// Matches the existing claim-before-lookup semantics (no rollback on
|
|
1193
|
+
// failure: a failed lookup still suppresses retries for the TTL).
|
|
1194
|
+
if (needsWhoisLookup) processedWhoisDomains.add(whoisDedupeKey);
|
|
1195
|
+
if (needsDigLookup) processedDigDomains.add(digDedupeKey);
|
|
1196
|
+
|
|
1225
1197
|
// Skip if we don't need to perform any lookups
|
|
1226
1198
|
if (!needsWhoisLookup && !needsDigLookup) {
|
|
1227
1199
|
if (forceDebug) {
|
|
@@ -1320,9 +1292,9 @@ function createNetToolsHandler(config) {
|
|
|
1320
1292
|
|
|
1321
1293
|
// Perform whois lookup if either whois or whois-or is configured
|
|
1322
1294
|
if (needsWhoisLookup) {
|
|
1323
|
-
//
|
|
1324
|
-
|
|
1325
|
-
|
|
1295
|
+
// Dedupe key already claimed up-front (before the first await) to keep
|
|
1296
|
+
// the has()/add() span atomic across concurrent handlers.
|
|
1297
|
+
|
|
1326
1298
|
// Check whois cache first - cache key includes server for accuracy
|
|
1327
1299
|
const selectedServer = selectWhoisServer(whoisServer, whoisServerMode);
|
|
1328
1300
|
const whoisCacheKey = `${whoisRootDomain}-${(selectedServer && selectedServer !== '') ? selectedServer : 'default'}`;
|
|
@@ -1438,11 +1410,10 @@ function createNetToolsHandler(config) {
|
|
|
1438
1410
|
if (whoisResult) {
|
|
1439
1411
|
|
|
1440
1412
|
if (whoisResult.success) {
|
|
1441
|
-
// Lowercase the output ONCE
|
|
1442
|
-
// each
|
|
1443
|
-
// re-
|
|
1444
|
-
// here
|
|
1445
|
-
// a single allocation.
|
|
1413
|
+
// Lowercase the output ONCE. The AND check, OR check, and
|
|
1414
|
+
// matched-term find below each need a lowercased copy; doing it
|
|
1415
|
+
// per-check would re-allocate a multi-KB string each time, so
|
|
1416
|
+
// pre-lower here and let all three share a single allocation.
|
|
1446
1417
|
const whoisOutputLower = whoisResult.output.toLowerCase();
|
|
1447
1418
|
|
|
1448
1419
|
// Check AND terms if configured
|
|
@@ -1578,9 +1549,10 @@ function createNetToolsHandler(config) {
|
|
|
1578
1549
|
|
|
1579
1550
|
// Perform dig lookup if configured
|
|
1580
1551
|
if (needsDigLookup) {
|
|
1581
|
-
//
|
|
1582
|
-
|
|
1583
|
-
|
|
1552
|
+
// Dedupe key already claimed up-front (before the first await) to keep
|
|
1553
|
+
// the has()/add() span atomic across concurrent handlers — this used
|
|
1554
|
+
// to claim here, after the whois await, which left the race window.
|
|
1555
|
+
|
|
1584
1556
|
if (forceDebug) {
|
|
1585
1557
|
const digTypes = [];
|
|
1586
1558
|
if (hasDig) digTypes.push('dig-and');
|
|
@@ -1826,8 +1798,7 @@ function createNetToolsHandler(config) {
|
|
|
1826
1798
|
|
|
1827
1799
|
// Public surface kept narrow on purpose -- only what nwss.js actually
|
|
1828
1800
|
// imports (verified via repo-wide grep). Internal helpers
|
|
1829
|
-
// (whoisLookup, whoisLookupWithRetry, digLookup,
|
|
1830
|
-
// checkWhoisTermsOr, checkDigTerms, checkDigTermsOr, selectWhoisServer,
|
|
1801
|
+
// (whoisLookup, whoisLookupWithRetry, digLookup, selectWhoisServer,
|
|
1831
1802
|
// getCommonWhoisServers, suggestWhoisServers, execFileWithTimeout,
|
|
1832
1803
|
// markResolved, digOutputIndicatesResolution, loadDiskCache,
|
|
1833
1804
|
// saveDiskCache, enforceCacheCap, stripAnsiColors) stay as module-local
|