@fanboynz/network-scanner 3.0.1 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/nwss.js CHANGED
@@ -109,6 +109,7 @@ const TIMEOUTS = Object.freeze({
109
109
  EMERGENCY_RESTART_DELAY: 2000, // Delay after emergency browser restart
110
110
  BROWSER_STABILIZE_DELAY: 1000, // Browser stabilization after restart
111
111
  CURL_HANDLER_DELAY: 3000, // Wait for async curl operations
112
+ NETTOOLS_DRAIN_TIMEOUT: 3000, // Hard cap for awaiting in-flight nettools (dig/whois) handlers before snapshot. Drains immediately if all complete; bounded so a hung dig can't block exit. Mirrors CURL_HANDLER_DELAY's role for curl/searchstring.
112
113
  PROTOCOL_TIMEOUT: 180000, // Chrome DevTools Protocol timeout
113
114
  REDIRECT_JS_TIMEOUT: 5000 // JavaScript redirect detection timeout
114
115
  });
@@ -777,7 +778,8 @@ Redirect Handling Options:
777
778
  isBrave: true/false Spoof Brave browser detection
778
779
  userAgent: "chrome"|"chrome_mac"|"chrome_linux"|"firefox"|"firefox_mac"|"firefox_linux"|"safari" Custom desktop User-Agent
779
780
  interact_intensity: "low"|"medium"|"high" Interaction simulation intensity (default: medium)
780
- delay: <milliseconds> Delay after load (default: 4000)
781
+ delay: <milliseconds> Delay after load (default: 6000, capped at 2000ms unless delay_uncapped: true)
782
+ delay_uncapped: true/false Honor 'delay' up to half the per-URL timeout instead of the 2s default cap. Use for sites with setTimeout-deferred lazy ad/tracker loaders that fire well past the standard post-networkidle window
781
783
  reload: <number> Reload page n times after load (default: 1)
782
784
  forcereload: true/false or ["domain1.com", "domain2.com"] Force cache-clearing reload for all URLs or specific domains
783
785
  clear_sitedata: true/false Clear all cookies, cache, storage before each load (default: false)
@@ -1864,7 +1866,13 @@ function setupFrameHandling(page, forceDebug) {
1864
1866
  '--disable-domain-reliability', // No reliability monitor disk writes
1865
1867
  // PERFORMANCE: Disable non-essential Chrome features in a single flag
1866
1868
  // IMPORTANT: Chrome only reads the LAST --disable-features flag, so combine all into one
1867
- `--disable-features=AudioServiceOutOfProcess,VizDisplayCompositor,TranslateUI,BlinkGenPropertyTrees,Translate,BackForwardCache,AcceptCHFrame,SafeBrowsing,HttpsFirstBalancedModeAutoEnable,site-per-process,PaintHolding${disable_ad_tagging ? ',AdTagging' : ''}`,
1869
+ // Disabling AccountConsistencyMirror + AccountConsistencyDice stops the
1870
+ // Chrome sign-in subsystem from initialising at startup. Combined
1871
+ // with --disable-sync + --allow-browser-signin=false below, this
1872
+ // suppresses the "Something went wrong when opening your profile"
1873
+ // popup that fires in headful + --keep-open mode (temp userDataDir
1874
+ // has no real profile, so the sync init errors out and pops up).
1875
+ `--disable-features=AudioServiceOutOfProcess,VizDisplayCompositor,TranslateUI,BlinkGenPropertyTrees,Translate,BackForwardCache,AcceptCHFrame,SafeBrowsing,HttpsFirstBalancedModeAutoEnable,site-per-process,PaintHolding,AccountConsistencyMirror,AccountConsistencyDice${disable_ad_tagging ? ',AdTagging' : ''}`,
1868
1876
  '--disable-ipc-flooding-protection',
1869
1877
  '--aggressive-cache-discard',
1870
1878
  '--memory-pressure-off',
@@ -1874,7 +1882,16 @@ function setupFrameHandling(page, forceDebug) {
1874
1882
  '--no-sandbox',
1875
1883
  '--disable-setuid-sandbox',
1876
1884
  '--disable-dev-shm-usage',
1877
- ...(keepBrowserOpen ? [] : ['--disable-sync']),
1885
+ // --disable-sync is always-on (was previously dropped in --keep-open
1886
+ // mode, which let the sync subsystem init against our temp
1887
+ // userDataDir and pop the "Something went wrong when opening your
1888
+ // profile" dialog). Inspection during --keep-open doesn't need
1889
+ // sync; nothing in the scanner flow does.
1890
+ '--disable-sync',
1891
+ // Prevent the sign-in promo / account banner from appearing in
1892
+ // headful sessions. Same family of fixes as --disable-sync and the
1893
+ // AccountConsistency* features disabled above.
1894
+ '--allow-browser-signin=false',
1878
1895
  '--mute-audio',
1879
1896
  '--disable-translate',
1880
1897
  '--window-size=1920,1080',
@@ -2100,6 +2117,30 @@ function setupFrameHandling(page, forceDebug) {
2100
2117
  // Use Map to track domains and their resource types for --adblock-rules or --dry-run
2101
2118
  const matchedDomains = (adblockRulesMode || siteConfig.adblock_rules || dryRunMode) ? new Map() : new Set();
2102
2119
 
2120
+ // Per-URL tracking of in-flight async nettools (dig/whois) handlers so we
2121
+ // can drain them BEFORE snapshotting matchedDomains into the result. The
2122
+ // previous fire-and-forget setImmediate pattern dropped late-completing
2123
+ // matches (handler resolved after formatRules had already run). Each
2124
+ // setImmediate-scheduled handler now registers a promise via
2125
+ // trackNetToolsHandler; drainPendingNetTools() awaits all of them with a
2126
+ // hard cap (TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT) so a hung dig can't block.
2127
+ const pendingNetTools = [];
2128
+ const trackNetToolsHandler = (handlerFn) => {
2129
+ pendingNetTools.push(new Promise((resolve) => {
2130
+ setImmediate(async () => {
2131
+ try { await handlerFn(); } catch (_) { /* handler logs its own errors */ }
2132
+ finally { resolve(); }
2133
+ });
2134
+ }));
2135
+ };
2136
+ const drainPendingNetTools = async () => {
2137
+ if (pendingNetTools.length === 0) return;
2138
+ await Promise.race([
2139
+ Promise.all(pendingNetTools),
2140
+ fastTimeout(TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT)
2141
+ ]);
2142
+ };
2143
+
2103
2144
  // Local domain dedup scoped to THIS processUrl call only
2104
2145
  // Prevents cross-config contamination from the global domain cache
2105
2146
  const localDetectedDomains = new Set();
@@ -3167,7 +3208,7 @@ function setupFrameHandling(page, forceDebug) {
3167
3208
  currentUrl, getRootDomain, siteConfig, dumpUrls, matchedUrlsLogFile, forceDebug, fs,
3168
3209
  ignoreDomains, matchesIgnoreDomain
3169
3210
  });
3170
- setImmediate(() => popupNetToolsHandler(checkedRootDomain, fullSubdomain));
3211
+ trackNetToolsHandler(() => popupNetToolsHandler(checkedRootDomain, fullSubdomain));
3171
3212
  } else {
3172
3213
  // No nettools required — regex match alone counts.
3173
3214
  addMatchedDomain(checkedRootDomain, resourceType, fullSubdomain);
@@ -3573,7 +3614,7 @@ function setupFrameHandling(page, forceDebug) {
3573
3614
 
3574
3615
  // Execute nettools check asynchronously
3575
3616
  const originalDomain = fullSubdomain;
3576
- setImmediate(() => netToolsHandler(reqDomain, originalDomain));
3617
+ trackNetToolsHandler(() => netToolsHandler(reqDomain, originalDomain));
3577
3618
  }
3578
3619
  if (forceDebug) {
3579
3620
  console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
@@ -3688,7 +3729,7 @@ function setupFrameHandling(page, forceDebug) {
3688
3729
 
3689
3730
  // Execute nettools check asynchronously
3690
3731
  const originalDomain = fullSubdomain; // Use full subdomain for nettools
3691
- setImmediate(() => netToolsHandler(reqDomain, originalDomain));
3732
+ trackNetToolsHandler(() => netToolsHandler(reqDomain, originalDomain));
3692
3733
 
3693
3734
  // Do NOT continue processing this request for immediate domain addition
3694
3735
  // The nettools handler is responsible for adding the domain if validation passes
@@ -4237,13 +4278,22 @@ function setupFrameHandling(page, forceDebug) {
4237
4278
  }
4238
4279
  }
4239
4280
 
4240
- const delayMs = DEFAULT_DELAY;
4281
+ const delayMs = siteConfig.delay || DEFAULT_DELAY;
4241
4282
 
4242
4283
  // Optimized delays for Puppeteer 23.x performance
4243
4284
  const isFastSite = timeout <= TIMEOUTS.FAST_SITE_THRESHOLD;
4244
4285
  const networkIdleTime = TIMEOUTS.NETWORK_IDLE; // Balanced: 2s for reliable network detection
4245
4286
  const networkIdleTimeout = Math.min(timeout / 2, TIMEOUTS.NETWORK_IDLE_MAX); // Balanced: 10s timeout
4246
- const actualDelay = Math.min(delayMs, TIMEOUTS.NETWORK_IDLE); // Balanced: 2s delay for stability
4287
+ // Post-networkidle delay cap. Default (2s) keeps fast sites fast. Opt
4288
+ // in with `delay_uncapped: true` to honor the configured `delay` up to
4289
+ // half the per-URL timeout — useful for sites with setTimeout-deferred
4290
+ // lazy ad/tracker loaders (weather.com, cbssports.com class) where
4291
+ // late requests fire well past the 2s window. See also the per-URL
4292
+ // drainPendingNetTools() which awaits in-flight dig/whois handlers
4293
+ // before the matchedDomains snapshot regardless of this flag.
4294
+ const actualDelay = siteConfig.delay_uncapped === true
4295
+ ? Math.min(delayMs, Math.floor(timeout / 2))
4296
+ : Math.min(delayMs, TIMEOUTS.NETWORK_IDLE);
4247
4297
 
4248
4298
  // Build delay promise (networkIdle + delay + optional flowProxy delay)
4249
4299
  const delayPromise = (async () => {
@@ -4625,7 +4675,8 @@ function setupFrameHandling(page, forceDebug) {
4625
4675
  // Wait a moment for async nettools/searchstring operations to complete
4626
4676
  // Use fast timeout helper for Puppeteer 22.x compatibility
4627
4677
  await fastTimeout(TIMEOUTS.CURL_HANDLER_DELAY); // Wait for async operations
4628
-
4678
+ await drainPendingNetTools(); // Bounded wait for in-flight dig/whois (race fix)
4679
+
4629
4680
  return { url: currentUrl, rules: [], success: true, dryRun: true, matchCount: dryRunResult.matchCount };
4630
4681
  } else {
4631
4682
  // Format rules using the output module
@@ -4639,6 +4690,12 @@ function setupFrameHandling(page, forceDebug) {
4639
4690
  privoxyMode,
4640
4691
  piholeMode
4641
4692
  };
4693
+ // Drain pending dig/whois handlers BEFORE snapshotting matchedDomains.
4694
+ // Without this, late-completing async validations (request fired near
4695
+ // end of the delay window, dig still in flight) get orphaned — their
4696
+ // addMatchedDomain calls happen but the result has already been
4697
+ // returned. Bounded by TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT.
4698
+ await drainPendingNetTools();
4642
4699
  const formattedRules = formatRules(matchedDomains, siteConfig, globalOptions);
4643
4700
 
4644
4701
  return {
@@ -4690,7 +4747,11 @@ function setupFrameHandling(page, forceDebug) {
4690
4747
  };
4691
4748
  }
4692
4749
 
4693
- // For other errors, preserve any matches we found before the error
4750
+ // For other errors, preserve any matches we found before the error.
4751
+ // Drain pending nettools first so dig/whois handlers scheduled DURING
4752
+ // the failed navigation get a chance to add to matchedDomains before
4753
+ // the partial-success snapshot — same race as the success path.
4754
+ await drainPendingNetTools();
4694
4755
  if (matchedDomains && (matchedDomains.size > 0 || (matchedDomains instanceof Map && matchedDomains.size > 0))) {
4695
4756
  const globalOptions = {
4696
4757
  localhostIP,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "3.0.1",
3
+ "version": "3.0.3",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -70,7 +70,7 @@ const TARGETS = [
70
70
  extract: async (page) => {
71
71
  return await page.evaluate(() => {
72
72
  const cells = Array.from(document.querySelectorAll('td'));
73
- const out = { passed: 0, failed: 0, warn: 0, total: 0, failures: [] };
73
+ const out = { passed: 0, failed: 0, warn: 0, total: 0, failures: [], warnings: [] };
74
74
  for (const c of cells) {
75
75
  const cls = c.className || '';
76
76
  if (cls.includes('passed')) { out.passed++; out.total++; }
@@ -81,7 +81,14 @@ const TARGETS = [
81
81
  const label = row?.querySelector('td')?.textContent?.trim() || '?';
82
82
  out.failures.push(label);
83
83
  }
84
- else if (cls.includes('warn')) { out.warn++; out.total++; }
84
+ else if (cls.includes('warn')) {
85
+ out.warn++; out.total++;
86
+ // Capture warn-row labels too so a soft regression (cell moving
87
+ // passed -> warn) is debuggable without --headful.
88
+ const row = c.closest('tr');
89
+ const label = row?.querySelector('td')?.textContent?.trim() || '?';
90
+ out.warnings.push(label);
91
+ }
85
92
  }
86
93
  return out;
87
94
  });
@@ -97,15 +104,29 @@ const TARGETS = [
97
104
  await new Promise(r => setTimeout(r, 8000)); // give async tests time
98
105
  return await page.evaluate(() => {
99
106
  const text = document.body.innerText || '';
100
- // CreepJS reports a "Trust Score" percentage and individual signal entries.
101
- const trustMatch = text.match(/Trust Score[:\s]+(\d+(?:\.\d+)?)\s*%/i);
102
- const lieMatch = text.match(/lies[:\s]+(\d+)/i);
103
- const botMatch = text.match(/bot[:\s]+(true|false)/i);
107
+ // CreepJS's actual stealth-relevant outputs are in a "Headless"
108
+ // section as percentages (e.g. "67% headless", "40% stealth",
109
+ // "44% like headless"), not the "Trust Score" label the old
110
+ // regex expected. Engine identification comes from "chromium:
111
+ // true/false" in the same block. Lower headless % and lower
112
+ // stealth % are better for evasion (stealth % = detected anti-detection tooling).
113
+ const headlessMatch = text.match(/(\d+(?:\.\d+)?)\s*%\s+headless\b/i);
114
+ const likeHeadlessMatch = text.match(/(\d+(?:\.\d+)?)\s*%\s+like\s+headless/i);
115
+ const stealthMatch = text.match(/(\d+(?:\.\d+)?)\s*%\s+stealth\b/i);
116
+ const chromiumMatch = text.match(/chromium\s*:\s*(true|false)/i);
117
+ // FP ID is CreepJS's stable fingerprint hash — same value across
118
+ // reloads if the fingerprint is unchanged; lets you A/B before
119
+ // and after a spoof change.
120
+ const fpIdMatch = text.match(/FP\s*ID\s*:?\s*([0-9a-f]{16,})/i);
104
121
  return {
105
- trustScore: trustMatch ? parseFloat(trustMatch[1]) : null,
106
- lies: lieMatch ? parseInt(lieMatch[1], 10) : null,
107
- botDetected: botMatch ? botMatch[1] === 'true' : null,
108
- excerpt: text.split('\n').slice(0, 15).join('\n').slice(0, 400)
122
+ headlessPct: headlessMatch ? parseFloat(headlessMatch[1]) : null,
123
+ likeHeadlessPct: likeHeadlessMatch ? parseFloat(likeHeadlessMatch[1]) : null,
124
+ stealthPct: stealthMatch ? parseFloat(stealthMatch[1]) : null,
125
+ isChromium: chromiumMatch ? chromiumMatch[1] === 'true' : null,
126
+ fpId: fpIdMatch ? fpIdMatch[1].slice(0, 16) : null,
127
+ // Larger excerpt (40 lines, up to 2KB) so a future UI rotation
128
+ // is debuggable from the output without --headful.
129
+ excerpt: text.split('\n').slice(0, 40).join('\n').slice(0, 2000)
109
130
  };
110
131
  });
111
132
  }
@@ -165,10 +186,15 @@ function formatResult(target, result) {
165
186
  if (result.failures.length) {
166
187
  lines.push(` failure rows: ${result.failures.slice(0, 10).join(', ')}${result.failures.length > 10 ? ` ... +${result.failures.length - 10} more` : ''}`);
167
188
  }
189
+ if (result.warnings && result.warnings.length) {
190
+ lines.push(` warn rows: ${result.warnings.slice(0, 10).join(', ')}${result.warnings.length > 10 ? ` ... +${result.warnings.length - 10} more` : ''}`);
191
+ }
168
192
  } else if (target.name === 'creepjs') {
169
- lines.push(` trust score: ${result.trustScore ?? 'n/a'}%`);
170
- lines.push(` lies detected: ${result.lies ?? 'n/a'}`);
171
- lines.push(` bot flagged: ${result.botDetected ?? 'n/a'}`);
193
+ lines.push(` FP ID: ${result.fpId ?? 'n/a'} (stable across reloads if fingerprint unchanged)`);
194
+ lines.push(` engine chromium: ${result.isChromium ?? 'n/a'}`);
195
+ lines.push(` headless score: ${result.headlessPct ?? 'n/a'}% (lower = better; 0% = real browser)`);
196
+ lines.push(` like-headless: ${result.likeHeadlessPct ?? 'n/a'}% (lower = better; soft headless signals)`);
197
+ lines.push(` stealth score: ${result.stealthPct ?? 'n/a'}% (lower = better; % likely to be using anti-detection tooling)`);
172
198
  if (result.excerpt) lines.push(` excerpt:\n ${result.excerpt.split('\n').join('\n ')}`);
173
199
  } else if (target.name === 'browserleaks') {
174
200
  for (const [k, v] of Object.entries(result)) {