@fanboynz/network-scanner 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/nwss.js CHANGED
@@ -109,6 +109,7 @@ const TIMEOUTS = Object.freeze({
109
109
  EMERGENCY_RESTART_DELAY: 2000, // Delay after emergency browser restart
110
110
  BROWSER_STABILIZE_DELAY: 1000, // Browser stabilization after restart
111
111
  CURL_HANDLER_DELAY: 3000, // Wait for async curl operations
112
+ NETTOOLS_DRAIN_TIMEOUT: 3000, // Hard cap for awaiting in-flight nettools (dig/whois) handlers before snapshot. Drains immediately if all complete; bounded so a hung dig can't block exit. Mirrors CURL_HANDLER_DELAY's role for curl/searchstring.
112
113
  PROTOCOL_TIMEOUT: 180000, // Chrome DevTools Protocol timeout
113
114
  REDIRECT_JS_TIMEOUT: 5000 // JavaScript redirect detection timeout
114
115
  });
@@ -777,7 +778,8 @@ Redirect Handling Options:
777
778
  isBrave: true/false Spoof Brave browser detection
778
779
  userAgent: "chrome"|"chrome_mac"|"chrome_linux"|"firefox"|"firefox_mac"|"firefox_linux"|"safari" Custom desktop User-Agent
779
780
  interact_intensity: "low"|"medium"|"high" Interaction simulation intensity (default: medium)
780
- delay: <milliseconds> Delay after load (default: 4000)
781
+ delay: <milliseconds> Delay after load (default: 6000, capped at 2000ms unless delay_uncapped: true)
782
+ delay_uncapped: true/false Honor 'delay' up to half the per-URL timeout instead of the 2s default cap. Use for sites with setTimeout-deferred lazy ad/tracker loaders that fire well past the standard post-networkidle window
781
783
  reload: <number> Reload page n times after load (default: 1)
782
784
  forcereload: true/false or ["domain1.com", "domain2.com"] Force cache-clearing reload for all URLs or specific domains
783
785
  clear_sitedata: true/false Clear all cookies, cache, storage before each load (default: false)
@@ -1864,7 +1866,13 @@ function setupFrameHandling(page, forceDebug) {
1864
1866
  '--disable-domain-reliability', // No reliability monitor disk writes
1865
1867
  // PERFORMANCE: Disable non-essential Chrome features in a single flag
1866
1868
  // IMPORTANT: Chrome only reads the LAST --disable-features flag, so combine all into one
1867
- `--disable-features=AudioServiceOutOfProcess,VizDisplayCompositor,TranslateUI,BlinkGenPropertyTrees,Translate,BackForwardCache,AcceptCHFrame,SafeBrowsing,HttpsFirstBalancedModeAutoEnable,site-per-process,PaintHolding${disable_ad_tagging ? ',AdTagging' : ''}`,
1869
+ // AccountConsistencyMirror + AccountConsistencyDice prevent the
1870
+ // Chrome sign-in subsystem from initialising at startup. Combined
1871
+ // with --disable-sync + --allow-browser-signin=false below, this
1872
+ // suppresses the "Something went wrong when opening your profile"
1873
+ // popup that fires in headful + --keep-open mode (temp userDataDir
1874
+ // has no real profile, so the sync init errors out and pops up).
1875
+ `--disable-features=AudioServiceOutOfProcess,VizDisplayCompositor,TranslateUI,BlinkGenPropertyTrees,Translate,BackForwardCache,AcceptCHFrame,SafeBrowsing,HttpsFirstBalancedModeAutoEnable,site-per-process,PaintHolding,AccountConsistencyMirror,AccountConsistencyDice${disable_ad_tagging ? ',AdTagging' : ''}`,
1868
1876
  '--disable-ipc-flooding-protection',
1869
1877
  '--aggressive-cache-discard',
1870
1878
  '--memory-pressure-off',
@@ -1874,7 +1882,16 @@ function setupFrameHandling(page, forceDebug) {
1874
1882
  '--no-sandbox',
1875
1883
  '--disable-setuid-sandbox',
1876
1884
  '--disable-dev-shm-usage',
1877
- ...(keepBrowserOpen ? [] : ['--disable-sync']),
1885
+ // --disable-sync is always-on (was previously dropped in --keep-open
1886
+ // mode, which let the sync subsystem init against our temp
1887
+ // userDataDir and pop the "Something went wrong when opening your
1888
+ // profile" dialog). Inspection during --keep-open doesn't need
1889
+ // sync; nothing in the scanner flow does.
1890
+ '--disable-sync',
1891
+ // Prevent the sign-in promo / account banner from appearing in
1892
+ // headful sessions. Same family of fixes as --disable-sync and the
1893
+ // AccountConsistency* features disabled above.
1894
+ '--allow-browser-signin=false',
1878
1895
  '--mute-audio',
1879
1896
  '--disable-translate',
1880
1897
  '--window-size=1920,1080',
@@ -2100,6 +2117,30 @@ function setupFrameHandling(page, forceDebug) {
2100
2117
  // Use Map to track domains and their resource types for --adblock-rules or --dry-run
2101
2118
  const matchedDomains = (adblockRulesMode || siteConfig.adblock_rules || dryRunMode) ? new Map() : new Set();
2102
2119
 
2120
// Bookkeeping for the async nettools (dig/whois) handlers scheduled while
// this URL is being processed. Every scheduled handler contributes one
// promise to this list so the results can be awaited BEFORE matchedDomains
// is snapshotted into the result; a plain fire-and-forget setImmediate let
// late-completing matches land after formatRules had already run.
const pendingNetTools = [];

// Schedule handlerFn on the next setImmediate tick and record a promise that
// resolves once the handler has finished. The promise never rejects: a
// throwing/rejecting handler is treated as finished.
const trackNetToolsHandler = (handlerFn) => {
  const settled = new Promise((done) => {
    setImmediate(async () => {
      try {
        await handlerFn();
      } catch (_) {
        /* handler logs its own errors */
      } finally {
        done();
      }
    });
  });
  pendingNetTools.push(settled);
};

// Wait for every handler registered so far, racing against
// fastTimeout(TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT) as a hard cap so a hung dig
// can't block. Returns immediately when nothing was registered.
const drainPendingNetTools = async () => {
  if (pendingNetTools.length === 0) return;
  const everyHandler = Promise.all(pendingNetTools);
  const hardCap = fastTimeout(TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT);
  await Promise.race([everyHandler, hardCap]);
};
2143
+
2103
2144
  // Local domain dedup scoped to THIS processUrl call only
2104
2145
  // Prevents cross-config contamination from the global domain cache
2105
2146
  const localDetectedDomains = new Set();
@@ -2323,7 +2364,7 @@ function setupFrameHandling(page, forceDebug) {
2323
2364
  let browserResponsive = false;
2324
2365
  try {
2325
2366
  // Check if browser is still connected before attempting health check
2326
- if (!browserInstance.isConnected()) {
2367
+ if (!browserInstance.connected) {
2327
2368
  throw new Error('Browser not connected');
2328
2369
  }
2329
2370
 
@@ -3167,7 +3208,7 @@ function setupFrameHandling(page, forceDebug) {
3167
3208
  currentUrl, getRootDomain, siteConfig, dumpUrls, matchedUrlsLogFile, forceDebug, fs,
3168
3209
  ignoreDomains, matchesIgnoreDomain
3169
3210
  });
3170
- setImmediate(() => popupNetToolsHandler(checkedRootDomain, fullSubdomain));
3211
+ trackNetToolsHandler(() => popupNetToolsHandler(checkedRootDomain, fullSubdomain));
3171
3212
  } else {
3172
3213
  // No nettools required — regex match alone counts.
3173
3214
  addMatchedDomain(checkedRootDomain, resourceType, fullSubdomain);
@@ -3573,7 +3614,7 @@ function setupFrameHandling(page, forceDebug) {
3573
3614
 
3574
3615
  // Execute nettools check asynchronously
3575
3616
  const originalDomain = fullSubdomain;
3576
- setImmediate(() => netToolsHandler(reqDomain, originalDomain));
3617
+ trackNetToolsHandler(() => netToolsHandler(reqDomain, originalDomain));
3577
3618
  }
3578
3619
  if (forceDebug) {
3579
3620
  console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
@@ -3688,7 +3729,7 @@ function setupFrameHandling(page, forceDebug) {
3688
3729
 
3689
3730
  // Execute nettools check asynchronously
3690
3731
  const originalDomain = fullSubdomain; // Use full subdomain for nettools
3691
- setImmediate(() => netToolsHandler(reqDomain, originalDomain));
3732
+ trackNetToolsHandler(() => netToolsHandler(reqDomain, originalDomain));
3692
3733
 
3693
3734
  // Do NOT continue processing this request for immediate domain addition
3694
3735
  // The nettools handler is responsible for adding the domain if validation passes
@@ -4237,13 +4278,22 @@ function setupFrameHandling(page, forceDebug) {
4237
4278
  }
4238
4279
  }
4239
4280
 
4240
- const delayMs = DEFAULT_DELAY;
4281
+ const delayMs = siteConfig.delay || DEFAULT_DELAY;
4241
4282
 
4242
4283
  // Optimized delays for Puppeteer 23.x performance
4243
4284
  const isFastSite = timeout <= TIMEOUTS.FAST_SITE_THRESHOLD;
4244
4285
  const networkIdleTime = TIMEOUTS.NETWORK_IDLE; // Balanced: 2s for reliable network detection
4245
4286
  const networkIdleTimeout = Math.min(timeout / 2, TIMEOUTS.NETWORK_IDLE_MAX); // Balanced: 10s timeout
4246
- const actualDelay = Math.min(delayMs, TIMEOUTS.NETWORK_IDLE); // Balanced: 2s delay for stability
4287
+ // Post-networkidle delay cap. Default (2s) keeps fast sites fast. Opt
4288
+ // in with `delay_uncapped: true` to honor the configured `delay` up to
4289
+ // half the per-URL timeout — useful for sites with setTimeout-deferred
4290
+ // lazy ad/tracker loaders (weather.com, cbssports.com class) where
4291
+ // late requests fire well past the 2s window. See also the per-URL
4292
+ // drainPendingNetTools() which awaits in-flight dig/whois handlers
4293
+ // before the matchedDomains snapshot regardless of this flag.
4294
+ const actualDelay = siteConfig.delay_uncapped === true
4295
+ ? Math.min(delayMs, Math.floor(timeout / 2))
4296
+ : Math.min(delayMs, TIMEOUTS.NETWORK_IDLE);
4247
4297
 
4248
4298
  // Build delay promise (networkIdle + delay + optional flowProxy delay)
4249
4299
  const delayPromise = (async () => {
@@ -4625,7 +4675,8 @@ function setupFrameHandling(page, forceDebug) {
4625
4675
  // Wait a moment for async nettools/searchstring operations to complete
4626
4676
  // Use fast timeout helper for Puppeteer 22.x compatibility
4627
4677
  await fastTimeout(TIMEOUTS.CURL_HANDLER_DELAY); // Wait for async operations
4628
-
4678
+ await drainPendingNetTools(); // Bounded wait for in-flight dig/whois (race fix)
4679
+
4629
4680
  return { url: currentUrl, rules: [], success: true, dryRun: true, matchCount: dryRunResult.matchCount };
4630
4681
  } else {
4631
4682
  // Format rules using the output module
@@ -4639,6 +4690,12 @@ function setupFrameHandling(page, forceDebug) {
4639
4690
  privoxyMode,
4640
4691
  piholeMode
4641
4692
  };
4693
+ // Drain pending dig/whois handlers BEFORE snapshotting matchedDomains.
4694
+ // Without this, late-completing async validations (request fired near
4695
+ // end of the delay window, dig still in flight) get orphaned — their
4696
+ // addMatchedDomain calls happen but the result has already been
4697
+ // returned. Bounded by TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT.
4698
+ await drainPendingNetTools();
4642
4699
  const formattedRules = formatRules(matchedDomains, siteConfig, globalOptions);
4643
4700
 
4644
4701
  return {
@@ -4690,7 +4747,11 @@ function setupFrameHandling(page, forceDebug) {
4690
4747
  };
4691
4748
  }
4692
4749
 
4693
- // For other errors, preserve any matches we found before the error
4750
+ // For other errors, preserve any matches we found before the error.
4751
+ // Drain pending nettools first so dig/whois handlers scheduled DURING
4752
+ // the failed navigation get a chance to add to matchedDomains before
4753
+ // the partial-success snapshot — same race as the success path.
4754
+ await drainPendingNetTools();
4694
4755
  if (matchedDomains && (matchedDomains.size > 0 || (matchedDomains instanceof Map && matchedDomains.size > 0))) {
4695
4756
  const globalOptions = {
4696
4757
  localhostIP,
@@ -5713,7 +5774,7 @@ function setupFrameHandling(page, forceDebug) {
5713
5774
  console.log(messageColors.info('Browser kept open.') + ' Close the browser window or press Ctrl+C to exit.');
5714
5775
  const cleanup = async () => {
5715
5776
  try {
5716
- if (browser.isConnected()) await browser.close();
5777
+ if (browser.connected) await browser.close();
5717
5778
  } catch {}
5718
5779
  process.exit(0);
5719
5780
  };
@@ -5731,7 +5792,7 @@ function setupFrameHandling(page, forceDebug) {
5731
5792
 
5732
5793
  // Enhanced final validation for Puppeteer 23.x
5733
5794
  try {
5734
- const isStillConnected = browser.isConnected();
5795
+ const isStillConnected = browser.connected;
5735
5796
  if (forceDebug) console.log(formatLogMessage('debug', `Browser connection status before cleanup: ${isStillConnected}`));
5736
5797
  } catch (connErr) {
5737
5798
  if (forceDebug) console.log(formatLogMessage('debug', `Browser connection check failed: ${connErr.message}`));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "3.0.0",
3
+ "version": "3.0.2",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Stealth integration smoke test.
4
+ *
5
+ * Launches Puppeteer, applies the project's full fingerprint spoofing stack
6
+ * (lib/fingerprint.js's applyAllFingerprintSpoofing), navigates to public
7
+ * bot-detection test pages, and reports what the page concluded about us.
8
+ *
9
+ * Purpose: replace "I think the spoof works" theoretical reviews with real
10
+ * signal -- which checks pass, which fail, which moved after a fingerprint
11
+ * change. Run before and after a stealth-related commit to A/B the impact.
12
+ *
13
+ * Usage:
14
+ * node scripts/test-stealth.js # all targets, human-readable
15
+ * node scripts/test-stealth.js sannysoft # one target
16
+ * node scripts/test-stealth.js --headful # show browser GUI
17
+ * node scripts/test-stealth.js --no-spoof # baseline (no fingerprint protection)
18
+ * node scripts/test-stealth.js --ua=firefox # change UA family
19
+ * node scripts/test-stealth.js --format=json # machine-readable output
20
+ * node scripts/test-stealth.js --help # show usage
21
+ *
22
+ * Environment:
23
+ * PUPPETEER_NO_SANDBOX=1 pass --no-sandbox --disable-setuid-sandbox to
24
+ * Chromium. Required when running as root (CI
25
+ * containers, some Docker setups). Off by default
26
+ * so local dev doesn't silently drop the sandbox.
27
+ *
28
+ * Targets (extend by adding to TARGETS below):
29
+ * sannysoft https://bot.sannysoft.com/ — classic fingerprint tests
30
+ * creepjs https://abrahamjuliot.github.io/creepjs/ — modern fingerprint suite
31
+ * browserleaks https://browserleaks.com/javascript — JS env probe
32
+ *
33
+ * Output: one line per target with PASS / WARN / FAIL counts (where parseable),
34
+ * plus a short summary of any explicit detection markers ("Bot detected",
35
+ * "Headless", etc.) found in the page text. With --format=json, emits a single
36
+ * JSON object suitable for piping to diff/jq for before/after comparison.
37
+ *
38
+ * This is a SMOKE test, not a unit test. It doesn't make assertions; it
39
+ * reports what the page reports. Use the output to decide if a stealth
40
+ * change moved the needle.
41
+ */
42
+
43
+ 'use strict';
44
+
45
+ const puppeteer = require('puppeteer');
46
+ const path = require('path');
47
+ const {
48
+ applyAllFingerprintSpoofing,
49
+ USER_AGENT_COLLECTIONS
50
+ } = require(path.resolve(__dirname, '..', 'lib', 'fingerprint'));
51
+
52
// ---- CLI parsing -----------------------------------------------------------
const args = process.argv.slice(2);

/**
 * Return the value of the first `<prefix><value>` argument in argv.
 *
 * @param {string[]} argv - raw CLI arguments
 * @param {string} prefix - flag prefix including the '=' (e.g. '--ua=')
 * @param {string} fallback - value to use when the flag is absent
 * @returns {string} the text after the prefix, or `fallback` if the flag was
 *   not passed at all. An explicitly empty value ("--ua=") is returned as ''
 *   rather than the fallback, so the validation in the main routine rejects
 *   it instead of silently running with the default.
 */
const readFlagValue = (argv, prefix, fallback) => {
  const hit = argv.find(a => a.startsWith(prefix));
  return hit === undefined ? fallback : hit.slice(prefix.length);
};

const HELP = args.includes('--help') || args.includes('-h');
const HEADFUL = args.includes('--headful');
const NO_SPOOF = args.includes('--no-spoof');
const UA_FLAG = readFlagValue(args, '--ua=', 'chrome');
const FORMAT = readFlagValue(args, '--format=', 'text');

// Positional arguments are target names.
const filterTargets = args.filter(a => !a.startsWith('-'));

// Anything starting with '-' is a flag claim; we validate the known set
// below so typos like "-headful" or "--no_spoof" don't silently no-op.
const flagArgs = args.filter(a => a.startsWith('-'));
const KNOWN_FLAGS = new Set(['--headful', '--no-spoof', '--help', '-h']);
const KNOWN_FLAG_PREFIXES = ['--ua=', '--format='];
64
+
65
// Bot-detection pages to visit. Each entry carries a short `name` (used as the
// CLI target filter), the `url` to load, and an async `extract(page)` that
// pulls whatever the page concluded out of the loaded document.
const TARGETS = [
  {
    name: 'sannysoft',
    url: 'https://bot.sannysoft.com/',
    // Tally the result tables. Sannysoft uses td.passed / td.failed / td.warn.
    extract: async (page) => page.evaluate(() => {
      const tally = { passed: 0, failed: 0, warn: 0, total: 0, failures: [] };
      document.querySelectorAll('td').forEach((cell) => {
        const classes = cell.className || '';
        // First matching verdict wins, checked in this order.
        const verdict = ['passed', 'failed', 'warn'].find(v => classes.includes(v));
        if (!verdict) return;
        tally[verdict]++;
        tally.total++;
        if (verdict === 'failed') {
          // Use the first cell of the row as a label for context.
          const firstCell = cell.closest('tr')?.querySelector('td');
          tally.failures.push(firstCell?.textContent?.trim() || '?');
        }
      });
      return tally;
    })
  },
  {
    name: 'creepjs',
    url: 'https://abrahamjuliot.github.io/creepjs/',
    extract: async (page) => {
      // Best-effort wait for the results container (a timeout here is
      // ignored), then a fixed pause so the async fingerprinting tests can
      // finish before the page text is read.
      await page.waitForSelector('#fingerprint-data', { timeout: 30000 }).catch(() => {});
      await new Promise(resolve => setTimeout(resolve, 8000));
      return page.evaluate(() => {
        const bodyText = document.body.innerText || '';
        // First capture group of `pattern` in the page text, or null.
        const capture = (pattern) => {
          const hit = bodyText.match(pattern);
          return hit ? hit[1] : null;
        };
        const trust = capture(/Trust Score[:\s]+(\d+(?:\.\d+)?)\s*%/i);
        const lies = capture(/lies[:\s]+(\d+)/i);
        const bot = capture(/bot[:\s]+(true|false)/i);
        return {
          trustScore: trust === null ? null : parseFloat(trust),
          lies: lies === null ? null : parseInt(lies, 10),
          botDetected: bot === null ? null : bot === 'true',
          excerpt: bodyText.split('\n').slice(0, 15).join('\n').slice(0, 400)
        };
      });
    }
  },
  {
    name: 'browserleaks',
    url: 'https://browserleaks.com/javascript',
    // The page itself only displays values; read the navigator/window
    // properties directly so the caller can judge which look anomalous.
    extract: async (page) => page.evaluate(() => {
      const manifestVersion = () => {
        try {
          return window.chrome?.runtime?.getManifest?.()?.version;
        } catch (e) {
          return 'error';
        }
      };
      const describeChromeProperty = () => {
        const d = Object.getOwnPropertyDescriptor(window, 'chrome');
        if (!d) return 'no-descriptor';
        return `writable=${d.writable},enumerable=${d.enumerable},configurable=${d.configurable}`;
      };
      return {
        userAgent: navigator.userAgent,
        platform: navigator.platform,
        webdriver: navigator.webdriver,
        languages: JSON.stringify(navigator.languages),
        hardwareConcurrency: navigator.hardwareConcurrency,
        deviceMemory: navigator.deviceMemory,
        plugins: navigator.plugins?.length,
        chromeRuntime: typeof window.chrome?.runtime,
        chromeRuntimeVersion: manifestVersion(),
        windowChromeDescriptor: describeChromeProperty(),
        errorName: Error.name,
        errorLength: Error.length,
        rtcName: window.RTCPeerConnection?.name,
        imageName: window.Image?.name
      };
    })
  }
];
143
+
144
/**
 * Print CLI usage to stdout: options, environment variables, the valid UA
 * families (keys of USER_AGENT_COLLECTIONS) and the available target names.
 */
function printHelp() {
  const uaFamilies = Array.from(USER_AGENT_COLLECTIONS.keys()).join(', ');
  const targetNames = TARGETS.map(t => t.name).join(', ');
  const usage = [
    'Usage: node scripts/test-stealth.js [options] [target...]',
    '',
    'Options:',
    '--headful launch with browser GUI visible',
    '--no-spoof baseline run — skip applyAllFingerprintSpoofing',
    '--ua=<family> UA family to spoof (default: chrome)',
    `valid: ${uaFamilies}`,
    '--format=<fmt> output format: text (default) | json',
    '--help, -h show this message',
    '',
    'Environment:',
    'PUPPETEER_NO_SANDBOX=1 pass --no-sandbox to Chromium (required in some CI)',
    '',
    `Targets: ${targetNames} (default: all)`
  ];
  console.log(usage.join('\n'));
}
160
+
161
/**
 * Render one target's extracted result as a human-readable text section.
 *
 * @param {{name: string, url: string}} target - entry from TARGETS; `name`
 *   selects which layout is used.
 * @param {object} result - whatever that target's extract() returned.
 * @returns {string} a header line followed by target-specific detail lines,
 *   joined with newlines. Unknown target names produce the header only.
 */
function formatResult(target, result) {
  const lines = [`\n=== ${target.name} (${target.url}) ===`];

  if (target.name === 'sannysoft') {
    lines.push(` passed: ${result.passed} | warn: ${result.warn} | failed: ${result.failed} | total: ${result.total}`);
    if (result.failures.length) {
      // Cap the listing at 10 labels and summarise the remainder.
      const shown = result.failures.slice(0, 10).join(', ');
      const overflow = result.failures.length > 10 ? ` ... +${result.failures.length - 10} more` : '';
      lines.push(` failure rows: ${shown}${overflow}`);
    }
  } else if (target.name === 'creepjs') {
    // Only a real score gets the '%' suffix; an unparsed score is reported
    // as plain "n/a" (it used to render as "n/a%").
    const trust = result.trustScore == null ? 'n/a' : `${result.trustScore}%`;
    lines.push(` trust score: ${trust}`);
    lines.push(` lies detected: ${result.lies ?? 'n/a'}`);
    lines.push(` bot flagged: ${result.botDetected ?? 'n/a'}`);
    if (result.excerpt) lines.push(` excerpt:\n ${result.excerpt.split('\n').join('\n ')}`);
  } else if (target.name === 'browserleaks') {
    for (const [k, v] of Object.entries(result)) {
      lines.push(` ${k.padEnd(24)} ${v}`);
    }
  }

  return lines.join('\n');
}
180
+
181
// Entry point: validate CLI input, launch the browser, run every selected
// target in its own page, and report -- text output as each target finishes,
// or a single JSON document at the end with --format=json.
// Exit codes: 0 after --help, 2 for invalid CLI input, 1 for an unexpected
// fatal error. Per-target failures are recorded in the output and do not
// affect the exit code.
(async () => {
  if (HELP) {
    printHelp();
    process.exit(0);
  }

  // Validate --ua= against the canonical UA list. Previously a typo like
  // --ua=opera silently fell through to applyUserAgentSpoofing's "unknown UA,
  // no-op" path, producing run results that looked spoofed but weren't.
  if (!USER_AGENT_COLLECTIONS.has(UA_FLAG)) {
    console.error(`Invalid --ua=${UA_FLAG}. Valid: ${Array.from(USER_AGENT_COLLECTIONS.keys()).join(', ')}`);
    process.exit(2);
  }

  if (!['text', 'json'].includes(FORMAT)) {
    console.error(`Invalid --format=${FORMAT}. Valid: text, json`);
    process.exit(2);
  }

  // Reject unrecognised flags before we launch a browser. Typos like
  // "-headful" or "--no_spoof" used to silently no-op and produce a
  // misleading "spoof on" run that wasn't actually spoofed.
  const badFlags = flagArgs.filter(f =>
    !KNOWN_FLAGS.has(f) && !KNOWN_FLAG_PREFIXES.some(p => f.startsWith(p))
  );
  if (badFlags.length) {
    console.error(`Unrecognised flag(s): ${badFlags.join(', ')}. See --help.`);
    process.exit(2);
  }

  const targetsToRun = filterTargets.length
    ? TARGETS.filter(t => filterTargets.includes(t.name))
    : TARGETS;

  if (targetsToRun.length === 0) {
    console.error(`No targets matched. Available: ${TARGETS.map(t => t.name).join(', ')}`);
    process.exit(2);
  }

  // Same rule for target names as for flags: a misspelt name next to a valid
  // one must not be dropped silently, or the run quietly covers fewer targets
  // than the caller asked for.
  const knownTargetNames = new Set(TARGETS.map(t => t.name));
  const unknownTargets = filterTargets.filter(name => !knownTargetNames.has(name));
  if (unknownTargets.length) {
    console.error(`Unknown target(s): ${unknownTargets.join(', ')}. Available: ${TARGETS.map(t => t.name).join(', ')}`);
    process.exit(2);
  }

  if (FORMAT === 'text') {
    console.log(`Stealth test config: spoof=${!NO_SPOOF}, ua=${UA_FLAG}, headful=${HEADFUL}`);
    console.log(`Targets: ${targetsToRun.map(t => t.name).join(', ')}`);
  }

  // Sandbox is on by default; opt out via env var rather than baking
  // --no-sandbox into the launch line. CI-as-root needs it; local dev should
  // not silently drop the sandbox just because the test happens to start it.
  const launchArgs = ['--disable-blink-features=AutomationControlled'];
  if (process.env.PUPPETEER_NO_SANDBOX === '1') {
    launchArgs.push('--no-sandbox', '--disable-setuid-sandbox');
  }

  const browser = await puppeteer.launch({
    headless: !HEADFUL,
    args: launchArgs
  });

  // Collected for JSON output (and to support a future --fail-on-detection
  // exit code without restructuring the loop).
  const collected = [];

  try {
    for (const target of targetsToRun) {
      const page = await browser.newPage();
      const started = Date.now();
      try {
        if (!NO_SPOOF) {
          // Apply the same spoofing stack nwss.js uses for real scans.
          await applyAllFingerprintSpoofing(page,
            { userAgent: UA_FLAG, fingerprint_protection: 'random' },
            false,
            target.url
          );
        }
        await page.goto(target.url, { waitUntil: 'networkidle2', timeout: 60000 });
        const result = await target.extract(page);
        collected.push({ name: target.name, url: target.url, ok: true, durationMs: Date.now() - started, result });
        if (FORMAT === 'text') console.log(formatResult(target, result));
      } catch (err) {
        // A failing target is recorded and reported; the remaining targets
        // still run.
        collected.push({ name: target.name, url: target.url, ok: false, durationMs: Date.now() - started, error: err.message });
        if (FORMAT === 'text') {
          console.error(`\n=== ${target.name} (${target.url}) ===`);
          console.error(` ERROR: ${err.message}`);
        }
      } finally {
        // Best-effort: a page that is already gone must not abort the loop.
        await page.close().catch(() => {});
      }
    }
  } finally {
    await browser.close().catch(() => {});
  }

  if (FORMAT === 'json') {
    // Single object, not NDJSON — easier to diff with `jq` or `diff` between
    // before/after runs. Schema is stable: top-level config + targets[].
    process.stdout.write(JSON.stringify({
      config: { spoof: !NO_SPOOF, ua: UA_FLAG, headful: HEADFUL, noSandbox: process.env.PUPPETEER_NO_SANDBOX === '1' },
      targets: collected
    }, null, 2) + '\n');
  }
})().catch(err => {
  console.error('test-stealth fatal:', err);
  process.exit(1);
});