@fanboynz/network-scanner 3.0.1 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +4 -1
- package/CHANGELOG.md +68 -0
- package/lib/fingerprint.js +318 -44
- package/lib/nettools.js +28 -2
- package/lib/proxy.js +48 -21
- package/lib/socks-relay.js +242 -47
- package/nwss.js +71 -10
- package/package.json +1 -1
- package/scripts/test-stealth.js +39 -13
package/nwss.js
CHANGED
|
@@ -109,6 +109,7 @@ const TIMEOUTS = Object.freeze({
|
|
|
109
109
|
EMERGENCY_RESTART_DELAY: 2000, // Delay after emergency browser restart
|
|
110
110
|
BROWSER_STABILIZE_DELAY: 1000, // Browser stabilization after restart
|
|
111
111
|
CURL_HANDLER_DELAY: 3000, // Wait for async curl operations
|
|
112
|
+
NETTOOLS_DRAIN_TIMEOUT: 3000, // Hard cap for awaiting in-flight nettools (dig/whois) handlers before snapshot. Drains immediately if all complete; bounded so a hung dig can't block exit. Mirrors CURL_HANDLER_DELAY's role for curl/searchstring.
|
|
112
113
|
PROTOCOL_TIMEOUT: 180000, // Chrome DevTools Protocol timeout
|
|
113
114
|
REDIRECT_JS_TIMEOUT: 5000 // JavaScript redirect detection timeout
|
|
114
115
|
});
|
|
@@ -777,7 +778,8 @@ Redirect Handling Options:
|
|
|
777
778
|
isBrave: true/false Spoof Brave browser detection
|
|
778
779
|
userAgent: "chrome"|"chrome_mac"|"chrome_linux"|"firefox"|"firefox_mac"|"firefox_linux"|"safari" Custom desktop User-Agent
|
|
779
780
|
interact_intensity: "low"|"medium"|"high" Interaction simulation intensity (default: medium)
|
|
780
|
-
delay: <milliseconds> Delay after load (default:
|
|
781
|
+
delay: <milliseconds> Delay after load (default: 6000, capped at 2000ms unless delay_uncapped: true)
|
|
782
|
+
delay_uncapped: true/false Honor 'delay' up to half the per-URL timeout instead of the 2s default cap. Use for sites with setTimeout-deferred lazy ad/tracker loaders that fire well past the standard post-networkidle window
|
|
781
783
|
reload: <number> Reload page n times after load (default: 1)
|
|
782
784
|
forcereload: true/false or ["domain1.com", "domain2.com"] Force cache-clearing reload for all URLs or specific domains
|
|
783
785
|
clear_sitedata: true/false Clear all cookies, cache, storage before each load (default: false)
|
|
@@ -1864,7 +1866,13 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1864
1866
|
'--disable-domain-reliability', // No reliability monitor disk writes
|
|
1865
1867
|
// PERFORMANCE: Disable non-essential Chrome features in a single flag
|
|
1866
1868
|
// IMPORTANT: Chrome only reads the LAST --disable-features flag, so combine all into one
|
|
1867
|
-
|
|
1869
|
+
// AccountConsistencyMirror + AccountConsistencyDice prevent the
|
|
1870
|
+
// Chrome sign-in subsystem from initialising at startup. Combined
|
|
1871
|
+
// with --disable-sync + --allow-browser-signin=false below, this
|
|
1872
|
+
// suppresses the "Something went wrong when opening your profile"
|
|
1873
|
+
// popup that fires in headful + --keep-open mode (temp userDataDir
|
|
1874
|
+
// has no real profile, so the sync init errors out and pops up).
|
|
1875
|
+
`--disable-features=AudioServiceOutOfProcess,VizDisplayCompositor,TranslateUI,BlinkGenPropertyTrees,Translate,BackForwardCache,AcceptCHFrame,SafeBrowsing,HttpsFirstBalancedModeAutoEnable,site-per-process,PaintHolding,AccountConsistencyMirror,AccountConsistencyDice${disable_ad_tagging ? ',AdTagging' : ''}`,
|
|
1868
1876
|
'--disable-ipc-flooding-protection',
|
|
1869
1877
|
'--aggressive-cache-discard',
|
|
1870
1878
|
'--memory-pressure-off',
|
|
@@ -1874,7 +1882,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1874
1882
|
'--no-sandbox',
|
|
1875
1883
|
'--disable-setuid-sandbox',
|
|
1876
1884
|
'--disable-dev-shm-usage',
|
|
1877
|
-
|
|
1885
|
+
// --disable-sync is always-on (was previously dropped in --keep-open
|
|
1886
|
+
// mode, which let the sync subsystem init against our temp
|
|
1887
|
+
// userDataDir and pop the "Something went wrong when opening your
|
|
1888
|
+
// profile" dialog). Inspection during --keep-open doesn't need
|
|
1889
|
+
// sync; nothing in the scanner flow does.
|
|
1890
|
+
'--disable-sync',
|
|
1891
|
+
// Prevent the sign-in promo / account banner from appearing in
|
|
1892
|
+
// headful sessions. Same family of fixes as --disable-sync and the
|
|
1893
|
+
// AccountConsistency* features disabled above.
|
|
1894
|
+
'--allow-browser-signin=false',
|
|
1878
1895
|
'--mute-audio',
|
|
1879
1896
|
'--disable-translate',
|
|
1880
1897
|
'--window-size=1920,1080',
|
|
@@ -2100,6 +2117,30 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2100
2117
|
// Use Map to track domains and their resource types for --adblock-rules or --dry-run
|
|
2101
2118
|
const matchedDomains = (adblockRulesMode || siteConfig.adblock_rules || dryRunMode) ? new Map() : new Set();
|
|
2102
2119
|
|
|
2120
|
+
// Per-URL tracking of in-flight async nettools (dig/whois) handlers so we
|
|
2121
|
+
// can drain them BEFORE snapshotting matchedDomains into the result. The
|
|
2122
|
+
// previous fire-and-forget setImmediate pattern dropped late-completing
|
|
2123
|
+
// matches (handler resolved after formatRules had already run). Each
|
|
2124
|
+
// setImmediate-scheduled handler now registers a promise via
|
|
2125
|
+
// trackNetToolsHandler; drainPendingNetTools() awaits all of them with a
|
|
2126
|
+
// hard cap (TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT) so a hung dig can't block.
|
|
2127
|
+
const pendingNetTools = [];
|
|
2128
|
+
const trackNetToolsHandler = (handlerFn) => {
|
|
2129
|
+
pendingNetTools.push(new Promise((resolve) => {
|
|
2130
|
+
setImmediate(async () => {
|
|
2131
|
+
try { await handlerFn(); } catch (_) { /* handler logs its own errors */ }
|
|
2132
|
+
finally { resolve(); }
|
|
2133
|
+
});
|
|
2134
|
+
}));
|
|
2135
|
+
};
|
|
2136
|
+
const drainPendingNetTools = async () => {
|
|
2137
|
+
if (pendingNetTools.length === 0) return;
|
|
2138
|
+
await Promise.race([
|
|
2139
|
+
Promise.all(pendingNetTools),
|
|
2140
|
+
fastTimeout(TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT)
|
|
2141
|
+
]);
|
|
2142
|
+
};
|
|
2143
|
+
|
|
2103
2144
|
// Local domain dedup scoped to THIS processUrl call only
|
|
2104
2145
|
// Prevents cross-config contamination from the global domain cache
|
|
2105
2146
|
const localDetectedDomains = new Set();
|
|
@@ -3167,7 +3208,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3167
3208
|
currentUrl, getRootDomain, siteConfig, dumpUrls, matchedUrlsLogFile, forceDebug, fs,
|
|
3168
3209
|
ignoreDomains, matchesIgnoreDomain
|
|
3169
3210
|
});
|
|
3170
|
-
|
|
3211
|
+
trackNetToolsHandler(() => popupNetToolsHandler(checkedRootDomain, fullSubdomain));
|
|
3171
3212
|
} else {
|
|
3172
3213
|
// No nettools required — regex match alone counts.
|
|
3173
3214
|
addMatchedDomain(checkedRootDomain, resourceType, fullSubdomain);
|
|
@@ -3573,7 +3614,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3573
3614
|
|
|
3574
3615
|
// Execute nettools check asynchronously
|
|
3575
3616
|
const originalDomain = fullSubdomain;
|
|
3576
|
-
|
|
3617
|
+
trackNetToolsHandler(() => netToolsHandler(reqDomain, originalDomain));
|
|
3577
3618
|
}
|
|
3578
3619
|
if (forceDebug) {
|
|
3579
3620
|
console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
|
|
@@ -3688,7 +3729,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3688
3729
|
|
|
3689
3730
|
// Execute nettools check asynchronously
|
|
3690
3731
|
const originalDomain = fullSubdomain; // Use full subdomain for nettools
|
|
3691
|
-
|
|
3732
|
+
trackNetToolsHandler(() => netToolsHandler(reqDomain, originalDomain));
|
|
3692
3733
|
|
|
3693
3734
|
// Do NOT continue processing this request for immediate domain addition
|
|
3694
3735
|
// The nettools handler is responsible for adding the domain if validation passes
|
|
@@ -4237,13 +4278,22 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4237
4278
|
}
|
|
4238
4279
|
}
|
|
4239
4280
|
|
|
4240
|
-
const delayMs = DEFAULT_DELAY;
|
|
4281
|
+
const delayMs = siteConfig.delay || DEFAULT_DELAY;
|
|
4241
4282
|
|
|
4242
4283
|
// Optimized delays for Puppeteer 23.x performance
|
|
4243
4284
|
const isFastSite = timeout <= TIMEOUTS.FAST_SITE_THRESHOLD;
|
|
4244
4285
|
const networkIdleTime = TIMEOUTS.NETWORK_IDLE; // Balanced: 2s for reliable network detection
|
|
4245
4286
|
const networkIdleTimeout = Math.min(timeout / 2, TIMEOUTS.NETWORK_IDLE_MAX); // Balanced: 10s timeout
|
|
4246
|
-
|
|
4287
|
+
// Post-networkidle delay cap. Default (2s) keeps fast sites fast. Opt
|
|
4288
|
+
// in with `delay_uncapped: true` to honor the configured `delay` up to
|
|
4289
|
+
// half the per-URL timeout — useful for sites with setTimeout-deferred
|
|
4290
|
+
// lazy ad/tracker loaders (weather.com, cbssports.com class) where
|
|
4291
|
+
// late requests fire well past the 2s window. See also the per-URL
|
|
4292
|
+
// drainPendingNetTools() which awaits in-flight dig/whois handlers
|
|
4293
|
+
// before the matchedDomains snapshot regardless of this flag.
|
|
4294
|
+
const actualDelay = siteConfig.delay_uncapped === true
|
|
4295
|
+
? Math.min(delayMs, Math.floor(timeout / 2))
|
|
4296
|
+
: Math.min(delayMs, TIMEOUTS.NETWORK_IDLE);
|
|
4247
4297
|
|
|
4248
4298
|
// Build delay promise (networkIdle + delay + optional flowProxy delay)
|
|
4249
4299
|
const delayPromise = (async () => {
|
|
@@ -4625,7 +4675,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4625
4675
|
// Wait a moment for async nettools/searchstring operations to complete
|
|
4626
4676
|
// Use fast timeout helper for Puppeteer 22.x compatibility
|
|
4627
4677
|
await fastTimeout(TIMEOUTS.CURL_HANDLER_DELAY); // Wait for async operations
|
|
4628
|
-
|
|
4678
|
+
await drainPendingNetTools(); // Bounded wait for in-flight dig/whois (race fix)
|
|
4679
|
+
|
|
4629
4680
|
return { url: currentUrl, rules: [], success: true, dryRun: true, matchCount: dryRunResult.matchCount };
|
|
4630
4681
|
} else {
|
|
4631
4682
|
// Format rules using the output module
|
|
@@ -4639,6 +4690,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4639
4690
|
privoxyMode,
|
|
4640
4691
|
piholeMode
|
|
4641
4692
|
};
|
|
4693
|
+
// Drain pending dig/whois handlers BEFORE snapshotting matchedDomains.
|
|
4694
|
+
// Without this, late-completing async validations (request fired near
|
|
4695
|
+
// end of the delay window, dig still in flight) get orphaned — their
|
|
4696
|
+
// addMatchedDomain calls happen but the result has already been
|
|
4697
|
+
// returned. Bounded by TIMEOUTS.NETTOOLS_DRAIN_TIMEOUT.
|
|
4698
|
+
await drainPendingNetTools();
|
|
4642
4699
|
const formattedRules = formatRules(matchedDomains, siteConfig, globalOptions);
|
|
4643
4700
|
|
|
4644
4701
|
return {
|
|
@@ -4690,7 +4747,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4690
4747
|
};
|
|
4691
4748
|
}
|
|
4692
4749
|
|
|
4693
|
-
// For other errors, preserve any matches we found before the error
|
|
4750
|
+
// For other errors, preserve any matches we found before the error.
|
|
4751
|
+
// Drain pending nettools first so dig/whois handlers scheduled DURING
|
|
4752
|
+
// the failed navigation get a chance to add to matchedDomains before
|
|
4753
|
+
// the partial-success snapshot — same race as the success path.
|
|
4754
|
+
await drainPendingNetTools();
|
|
4694
4755
|
if (matchedDomains && (matchedDomains.size > 0 || (matchedDomains instanceof Map && matchedDomains.size > 0))) {
|
|
4695
4756
|
const globalOptions = {
|
|
4696
4757
|
localhostIP,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "3.0.1",
|
|
3
|
+
"version": "3.0.3",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|
package/scripts/test-stealth.js
CHANGED
|
@@ -70,7 +70,7 @@ const TARGETS = [
|
|
|
70
70
|
extract: async (page) => {
|
|
71
71
|
return await page.evaluate(() => {
|
|
72
72
|
const cells = Array.from(document.querySelectorAll('td'));
|
|
73
|
-
const out = { passed: 0, failed: 0, warn: 0, total: 0, failures: [] };
|
|
73
|
+
const out = { passed: 0, failed: 0, warn: 0, total: 0, failures: [], warnings: [] };
|
|
74
74
|
for (const c of cells) {
|
|
75
75
|
const cls = c.className || '';
|
|
76
76
|
if (cls.includes('passed')) { out.passed++; out.total++; }
|
|
@@ -81,7 +81,14 @@ const TARGETS = [
|
|
|
81
81
|
const label = row?.querySelector('td')?.textContent?.trim() || '?';
|
|
82
82
|
out.failures.push(label);
|
|
83
83
|
}
|
|
84
|
-
else if (cls.includes('warn')) {
|
|
84
|
+
else if (cls.includes('warn')) {
|
|
85
|
+
out.warn++; out.total++;
|
|
86
|
+
// Capture warn-row labels too so a soft regression (cell moving
|
|
87
|
+
// passed -> warn) is debuggable without --headful.
|
|
88
|
+
const row = c.closest('tr');
|
|
89
|
+
const label = row?.querySelector('td')?.textContent?.trim() || '?';
|
|
90
|
+
out.warnings.push(label);
|
|
91
|
+
}
|
|
85
92
|
}
|
|
86
93
|
return out;
|
|
87
94
|
});
|
|
@@ -97,15 +104,29 @@ const TARGETS = [
|
|
|
97
104
|
await new Promise(r => setTimeout(r, 8000)); // give async tests time
|
|
98
105
|
return await page.evaluate(() => {
|
|
99
106
|
const text = document.body.innerText || '';
|
|
100
|
-
// CreepJS
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
107
|
+
// CreepJS's actual stealth-relevant outputs are in a "Headless"
|
|
108
|
+
// section as percentages (e.g. "67% headless", "40% stealth",
|
|
109
|
+
// "44% like headless"), not the "Trust Score" label the old
|
|
110
|
+
// regex expected. Engine identification comes from "chromium:
|
|
111
|
+
// true/false" in the same block. Lower percentages are better
|
|
112
|
+
// for evasion on all three scores (headless, like headless, stealth).
|
|
113
|
+
const headlessMatch = text.match(/(\d+(?:\.\d+)?)\s*%\s+headless\b/i);
|
|
114
|
+
const likeHeadlessMatch = text.match(/(\d+(?:\.\d+)?)\s*%\s+like\s+headless/i);
|
|
115
|
+
const stealthMatch = text.match(/(\d+(?:\.\d+)?)\s*%\s+stealth\b/i);
|
|
116
|
+
const chromiumMatch = text.match(/chromium\s*:\s*(true|false)/i);
|
|
117
|
+
// FP ID is CreepJS's stable fingerprint hash — same value across
|
|
118
|
+
// reloads if the fingerprint is unchanged; lets you A/B before
|
|
119
|
+
// and after a spoof change.
|
|
120
|
+
const fpIdMatch = text.match(/FP\s*ID\s*:?\s*([0-9a-f]{16,})/i);
|
|
104
121
|
return {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
122
|
+
headlessPct: headlessMatch ? parseFloat(headlessMatch[1]) : null,
|
|
123
|
+
likeHeadlessPct: likeHeadlessMatch ? parseFloat(likeHeadlessMatch[1]) : null,
|
|
124
|
+
stealthPct: stealthMatch ? parseFloat(stealthMatch[1]) : null,
|
|
125
|
+
isChromium: chromiumMatch ? chromiumMatch[1] === 'true' : null,
|
|
126
|
+
fpId: fpIdMatch ? fpIdMatch[1].slice(0, 16) : null,
|
|
127
|
+
// Larger excerpt (40 lines, up to 2KB) so a future UI rotation
|
|
128
|
+
// is debuggable from the output without --headful.
|
|
129
|
+
excerpt: text.split('\n').slice(0, 40).join('\n').slice(0, 2000)
|
|
109
130
|
};
|
|
110
131
|
});
|
|
111
132
|
}
|
|
@@ -165,10 +186,15 @@ function formatResult(target, result) {
|
|
|
165
186
|
if (result.failures.length) {
|
|
166
187
|
lines.push(` failure rows: ${result.failures.slice(0, 10).join(', ')}${result.failures.length > 10 ? ` ... +${result.failures.length - 10} more` : ''}`);
|
|
167
188
|
}
|
|
189
|
+
if (result.warnings && result.warnings.length) {
|
|
190
|
+
lines.push(` warn rows: ${result.warnings.slice(0, 10).join(', ')}${result.warnings.length > 10 ? ` ... +${result.warnings.length - 10} more` : ''}`);
|
|
191
|
+
}
|
|
168
192
|
} else if (target.name === 'creepjs') {
|
|
169
|
-
lines.push(`
|
|
170
|
-
lines.push(`
|
|
171
|
-
lines.push(`
|
|
193
|
+
lines.push(` FP ID: ${result.fpId ?? 'n/a'} (stable across reloads if fingerprint unchanged)`);
|
|
194
|
+
lines.push(` engine chromium: ${result.isChromium ?? 'n/a'}`);
|
|
195
|
+
lines.push(` headless score: ${result.headlessPct ?? 'n/a'}% (lower = better; 0% = real browser)`);
|
|
196
|
+
lines.push(` like-headless: ${result.likeHeadlessPct ?? 'n/a'}% (lower = better; soft headless signals)`);
|
|
197
|
+
lines.push(` stealth score: ${result.stealthPct ?? 'n/a'}% (lower = better; % likely to be using anti-detection tooling)`);
|
|
172
198
|
if (result.excerpt) lines.push(` excerpt:\n ${result.excerpt.split('\n').join('\n ')}`);
|
|
173
199
|
} else if (target.name === 'browserleaks') {
|
|
174
200
|
for (const [k, v] of Object.entries(result)) {
|