@fanboynz/network-scanner 2.0.65 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +134 -10
- package/CHANGELOG.md +135 -0
- package/CLAUDE.md +18 -7
- package/README.md +12 -4
- package/lib/adblock-rust.js +23 -18
- package/lib/adblock.js +127 -82
- package/lib/browserexit.js +210 -200
- package/lib/browserhealth.js +84 -60
- package/lib/cdp.js +103 -81
- package/lib/clear_sitedata.js +61 -159
- package/lib/cloudflare.js +579 -409
- package/lib/colorize.js +29 -12
- package/lib/compare.js +16 -8
- package/lib/compress.js +2 -1
- package/lib/curl.js +287 -220
- package/lib/domain-cache.js +87 -40
- package/lib/dry-run.js +137 -194
- package/lib/fingerprint.js +20 -18
- package/lib/flowproxy.js +391 -188
- package/lib/ghost-cursor.js +8 -7
- package/lib/grep.js +248 -171
- package/lib/ignore_similar.js +70 -124
- package/lib/interaction.js +132 -235
- package/lib/nettools.js +309 -87
- package/lib/openvpn_vpn.js +12 -11
- package/lib/output.js +92 -59
- package/lib/post-processing.js +216 -162
- package/lib/proxy.js +105 -7
- package/lib/redirect.js +46 -30
- package/lib/referrer.js +158 -165
- package/lib/searchstring.js +290 -381
- package/lib/smart-cache.js +141 -91
- package/lib/socks-relay.js +267 -0
- package/lib/spawn-async.js +137 -0
- package/lib/validate_rules.js +188 -176
- package/lib/wireguard_vpn.js +111 -117
- package/nwss.js +872 -149
- package/package.json +6 -5
package/lib/proxy.js
CHANGED
|
@@ -18,6 +18,15 @@
|
|
|
18
18
|
*
|
|
19
19
|
* SOCKS5 with auth:
|
|
20
20
|
* "proxy": "socks5://user:pass@127.0.0.1:1080"
|
|
21
|
+
* Chromium itself cannot authenticate SOCKS5 (crbug.com/256785), so
|
|
22
|
+
* this module auto-starts an in-process no-auth SOCKS5 relay
|
|
23
|
+
* (lib/socks-relay.js) that does the upstream RFC 1929 auth. Chromium
|
|
24
|
+
* connects to the local relay (no auth — which it CAN do) and the
|
|
25
|
+
* relay tunnels to the authenticated upstream. Transparent: keep the
|
|
26
|
+
* socks5://user:pass@host form in config. Requires prepareSocksRelays()
|
|
27
|
+
* to be awaited once before the scan loop (nwss.js does this).
|
|
28
|
+
* NOTE: socks4 with auth is still unsupported (userId-only,
|
|
29
|
+
* near-extinct) — use socks5 or an authenticated HTTP proxy.
|
|
21
30
|
*
|
|
22
31
|
* HTTP proxy (corporate):
|
|
23
32
|
* "proxy": "http://proxy.corp.com:3128"
|
|
@@ -56,8 +65,9 @@
|
|
|
56
65
|
*/
|
|
57
66
|
|
|
58
67
|
const { formatLogMessage } = require('./colorize');
|
|
68
|
+
const { ensureRelay, getRelayPort } = require('./socks-relay');
|
|
59
69
|
|
|
60
|
-
const PROXY_MODULE_VERSION = '1.
|
|
70
|
+
const PROXY_MODULE_VERSION = '1.2.0';
|
|
61
71
|
const SUPPORTED_PROTOCOLS = ['socks5', 'socks4', 'http', 'https'];
|
|
62
72
|
|
|
63
73
|
const DEFAULT_PORTS = {
|
|
@@ -105,8 +115,12 @@ function parseProxyUrl(proxyUrl) {
|
|
|
105
115
|
if (!host) return null;
|
|
106
116
|
|
|
107
117
|
const port = parseInt(url.port, 10) || DEFAULT_PORTS[protocol] || 1080;
|
|
108
|
-
|
|
109
|
-
|
|
118
|
+
// decodeURIComponent throws URIError on a literal '%' that isn't a valid
|
|
119
|
+
// escape (e.g. a password containing '%'). Fall back to the raw value so
|
|
120
|
+
// an otherwise-valid proxy isn't rejected as "Invalid proxy URL".
|
|
121
|
+
const safeDecode = (v) => { try { return decodeURIComponent(v); } catch (_) { return v; } };
|
|
122
|
+
const username = url.username ? safeDecode(url.username) : null;
|
|
123
|
+
const password = url.password ? safeDecode(url.password) : null;
|
|
110
124
|
|
|
111
125
|
return { protocol, host, port, username, password };
|
|
112
126
|
} catch (_) {
|
|
@@ -124,6 +138,41 @@ function needsProxy(siteConfig) {
|
|
|
124
138
|
return !!getConfiguredProxy(siteConfig);
|
|
125
139
|
}
|
|
126
140
|
|
|
141
|
+
/**
|
|
142
|
+
* Pre-start local no-auth SOCKS5 relays for every distinct authenticated
|
|
143
|
+
* SOCKS5 upstream across the given site configs. Must be awaited ONCE
|
|
144
|
+
* before the scan loop — getProxyArgs() then does a pure sync lookup of
|
|
145
|
+
* the relay port, so the fragile per-batch browser-launch path stays
|
|
146
|
+
* synchronous.
|
|
147
|
+
*
|
|
148
|
+
* @param {object[]} siteConfigs
|
|
149
|
+
* @param {boolean} forceDebug
|
|
150
|
+
* @returns {Promise<number>} count of relays started
|
|
151
|
+
*/
|
|
152
|
+
async function prepareSocksRelays(siteConfigs, forceDebug = false) {
|
|
153
|
+
let started = 0;
|
|
154
|
+
const seen = new Set();
|
|
155
|
+
for (const cfg of (siteConfigs || [])) {
|
|
156
|
+
const url = getConfiguredProxy(cfg);
|
|
157
|
+
if (!url) continue;
|
|
158
|
+
const parsed = parseProxyUrl(url);
|
|
159
|
+
// Only socks5 with credentials needs a relay. socks4-auth stays
|
|
160
|
+
// unsupported (near-extinct, userId-only); http/https auth works
|
|
161
|
+
// natively via page.authenticate().
|
|
162
|
+
if (!parsed || parsed.protocol !== 'socks5' || !parsed.username) continue;
|
|
163
|
+
const key = `${parsed.host}:${parsed.port}:${parsed.username}`;
|
|
164
|
+
if (seen.has(key)) continue;
|
|
165
|
+
seen.add(key);
|
|
166
|
+
try {
|
|
167
|
+
await ensureRelay(parsed, forceDebug);
|
|
168
|
+
started++;
|
|
169
|
+
} catch (e) {
|
|
170
|
+
console.warn(formatLogMessage('proxy', `Failed to start SOCKS5 auth relay for ${parsed.host}:${parsed.port}: ${e.message}`));
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
return started;
|
|
174
|
+
}
|
|
175
|
+
|
|
127
176
|
/**
|
|
128
177
|
* Returns Chromium launch arguments for the configured proxy.
|
|
129
178
|
*
|
|
@@ -141,15 +190,45 @@ function getProxyArgs(siteConfig, forceDebug = false) {
|
|
|
141
190
|
return [];
|
|
142
191
|
}
|
|
143
192
|
|
|
193
|
+
// Authenticated SOCKS5: Chromium can't auth SOCKS, so point it at the
|
|
194
|
+
// local no-auth relay (started upfront by prepareSocksRelays) which does
|
|
195
|
+
// the upstream auth. Credentials never reach Chromium. The relay speaks
|
|
196
|
+
// SOCKS5 and forwards domain addresses, so the remote-DNS rule below
|
|
197
|
+
// still applies correctly to the localhost hop.
|
|
198
|
+
let effectiveHost = parsed.host;
|
|
199
|
+
let effectivePort = parsed.port;
|
|
200
|
+
let effectiveProto = parsed.protocol;
|
|
201
|
+
if (parsed.protocol === 'socks5' && parsed.username) {
|
|
202
|
+
const relayPort = getRelayPort(parsed);
|
|
203
|
+
if (relayPort) {
|
|
204
|
+
effectiveHost = '127.0.0.1';
|
|
205
|
+
effectivePort = relayPort;
|
|
206
|
+
const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
|
|
207
|
+
if (debug) {
|
|
208
|
+
console.log(formatLogMessage('proxy', `SOCKS5 auth via local relay 127.0.0.1:${relayPort} -> ${parsed.host}:${parsed.port}`));
|
|
209
|
+
}
|
|
210
|
+
} else {
|
|
211
|
+
// prepareSocksRelays should have started this; defensive only.
|
|
212
|
+
console.warn(formatLogMessage('proxy', `No SOCKS5 auth relay for ${parsed.host}:${parsed.port} — call prepareSocksRelays() before the scan. Connection will fail (Chromium can't auth SOCKS).`));
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
144
216
|
const args = [
|
|
145
|
-
`--proxy-server=${
|
|
217
|
+
`--proxy-server=${effectiveProto}://${effectiveHost}:${effectivePort}`
|
|
146
218
|
];
|
|
147
219
|
|
|
148
|
-
// Remote DNS:
|
|
149
|
-
//
|
|
220
|
+
// Remote DNS: force proxy-side hostname resolution (prevents DNS leaks).
|
|
221
|
+
// SOCKS5 only — it can carry a hostname to the proxy for remote
|
|
222
|
+
// resolution. SOCKS4 cannot (the protocol only accepts an IPv4 address;
|
|
223
|
+
// resolution must happen client-side), so applying MAP * ~NOTFOUND there
|
|
224
|
+
// makes Chromium's local resolver fail with nothing able to resolve the
|
|
225
|
+
// hostname — every request breaks. HTTP/HTTPS proxies resolve remotely
|
|
226
|
+
// by default and need no rule.
|
|
150
227
|
const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
|
|
151
|
-
if (
|
|
228
|
+
if (parsed.protocol === 'socks5' && remoteDns !== false) {
|
|
152
229
|
args.push('--host-resolver-rules=MAP * ~NOTFOUND , EXCLUDE 127.0.0.1');
|
|
230
|
+
} else if (parsed.protocol === 'socks4' && remoteDns === true) {
|
|
231
|
+
console.warn(formatLogMessage('proxy', `proxy_remote_dns ignored: SOCKS4 cannot do proxy-side DNS resolution (use SOCKS5)`));
|
|
153
232
|
}
|
|
154
233
|
|
|
155
234
|
// Bypass list: domains that skip the proxy
|
|
@@ -182,6 +261,20 @@ async function applyProxyAuth(page, siteConfig, forceDebug = false) {
|
|
|
182
261
|
const parsed = parseProxyUrl(proxyUrl);
|
|
183
262
|
if (!parsed || !parsed.username) return false;
|
|
184
263
|
|
|
264
|
+
// Chromium can't authenticate SOCKS proxies, and page.authenticate() is
|
|
265
|
+
// HTTP-407-only. SOCKS5+creds is handled out-of-band by the local
|
|
266
|
+
// no-auth relay (prepareSocksRelays + getProxyArgs rewrite) — Chromium
|
|
267
|
+
// talks no-auth to 127.0.0.1, so there's nothing for page.authenticate
|
|
268
|
+
// to do here; return quietly. SOCKS4 auth (userId-only, near-extinct)
|
|
269
|
+
// stays genuinely unsupported.
|
|
270
|
+
if (parsed.protocol === 'socks5') {
|
|
271
|
+
return false; // relay handles upstream auth
|
|
272
|
+
}
|
|
273
|
+
if (parsed.protocol === 'socks4') {
|
|
274
|
+
console.warn(formatLogMessage('proxy', `SOCKS4 proxy auth is unsupported (use SOCKS5, which is auto-relayed, or an authenticated HTTP proxy).`));
|
|
275
|
+
return false;
|
|
276
|
+
}
|
|
277
|
+
|
|
185
278
|
try {
|
|
186
279
|
await page.authenticate({
|
|
187
280
|
username: parsed.username,
|
|
@@ -265,9 +358,14 @@ function getModuleInfo() {
|
|
|
265
358
|
return { version: PROXY_MODULE_VERSION, name: 'Proxy Handler' };
|
|
266
359
|
}
|
|
267
360
|
|
|
361
|
+
// Re-export relay teardown so nwss.js cleanup paths can close listeners.
|
|
362
|
+
const { closeAllRelays: closeAllSocksRelays } = require('./socks-relay');
|
|
363
|
+
|
|
268
364
|
module.exports = {
|
|
269
365
|
parseProxyUrl,
|
|
270
366
|
needsProxy,
|
|
367
|
+
prepareSocksRelays,
|
|
368
|
+
closeAllSocksRelays,
|
|
271
369
|
getProxyArgs,
|
|
272
370
|
applyProxyAuth,
|
|
273
371
|
testProxy,
|
package/lib/redirect.js
CHANGED
|
@@ -165,8 +165,14 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
165
165
|
// Inject JavaScript redirect detection
|
|
166
166
|
await jsRedirectDetector();
|
|
167
167
|
|
|
168
|
-
if (forceDebug
|
|
169
|
-
|
|
168
|
+
if (forceDebug) {
|
|
169
|
+
// Avoid Object.keys allocation just to check emptiness — a for...in
|
|
170
|
+
// early-exit on the first own key is enough.
|
|
171
|
+
let hasOpts = false;
|
|
172
|
+
for (const _k in gotoOptions) { hasOpts = true; break; }
|
|
173
|
+
if (hasOpts) {
|
|
174
|
+
console.log(formatLogMessage('debug', `Using goto options: ${JSON.stringify(gotoOptions)}`));
|
|
175
|
+
}
|
|
170
176
|
}
|
|
171
177
|
|
|
172
178
|
// Initial navigation. Puppeteer's page.goto returns the response for the
|
|
@@ -184,7 +190,7 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
184
190
|
} catch (_) { /* response disposed or detached — fine, stays null */ }
|
|
185
191
|
}
|
|
186
192
|
|
|
187
|
-
if (response && response.url() !== currentUrl) {
|
|
193
|
+
if (response && response.url() !== currentUrl && !response.url().startsWith('chrome-error://')) {
|
|
188
194
|
// Check redirect limit before adding
|
|
189
195
|
if (redirectChain.length >= maxRedirects) {
|
|
190
196
|
if (forceDebug) {
|
|
@@ -192,12 +198,12 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
192
198
|
}
|
|
193
199
|
finalUrl = currentUrl; // Keep original URL
|
|
194
200
|
} else {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
201
|
+
finalUrl = response.url();
|
|
202
|
+
redirected = true;
|
|
203
|
+
if (!redirectChain.includes(finalUrl)) redirectChain.push(finalUrl);
|
|
198
204
|
}
|
|
199
205
|
if (forceDebug) {
|
|
200
|
-
console.log(formatLogMessage('debug', `HTTP redirect detected: ${currentUrl}
|
|
206
|
+
console.log(formatLogMessage('debug', `HTTP redirect detected: ${currentUrl} -> ${finalUrl}`));
|
|
201
207
|
}
|
|
202
208
|
}
|
|
203
209
|
|
|
@@ -223,9 +229,11 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
223
229
|
};
|
|
224
230
|
});
|
|
225
231
|
|
|
226
|
-
// Check if URL changed (either through JS redirect or automatic redirect)
|
|
232
|
+
// Check if URL changed (either through JS redirect or automatic redirect).
|
|
233
|
+
// Skip chrome-error://* — it's Puppeteer's landing page on DNS/connection
|
|
234
|
+
// failure and adding it to the chain produces bogus intermediate hops.
|
|
227
235
|
const currentPageUrl = page.url();
|
|
228
|
-
if (currentPageUrl && currentPageUrl !== finalUrl && !redirectChain.includes(currentPageUrl)) {
|
|
236
|
+
if (currentPageUrl && currentPageUrl !== finalUrl && !currentPageUrl.startsWith('chrome-error://') && !redirectChain.includes(currentPageUrl)) {
|
|
229
237
|
// Check redirect limit before adding
|
|
230
238
|
if (redirectChain.length >= maxRedirects) {
|
|
231
239
|
if (forceDebug) {
|
|
@@ -275,21 +283,23 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
275
283
|
await detectCommonJSRedirects(page, forceDebug, formatLogMessage);
|
|
276
284
|
}
|
|
277
285
|
|
|
278
|
-
// Final URL check
|
|
286
|
+
// Final URL check. Same chrome-error://* skip as the earlier branches —
|
|
287
|
+
// a navigation that ended in a chrome-error landing shouldn't be treated
|
|
288
|
+
// as the "final" URL of a successful redirect chain.
|
|
279
289
|
const finalPageUrl = page.url();
|
|
280
|
-
if (finalPageUrl && finalPageUrl !== finalUrl) {
|
|
290
|
+
if (finalPageUrl && finalPageUrl !== finalUrl && !finalPageUrl.startsWith('chrome-error://')) {
|
|
281
291
|
// Check redirect limit before final update
|
|
282
292
|
if (redirectChain.length >= maxRedirects) {
|
|
283
293
|
if (forceDebug) {
|
|
284
294
|
console.log(formatLogMessage('debug', `Maximum redirects (${maxRedirects}) reached, keeping current finalUrl`));
|
|
285
295
|
}
|
|
286
296
|
} else {
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
297
|
+
finalUrl = finalPageUrl;
|
|
298
|
+
redirected = true;
|
|
299
|
+
if (!redirectChain.includes(finalUrl)) {
|
|
300
|
+
redirectChain.push(finalUrl);
|
|
301
|
+
}
|
|
291
302
|
}
|
|
292
|
-
}
|
|
293
303
|
}
|
|
294
304
|
|
|
295
305
|
} finally {
|
|
@@ -298,21 +308,19 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
298
308
|
|
|
299
309
|
// Log redirect summary
|
|
300
310
|
if (redirected && forceDebug) {
|
|
301
|
-
console.log(formatLogMessage('debug', `Redirect chain: ${redirectChain.join('
|
|
311
|
+
console.log(formatLogMessage('debug', `Redirect chain: ${redirectChain.join(' -> ')}`));
|
|
302
312
|
}
|
|
303
313
|
|
|
304
|
-
// Extract redirect domains
|
|
305
|
-
|
|
314
|
+
// Extract intermediate redirect domains (exclude the final entry). Single
|
|
315
|
+
// loop instead of slice().map().filter() — three array allocations down to
|
|
316
|
+
// one push-loop. redirectChain is bounded at maxRedirects (default 10).
|
|
317
|
+
const redirectDomains = [];
|
|
306
318
|
if (redirected && redirectChain.length > 1) {
|
|
307
|
-
|
|
308
|
-
const intermediateDomains = redirectChain.slice(0, -1).map(url => {
|
|
319
|
+
for (let i = 0; i < redirectChain.length - 1; i++) {
|
|
309
320
|
try {
|
|
310
|
-
|
|
311
|
-
} catch {
|
|
312
|
-
|
|
313
|
-
}
|
|
314
|
-
}).filter(Boolean);
|
|
315
|
-
redirectDomains = intermediateDomains;
|
|
321
|
+
redirectDomains.push(new URL(redirectChain[i]).hostname);
|
|
322
|
+
} catch (_) { /* skip malformed entries */ }
|
|
323
|
+
}
|
|
316
324
|
}
|
|
317
325
|
|
|
318
326
|
return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains, httpStatus, cfRay };
|
|
@@ -410,13 +418,21 @@ async function handleRedirectTimeout(page, originalUrl, error, safeGetDomain, fo
|
|
|
410
418
|
|
|
411
419
|
try {
|
|
412
420
|
const currentPageUrl = page.url();
|
|
413
|
-
|
|
421
|
+
// Skip chrome-error://* the same way navigateWithRedirectHandling does:
|
|
422
|
+
// a DNS/connection-failure landing isn't a "partial redirect recovery",
|
|
423
|
+
// and safeGetDomain('chrome-error://chromewebdata/') returns
|
|
424
|
+
// 'chromewebdata', which would otherwise differ from the original
|
|
425
|
+
// domain and falsely report success here.
|
|
426
|
+
if (currentPageUrl
|
|
427
|
+
&& currentPageUrl !== 'about:blank'
|
|
428
|
+
&& !currentPageUrl.startsWith('chrome-error://')
|
|
429
|
+
&& currentPageUrl !== originalUrl) {
|
|
414
430
|
const originalDomain = safeGetDomain(originalUrl);
|
|
415
431
|
const currentDomain = safeGetDomain(currentPageUrl);
|
|
416
|
-
|
|
432
|
+
|
|
417
433
|
if (originalDomain !== currentDomain) {
|
|
418
434
|
if (forceDebug) {
|
|
419
|
-
console.log(formatLogMessage('debug', `Partial redirect timeout recovered: ${originalDomain}
|
|
435
|
+
console.log(formatLogMessage('debug', `Partial redirect timeout recovered: ${originalDomain} -> ${currentDomain}`));
|
|
420
436
|
}
|
|
421
437
|
return { success: true, finalUrl: currentPageUrl, redirected: true };
|
|
422
438
|
}
|