@fanboynz/network-scanner 2.0.65 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/proxy.js CHANGED
@@ -18,6 +18,15 @@
18
18
  *
19
19
  * SOCKS5 with auth:
20
20
  * "proxy": "socks5://user:pass@127.0.0.1:1080"
21
+ * Chromium itself cannot authenticate SOCKS5 (crbug.com/256785), so
22
+ * this module auto-starts an in-process no-auth SOCKS5 relay
23
+ * (lib/socks-relay.js) that does the upstream RFC 1929 auth. Chromium
24
+ * connects to the local relay (no auth — which it CAN do) and the
25
+ * relay tunnels to the authenticated upstream. Transparent: keep the
26
+ * socks5://user:pass@host form in config. Requires prepareSocksRelays()
27
+ * to be awaited once before the scan loop (nwss.js does this).
28
+ * NOTE: socks4 with auth is still unsupported (userId-only,
29
+ * near-extinct) — use socks5 or an authenticated HTTP proxy.
21
30
  *
22
31
  * HTTP proxy (corporate):
23
32
  * "proxy": "http://proxy.corp.com:3128"
@@ -56,8 +65,9 @@
56
65
  */
57
66
 
58
67
  const { formatLogMessage } = require('./colorize');
68
+ const { ensureRelay, getRelayPort } = require('./socks-relay');
59
69
 
60
- const PROXY_MODULE_VERSION = '1.1.0';
70
+ const PROXY_MODULE_VERSION = '1.2.0';
61
71
  const SUPPORTED_PROTOCOLS = ['socks5', 'socks4', 'http', 'https'];
62
72
 
63
73
  const DEFAULT_PORTS = {
@@ -105,8 +115,12 @@ function parseProxyUrl(proxyUrl) {
105
115
  if (!host) return null;
106
116
 
107
117
  const port = parseInt(url.port, 10) || DEFAULT_PORTS[protocol] || 1080;
108
- const username = url.username ? decodeURIComponent(url.username) : null;
109
- const password = url.password ? decodeURIComponent(url.password) : null;
118
+ // decodeURIComponent throws URIError on a literal '%' that isn't a valid
119
+ // escape (e.g. a password containing '%'). Fall back to the raw value so
120
+ // an otherwise-valid proxy isn't rejected as "Invalid proxy URL".
121
+ const safeDecode = (v) => { try { return decodeURIComponent(v); } catch (_) { return v; } };
122
+ const username = url.username ? safeDecode(url.username) : null;
123
+ const password = url.password ? safeDecode(url.password) : null;
110
124
 
111
125
  return { protocol, host, port, username, password };
112
126
  } catch (_) {
@@ -124,6 +138,41 @@ function needsProxy(siteConfig) {
124
138
  return !!getConfiguredProxy(siteConfig);
125
139
  }
126
140
 
141
+ /**
142
+ * Pre-start local no-auth SOCKS5 relays for every distinct authenticated
143
+ * SOCKS5 upstream across the given site configs. Must be awaited ONCE
144
+ * before the scan loop — getProxyArgs() then does a pure sync lookup of
145
+ * the relay port, so the fragile per-batch browser-launch path stays
146
+ * synchronous.
147
+ *
148
+ * @param {object[]} siteConfigs
149
+ * @param {boolean} forceDebug
150
+ * @returns {Promise<number>} count of relays started
151
+ */
152
+ async function prepareSocksRelays(siteConfigs, forceDebug = false) {
153
+ let started = 0;
154
+ const seen = new Set();
155
+ for (const cfg of (siteConfigs || [])) {
156
+ const url = getConfiguredProxy(cfg);
157
+ if (!url) continue;
158
+ const parsed = parseProxyUrl(url);
159
+ // Only socks5 with credentials needs a relay. socks4-auth stays
160
+ // unsupported (near-extinct, userId-only); http/https auth works
161
+ // natively via page.authenticate().
162
+ if (!parsed || parsed.protocol !== 'socks5' || !parsed.username) continue;
163
+ const key = `${parsed.host}:${parsed.port}:${parsed.username}`;
164
+ if (seen.has(key)) continue;
165
+ seen.add(key);
166
+ try {
167
+ await ensureRelay(parsed, forceDebug);
168
+ started++;
169
+ } catch (e) {
170
+ console.warn(formatLogMessage('proxy', `Failed to start SOCKS5 auth relay for ${parsed.host}:${parsed.port}: ${e.message}`));
171
+ }
172
+ }
173
+ return started;
174
+ }
175
+
127
176
  /**
128
177
  * Returns Chromium launch arguments for the configured proxy.
129
178
  *
@@ -141,15 +190,45 @@ function getProxyArgs(siteConfig, forceDebug = false) {
141
190
  return [];
142
191
  }
143
192
 
193
+ // Authenticated SOCKS5: Chromium can't auth SOCKS, so point it at the
194
+ // local no-auth relay (started upfront by prepareSocksRelays) which does
195
+ // the upstream auth. Credentials never reach Chromium. The relay speaks
196
+ // SOCKS5 and forwards domain addresses, so the remote-DNS rule below
197
+ // still applies correctly to the localhost hop.
198
+ let effectiveHost = parsed.host;
199
+ let effectivePort = parsed.port;
200
+ let effectiveProto = parsed.protocol;
201
+ if (parsed.protocol === 'socks5' && parsed.username) {
202
+ const relayPort = getRelayPort(parsed);
203
+ if (relayPort) {
204
+ effectiveHost = '127.0.0.1';
205
+ effectivePort = relayPort;
206
+ const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
207
+ if (debug) {
208
+ console.log(formatLogMessage('proxy', `SOCKS5 auth via local relay 127.0.0.1:${relayPort} -> ${parsed.host}:${parsed.port}`));
209
+ }
210
+ } else {
211
+ // prepareSocksRelays should have started this; defensive only.
212
+ console.warn(formatLogMessage('proxy', `No SOCKS5 auth relay for ${parsed.host}:${parsed.port} — call prepareSocksRelays() before the scan. Connection will fail (Chromium can't auth SOCKS).`));
213
+ }
214
+ }
215
+
144
216
  const args = [
145
- `--proxy-server=${parsed.protocol}://${parsed.host}:${parsed.port}`
217
+ `--proxy-server=${effectiveProto}://${effectiveHost}:${effectivePort}`
146
218
  ];
147
219
 
148
- // Remote DNS: resolve hostnames through the proxy (prevents DNS leaks)
149
- // Only meaningful for SOCKS proxies; HTTP proxies resolve remotely by default
220
+ // Remote DNS: force proxy-side hostname resolution (prevents DNS leaks).
221
+ // SOCKS5 only — it can carry a hostname to the proxy for remote
222
+ // resolution. SOCKS4 cannot (the protocol only accepts an IPv4 address;
223
+ // resolution must happen client-side), so applying MAP * ~NOTFOUND there
224
+ // makes Chromium's local resolver fail with nothing able to resolve the
225
+ // hostname — every request breaks. HTTP/HTTPS proxies resolve remotely
226
+ // by default and need no rule.
150
227
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
151
- if ((parsed.protocol === 'socks5' || parsed.protocol === 'socks4') && remoteDns !== false) {
228
+ if (parsed.protocol === 'socks5' && remoteDns !== false) {
152
229
  args.push('--host-resolver-rules=MAP * ~NOTFOUND , EXCLUDE 127.0.0.1');
230
+ } else if (parsed.protocol === 'socks4' && remoteDns === true) {
231
+ console.warn(formatLogMessage('proxy', `proxy_remote_dns ignored: SOCKS4 cannot do proxy-side DNS resolution (use SOCKS5)`));
153
232
  }
154
233
 
155
234
  // Bypass list: domains that skip the proxy
@@ -182,6 +261,20 @@ async function applyProxyAuth(page, siteConfig, forceDebug = false) {
182
261
  const parsed = parseProxyUrl(proxyUrl);
183
262
  if (!parsed || !parsed.username) return false;
184
263
 
264
+ // Chromium can't authenticate SOCKS proxies, and page.authenticate() is
265
+ // HTTP-407-only. SOCKS5+creds is handled out-of-band by the local
266
+ // no-auth relay (prepareSocksRelays + getProxyArgs rewrite) — Chromium
267
+ // talks no-auth to 127.0.0.1, so there's nothing for page.authenticate
268
+ // to do here; return quietly. SOCKS4 auth (userId-only, near-extinct)
269
+ // stays genuinely unsupported.
270
+ if (parsed.protocol === 'socks5') {
271
+ return false; // relay handles upstream auth
272
+ }
273
+ if (parsed.protocol === 'socks4') {
274
+ console.warn(formatLogMessage('proxy', `SOCKS4 proxy auth is unsupported (use SOCKS5, which is auto-relayed, or an authenticated HTTP proxy).`));
275
+ return false;
276
+ }
277
+
185
278
  try {
186
279
  await page.authenticate({
187
280
  username: parsed.username,
@@ -265,9 +358,14 @@ function getModuleInfo() {
265
358
  return { version: PROXY_MODULE_VERSION, name: 'Proxy Handler' };
266
359
  }
267
360
 
361
+ // Re-export relay teardown so nwss.js cleanup paths can close listeners.
362
+ const { closeAllRelays: closeAllSocksRelays } = require('./socks-relay');
363
+
268
364
  module.exports = {
269
365
  parseProxyUrl,
270
366
  needsProxy,
367
+ prepareSocksRelays,
368
+ closeAllSocksRelays,
271
369
  getProxyArgs,
272
370
  applyProxyAuth,
273
371
  testProxy,
package/lib/redirect.js CHANGED
@@ -165,8 +165,14 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
165
165
  // Inject JavaScript redirect detection
166
166
  await jsRedirectDetector();
167
167
 
168
- if (forceDebug && Object.keys(gotoOptions).length > 0) {
169
- console.log(formatLogMessage('debug', `Using goto options: ${JSON.stringify(gotoOptions)}`));
168
+ if (forceDebug) {
169
+ // Avoid Object.keys allocation just to check emptiness — a for...in
170
+ // early-exit on the first enumerable key is enough.
171
+ let hasOpts = false;
172
+ for (const _k in gotoOptions) { hasOpts = true; break; }
173
+ if (hasOpts) {
174
+ console.log(formatLogMessage('debug', `Using goto options: ${JSON.stringify(gotoOptions)}`));
175
+ }
170
176
  }
171
177
 
172
178
  // Initial navigation. Puppeteer's page.goto returns the response for the
@@ -184,7 +190,7 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
184
190
  } catch (_) { /* response disposed or detached — fine, stays null */ }
185
191
  }
186
192
 
187
- if (response && response.url() !== currentUrl) {
193
+ if (response && response.url() !== currentUrl && !response.url().startsWith('chrome-error://')) {
188
194
  // Check redirect limit before adding
189
195
  if (redirectChain.length >= maxRedirects) {
190
196
  if (forceDebug) {
@@ -192,12 +198,12 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
192
198
  }
193
199
  finalUrl = currentUrl; // Keep original URL
194
200
  } else {
195
- finalUrl = response.url();
196
- redirected = true;
197
- if (!redirectChain.includes(finalUrl)) redirectChain.push(finalUrl);
201
+ finalUrl = response.url();
202
+ redirected = true;
203
+ if (!redirectChain.includes(finalUrl)) redirectChain.push(finalUrl);
198
204
  }
199
205
  if (forceDebug) {
200
- console.log(formatLogMessage('debug', `HTTP redirect detected: ${currentUrl} ? ${finalUrl}`));
206
+ console.log(formatLogMessage('debug', `HTTP redirect detected: ${currentUrl} -> ${finalUrl}`));
201
207
  }
202
208
  }
203
209
 
@@ -223,9 +229,11 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
223
229
  };
224
230
  });
225
231
 
226
- // Check if URL changed (either through JS redirect or automatic redirect)
232
+ // Check if URL changed (either through JS redirect or automatic redirect).
233
+ // Skip chrome-error://* — it's Chromium's error landing page on DNS/connection
234
+ // failure and adding it to the chain produces bogus intermediate hops.
227
235
  const currentPageUrl = page.url();
228
- if (currentPageUrl && currentPageUrl !== finalUrl && !redirectChain.includes(currentPageUrl)) {
236
+ if (currentPageUrl && currentPageUrl !== finalUrl && !currentPageUrl.startsWith('chrome-error://') && !redirectChain.includes(currentPageUrl)) {
229
237
  // Check redirect limit before adding
230
238
  if (redirectChain.length >= maxRedirects) {
231
239
  if (forceDebug) {
@@ -275,21 +283,23 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
275
283
  await detectCommonJSRedirects(page, forceDebug, formatLogMessage);
276
284
  }
277
285
 
278
- // Final URL check
286
+ // Final URL check. Same chrome-error://* skip as the earlier branches —
287
+ // a navigation that ended in a chrome-error landing shouldn't be treated
288
+ // as the "final" URL of a successful redirect chain.
279
289
  const finalPageUrl = page.url();
280
- if (finalPageUrl && finalPageUrl !== finalUrl) {
290
+ if (finalPageUrl && finalPageUrl !== finalUrl && !finalPageUrl.startsWith('chrome-error://')) {
281
291
  // Check redirect limit before final update
282
292
  if (redirectChain.length >= maxRedirects) {
283
293
  if (forceDebug) {
284
294
  console.log(formatLogMessage('debug', `Maximum redirects (${maxRedirects}) reached, keeping current finalUrl`));
285
295
  }
286
296
  } else {
287
- finalUrl = finalPageUrl;
288
- redirected = true;
289
- if (!redirectChain.includes(finalUrl)) {
290
- redirectChain.push(finalUrl);
297
+ finalUrl = finalPageUrl;
298
+ redirected = true;
299
+ if (!redirectChain.includes(finalUrl)) {
300
+ redirectChain.push(finalUrl);
301
+ }
291
302
  }
292
- }
293
303
  }
294
304
 
295
305
  } finally {
@@ -298,21 +308,19 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
298
308
 
299
309
  // Log redirect summary
300
310
  if (redirected && forceDebug) {
301
- console.log(formatLogMessage('debug', `Redirect chain: ${redirectChain.join(' ? ')}`));
311
+ console.log(formatLogMessage('debug', `Redirect chain: ${redirectChain.join(' -> ')}`));
302
312
  }
303
313
 
304
- // Extract redirect domains to exclude from matching
305
- let redirectDomains = [];
314
+ // Extract intermediate redirect domains (exclude the final entry). Single
315
+ // loop instead of slice().map().filter() — three array allocations down to
316
+ // one push-loop. redirectChain is bounded at maxRedirects (default 10).
317
+ const redirectDomains = [];
306
318
  if (redirected && redirectChain.length > 1) {
307
- // Get all intermediate domains (exclude the final domain)
308
- const intermediateDomains = redirectChain.slice(0, -1).map(url => {
319
+ for (let i = 0; i < redirectChain.length - 1; i++) {
309
320
  try {
310
- return new URL(url).hostname;
311
- } catch {
312
- return null;
313
- }
314
- }).filter(Boolean);
315
- redirectDomains = intermediateDomains;
321
+ redirectDomains.push(new URL(redirectChain[i]).hostname);
322
+ } catch (_) { /* skip malformed entries */ }
323
+ }
316
324
  }
317
325
 
318
326
  return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains, httpStatus, cfRay };
@@ -410,13 +418,21 @@ async function handleRedirectTimeout(page, originalUrl, error, safeGetDomain, fo
410
418
 
411
419
  try {
412
420
  const currentPageUrl = page.url();
413
- if (currentPageUrl && currentPageUrl !== 'about:blank' && currentPageUrl !== originalUrl) {
421
+ // Skip chrome-error://* the same way navigateWithRedirectHandling does:
422
+ // a DNS/connection-failure landing isn't a "partial redirect recovery",
423
+ // and safeGetDomain('chrome-error://chromewebdata/') returns
424
+ // 'chromewebdata', which would otherwise differ from the original
425
+ // domain and falsely report success here.
426
+ if (currentPageUrl
427
+ && currentPageUrl !== 'about:blank'
428
+ && !currentPageUrl.startsWith('chrome-error://')
429
+ && currentPageUrl !== originalUrl) {
414
430
  const originalDomain = safeGetDomain(originalUrl);
415
431
  const currentDomain = safeGetDomain(currentPageUrl);
416
-
432
+
417
433
  if (originalDomain !== currentDomain) {
418
434
  if (forceDebug) {
419
- console.log(formatLogMessage('debug', `Partial redirect timeout recovered: ${originalDomain} ? ${currentDomain}`));
435
+ console.log(formatLogMessage('debug', `Partial redirect timeout recovered: ${originalDomain} -> ${currentDomain}`));
420
436
  }
421
437
  return { success: true, finalUrl: currentPageUrl, redirected: true };
422
438
  }