@fanboynz/network-scanner 3.0.2 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/lib/adblock-rust.js +17 -4
- package/lib/adblock.js +92 -15
- package/lib/browserhealth.js +57 -28
- package/lib/cdp.js +68 -34
- package/lib/clear_sitedata.js +68 -20
- package/lib/compress.js +26 -58
- package/lib/curl.js +44 -22
- package/lib/domain-cache.js +8 -57
- package/lib/dry-run.js +9 -4
- package/lib/fingerprint.js +735 -114
- package/lib/interaction.js +262 -26
- package/lib/nettools.js +47 -76
- package/lib/openvpn_vpn.js +116 -35
- package/lib/searchstring.js +15 -237
- package/lib/validate_rules.js +285 -3
- package/lib/wireguard_vpn.js +64 -12
- package/nwss.js +529 -217
- package/package.json +1 -1
- package/regex-tool/index.html +321 -628
- package/scripts/test-stealth.js +39 -13
package/lib/openvpn_vpn.js
CHANGED
|
@@ -8,7 +8,8 @@
|
|
|
8
8
|
// VPN tunnel — not just the site that requested it. For isolated
|
|
9
9
|
// per-site VPN with concurrency, a SOCKS proxy approach is needed.
|
|
10
10
|
|
|
11
|
-
const { execSync, spawn } = require('child_process');
|
|
11
|
+
const { execSync, spawn, spawnSync } = require('child_process');
|
|
12
|
+
const crypto = require('crypto');
|
|
12
13
|
const fs = require('fs');
|
|
13
14
|
const path = require('path');
|
|
14
15
|
const { formatLogMessage, messageColors } = require('./colorize');
|
|
@@ -22,10 +23,17 @@ const OPENVPN_TAG = messageColors.processing('[openvpn]');
|
|
|
22
23
|
function getExternalIP(tunDevice) {
|
|
23
24
|
const services = ['https://api.ipify.org', 'https://ifconfig.me/ip', 'https://icanhazip.com'];
|
|
24
25
|
for (const service of services) {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
// spawnSync with arg array (no shell) — tunDevice flows from
|
|
27
|
+
// findTunDevice (kernel-assigned /sys/class/net names, practically
|
|
28
|
+
// safe) but the pattern with execSync + interpolation was bad style.
|
|
29
|
+
// Match wireguard_vpn.js's spawn-array approach for consistency.
|
|
30
|
+
const args = ['-s', '-m', '5'];
|
|
31
|
+
if (tunDevice) args.push('--interface', tunDevice);
|
|
32
|
+
args.push(service);
|
|
33
|
+
const result = spawnSync('curl', args, { encoding: 'utf8', timeout: 8000 });
|
|
34
|
+
if (result.status === 0 && result.stdout) {
|
|
35
|
+
return result.stdout.trim();
|
|
36
|
+
}
|
|
29
37
|
}
|
|
30
38
|
return null;
|
|
31
39
|
}
|
|
@@ -127,7 +135,13 @@ function checkTunDevice() {
|
|
|
127
135
|
* Ensure temp directory exists with secure permissions
|
|
128
136
|
*/
|
|
129
137
|
function ensureTempDir() {
|
|
130
|
-
|
|
138
|
+
// 0o700 matches wireguard_vpn.js — other users on the box can't list
|
|
139
|
+
// the dir to discover which connection names exist. Individual files
|
|
140
|
+
// inside are already 0o600 so contents were safe before, but directory
|
|
141
|
+
// listing leaked the connection-name list. Note: mode is only applied
|
|
142
|
+
// on creation; an existing dir from a prior run with mode 0o755 keeps
|
|
143
|
+
// its mode until disconnectAll's rm -rf + next session creates fresh.
|
|
144
|
+
fs.mkdirSync(TEMP_DIR, { recursive: true, mode: 0o700 });
|
|
131
145
|
}
|
|
132
146
|
|
|
133
147
|
/**
|
|
@@ -169,8 +183,19 @@ function resolveConnectionName(vpnConfig) {
|
|
|
169
183
|
if (vpnConfig.config) {
|
|
170
184
|
return path.basename(vpnConfig.config, '.ovpn');
|
|
171
185
|
}
|
|
172
|
-
|
|
173
|
-
|
|
186
|
+
// Inline-only config without explicit name: derive a stable name from a
|
|
187
|
+
// hash of the content so connect and disconnect resolve to the same name
|
|
188
|
+
// across calls. The old `nwss-ovpn${activeConnections.size}` used the
|
|
189
|
+
// live Map size, so disconnect computed a DIFFERENT name than connect
|
|
190
|
+
// did (size had grown in between) and silently failed to find the
|
|
191
|
+
// entry — the connection would leak until disconnectAll. Same fix as
|
|
192
|
+
// wireguard_vpn.js commit 478a3ad.
|
|
193
|
+
if (vpnConfig.config_inline) {
|
|
194
|
+
const hash = crypto.createHash('sha1').update(vpnConfig.config_inline).digest('hex').slice(0, 8);
|
|
195
|
+
return `nwss-ovpn${hash}`;
|
|
196
|
+
}
|
|
197
|
+
// Last resort — should be unreachable if validation ran first.
|
|
198
|
+
return 'nwss-ovpn-unknown';
|
|
174
199
|
}
|
|
175
200
|
|
|
176
201
|
/**
|
|
@@ -359,14 +384,23 @@ async function startConnection(configPath, vpnConfig, forceDebug = false) {
|
|
|
359
384
|
return { success: true, connection: connectionName, tunDevice: existing.tunDevice, alreadyActive: true };
|
|
360
385
|
}
|
|
361
386
|
|
|
362
|
-
// Kill any stale processes from a previous run using this config
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
387
|
+
// Kill any stale processes from a previous run using this config.
|
|
388
|
+
// spawnSync with arg array (no shell) — connectionName flows from
|
|
389
|
+
// user config (vpnConfig.name). Naive shell interpolation here was
|
|
390
|
+
// vulnerable to '`; rm -rf ~; #' style injection.
|
|
391
|
+
const pkillRes = spawnSync('sudo', ['pkill', '-TERM', '-f', `openvpn.*${connectionName}`], {
|
|
392
|
+
encoding: 'utf8', timeout: 3000
|
|
393
|
+
});
|
|
394
|
+
// Only sleep if pkill actually matched + killed something (status 0).
|
|
395
|
+
// pkill exits 1 when no processes match — the previous execSync code threw on
|
|
396
|
+
// non-zero and the surrounding try/catch swallowed it, so the sleep
|
|
397
|
+
// never ran in the no-stale-process case. With spawnSync (no throw
|
|
398
|
+
// on non-zero), we have to gate this explicitly to preserve the same
|
|
399
|
+
// behavior — otherwise every fresh connect would waste 1s of
|
|
400
|
+
// blocking event loop.
|
|
401
|
+
if (pkillRes.status === 0) {
|
|
402
|
+
spawnSync('sleep', ['1'], { timeout: 3000 });
|
|
403
|
+
}
|
|
370
404
|
|
|
371
405
|
ensureTempDir();
|
|
372
406
|
const logPath = path.join(TEMP_DIR, `${connectionName}.log`);
|
|
@@ -411,7 +445,17 @@ async function startConnection(configPath, vpnConfig, forceDebug = false) {
|
|
|
411
445
|
if (!result.connected) {
|
|
412
446
|
// Kill the process if still running
|
|
413
447
|
try { child.kill('SIGTERM'); } catch {}
|
|
414
|
-
|
|
448
|
+
// C2: check exitCode/signalCode before SIGKILL — child.kill uses the
|
|
449
|
+
// captured PID, and Linux PIDs can be reused. If the process already
|
|
450
|
+
// exited in the 3s window, a reused PID could belong to an unrelated
|
|
451
|
+
// process that we'd then SIGKILL. unref() lets the event loop exit
|
|
452
|
+
// naturally instead of waiting on this background cleanup timer.
|
|
453
|
+
const sigkillTimer = setTimeout(() => {
|
|
454
|
+
if (child.exitCode === null && child.signalCode === null) {
|
|
455
|
+
try { child.kill('SIGKILL'); } catch {}
|
|
456
|
+
}
|
|
457
|
+
}, 3000);
|
|
458
|
+
if (typeof sigkillTimer.unref === 'function') sigkillTimer.unref();
|
|
415
459
|
return { success: false, connection: connectionName, error: result.error };
|
|
416
460
|
}
|
|
417
461
|
|
|
@@ -441,21 +485,19 @@ function stopConnection(connectionName, forceDebug = false) {
|
|
|
441
485
|
}
|
|
442
486
|
|
|
443
487
|
try {
|
|
444
|
-
// Find the actual openvpn PID (child of sudo) and kill it
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
}
|
|
488
|
+
// Find the actual openvpn PID (child of sudo) and kill it.
|
|
489
|
+
// spawnSync with arg array — connectionName from user config, naive
|
|
490
|
+
// shell interpolation was vulnerable to ';rm -rf ~' style injection.
|
|
491
|
+
spawnSync('sudo', ['pkill', '-TERM', '-f', `openvpn.*${connectionName}`], {
|
|
492
|
+
encoding: 'utf8', timeout: 3000
|
|
493
|
+
});
|
|
450
494
|
|
|
451
495
|
const killed = waitForProcessExit(info.pid, 5000);
|
|
452
496
|
if (!killed) {
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
} catch {}
|
|
458
|
-
}
|
|
497
|
+
spawnSync('sudo', ['pkill', '-9', '-f', `openvpn.*${connectionName}`], {
|
|
498
|
+
encoding: 'utf8', timeout: 3000
|
|
499
|
+
});
|
|
500
|
+
}
|
|
459
501
|
} catch (killErr) {
|
|
460
502
|
// Process may already be dead
|
|
461
503
|
if (forceDebug) {
|
|
@@ -532,13 +574,18 @@ function checkConnection(connectionName, testHost = '1.1.1.1', forceDebug = fals
|
|
|
532
574
|
return { connected: false, error: `OpenVPN process exited with code ${info.process.exitCode}` };
|
|
533
575
|
}
|
|
534
576
|
|
|
535
|
-
// Ping through the tunnel interface
|
|
577
|
+
// Ping through the tunnel interface. spawnSync with arg array —
|
|
578
|
+
// testHost flows from user config (vpnConfig.test_host), naive shell
|
|
579
|
+
// interpolation was vulnerable to '1.1.1.1; rm -rf ~' style injection.
|
|
536
580
|
try {
|
|
537
581
|
const iface = info.tunDevice || 'tun0';
|
|
538
|
-
const
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
)
|
|
582
|
+
const pingRes = spawnSync('ping', ['-c', '1', '-W', '5', '-I', iface, testHost], {
|
|
583
|
+
encoding: 'utf8', timeout: 8000
|
|
584
|
+
});
|
|
585
|
+
if (pingRes.status !== 0) {
|
|
586
|
+
throw new Error((pingRes.stderr || pingRes.stdout || '').split('\n')[0] || `ping failed for ${testHost}`);
|
|
587
|
+
}
|
|
588
|
+
const result = pingRes.stdout;
|
|
542
589
|
|
|
543
590
|
const latencyMatch = result.match(/time=([0-9.]+)\s*ms/);
|
|
544
591
|
const latencyMs = latencyMatch ? parseFloat(latencyMatch[1]) : null;
|
|
@@ -660,6 +707,23 @@ function validateOvpnConfig(ovpnConfig) {
|
|
|
660
707
|
result.warnings.push('Both "config" and "config_inline" provided; "config" takes precedence');
|
|
661
708
|
}
|
|
662
709
|
|
|
710
|
+
// D1: Validate user-provided connection 'name' to prevent path traversal
|
|
711
|
+
// via writeAuthFile/writeInlineConfig's path.join, AND to limit the blast
|
|
712
|
+
// radius of any future shell-interpolation that re-introduces ${name}.
|
|
713
|
+
// The current shell calls all use spawnSync with arg arrays (S1 fix), but
|
|
714
|
+
// the regex still blocks values that would confuse pkill's -f pattern
|
|
715
|
+
// match. Length 32 = generous for connection names but bounded.
|
|
716
|
+
// Mirror of wireguard_vpn.js F1 validation.
|
|
717
|
+
if (ovpnConfig.name !== undefined && ovpnConfig.name !== null) {
|
|
718
|
+
if (typeof ovpnConfig.name !== 'string' || !/^[a-zA-Z0-9_-]{1,32}$/.test(ovpnConfig.name)) {
|
|
719
|
+
result.isValid = false;
|
|
720
|
+
result.errors.push(
|
|
721
|
+
`Invalid 'name' value ${JSON.stringify(ovpnConfig.name)}: ` +
|
|
722
|
+
`must match /^[a-zA-Z0-9_-]{1,32}$/ (path-safe chars, max 32)`
|
|
723
|
+
);
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
|
|
663
727
|
// Validate config file exists
|
|
664
728
|
if (ovpnConfig.config) {
|
|
665
729
|
const configPath = ovpnConfig.config;
|
|
@@ -796,7 +860,24 @@ async function connectForSite(siteConfig, forceDebug = false) {
|
|
|
796
860
|
}
|
|
797
861
|
}
|
|
798
862
|
|
|
799
|
-
|
|
863
|
+
// C3: Only fetch external IP when debug-logging would actually use it.
|
|
864
|
+
// getExternalIP runs up to 3 sequential 8s-timeout curls (~24s worst
|
|
865
|
+
// case of blocking event loop) per VPN connect. Without this gate,
|
|
866
|
+
// every successful OpenVPN connect burned 1-24s on a value that's
|
|
867
|
+
// only displayed in the nwss info log when present — and only
|
|
868
|
+
// genuinely useful for debugging. Matches wireguard_vpn.js's
|
|
869
|
+
// forceDebug-gated approach (commit b97dedb).
|
|
870
|
+
//
|
|
871
|
+
// UX trade-off: the nwss info log (nwss.js:2273) only shows the IP
|
|
872
|
+
// when forceDebug is on. Users wanting the IP in non-debug runs need
|
|
873
|
+
// --debug; same behavior as WireGuard.
|
|
874
|
+
let externalIP = null;
|
|
875
|
+
if (forceDebug) {
|
|
876
|
+
externalIP = getExternalIP(startResult.tunDevice);
|
|
877
|
+
if (externalIP) {
|
|
878
|
+
console.log(formatLogMessage('debug', `${OPENVPN_TAG} ${connectionName} external IP: ${externalIP}`));
|
|
879
|
+
}
|
|
880
|
+
}
|
|
800
881
|
return { success: true, connection: connectionName, tunDevice: startResult.tunDevice, externalIP };
|
|
801
882
|
}
|
|
802
883
|
|
package/lib/searchstring.js
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
|
|
4
4
|
const fs = require('fs');
|
|
5
5
|
const { formatLogMessage, messageColors } = require('./colorize');
|
|
6
|
-
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
6
|
+
// Subsystem tag for the Puppeteer response-listener path. createCurlHandler
|
|
7
|
+
// + its CURL_TAG (and the downloadWithCurl/downloadWithRetry helpers) used
|
|
8
|
+
// to live here but were dead — nwss.js imports the curl-based handler from
|
|
9
|
+
// lib/curl.js instead. Removed in the same cleanup that drops those
|
|
10
|
+
// functions.
|
|
10
11
|
const SEARCHSTRING_TAG = messageColors.processing('[searchstring]');
|
|
11
|
-
const { runProcess } = require('./spawn-async');
|
|
12
12
|
const { grepContent } = require('./grep');
|
|
13
13
|
|
|
14
14
|
// Configuration constants for search logic
|
|
@@ -51,83 +51,6 @@ function parseSearchStrings(searchstring, searchstringAnd) {
|
|
|
51
51
|
};
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
-
/**
|
|
55
|
-
* Downloads content using curl with appropriate headers and timeout
|
|
56
|
-
* @param {string} url - The URL to download
|
|
57
|
-
* @param {string} userAgent - User agent string to use
|
|
58
|
-
* @param {number} timeout - Timeout in seconds (default: 30)
|
|
59
|
-
* @returns {Promise<string>} The downloaded content
|
|
60
|
-
*/
|
|
61
|
-
async function downloadWithCurl(url, userAgent = '', timeout = 30) {
|
|
62
|
-
const MAX_STDOUT_BYTES = 52428800; // 50MB, matches --max-filesize below
|
|
63
|
-
|
|
64
|
-
const curlArgs = [
|
|
65
|
-
'-s',
|
|
66
|
-
'-L',
|
|
67
|
-
'--max-time', timeout.toString(),
|
|
68
|
-
'--max-redirs', '5',
|
|
69
|
-
'--fail-with-body',
|
|
70
|
-
'--max-filesize', '52428800',
|
|
71
|
-
'--range', '0-52428799',
|
|
72
|
-
'--compressed'
|
|
73
|
-
];
|
|
74
|
-
if (userAgent) curlArgs.push('-H', `User-Agent: ${userAgent}`);
|
|
75
|
-
curlArgs.push(
|
|
76
|
-
'-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
77
|
-
'-H', 'Accept-Language: en-US,en;q=0.5',
|
|
78
|
-
'-H', 'Accept-Encoding: gzip, deflate',
|
|
79
|
-
'-H', 'Connection: keep-alive',
|
|
80
|
-
'-H', 'Upgrade-Insecure-Requests: 1'
|
|
81
|
-
);
|
|
82
|
-
curlArgs.push(url);
|
|
83
|
-
|
|
84
|
-
// Shared async-spawn helper — same streaming/cap/timeout/kill plumbing
|
|
85
|
-
// that used to be ~80 lines of inline boilerplate here.
|
|
86
|
-
const result = await runProcess('curl', curlArgs, {
|
|
87
|
-
timeout: timeout * 1000,
|
|
88
|
-
maxStdout: MAX_STDOUT_BYTES
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
if (result.error) throw new Error(`Curl failed for ${url}: ${result.error}`);
|
|
92
|
-
if (result.truncated) throw new Error(`Curl output exceeded ${MAX_STDOUT_BYTES} bytes for ${url}`);
|
|
93
|
-
if (result.signal) throw new Error(`Curl killed by signal ${result.signal} for ${url}`);
|
|
94
|
-
if (result.code !== 0) {
|
|
95
|
-
throw new Error(`Curl exited with status ${result.code}: ${result.stderr.toString('utf8')}`);
|
|
96
|
-
}
|
|
97
|
-
return result.stdout.toString('utf8');
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
/**
|
|
101
|
-
* Downloads content with retry logic for transient failures
|
|
102
|
-
* @param {string} url - The URL to download
|
|
103
|
-
* @param {string} userAgent - User agent string to use
|
|
104
|
-
* @param {number} timeout - Timeout in seconds
|
|
105
|
-
* @param {number} retries - Number of retry attempts (default: 2)
|
|
106
|
-
* @returns {Promise<string>} The downloaded content
|
|
107
|
-
*/
|
|
108
|
-
async function downloadWithRetry(url, userAgent = '', timeout = 30, retries = 2) {
|
|
109
|
-
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
110
|
-
try {
|
|
111
|
-
return await downloadWithCurl(url, userAgent, timeout);
|
|
112
|
-
} catch (err) {
|
|
113
|
-
// Don't retry on final attempt
|
|
114
|
-
if (attempt === retries) throw err;
|
|
115
|
-
|
|
116
|
-
// Only retry on specific transient errors
|
|
117
|
-
const shouldRetry = err.message.includes('timeout') ||
|
|
118
|
-
err.message.includes('Connection refused') ||
|
|
119
|
-
err.message.includes('502') ||
|
|
120
|
-
err.message.includes('503') ||
|
|
121
|
-
err.message.includes('Connection reset');
|
|
122
|
-
|
|
123
|
-
if (!shouldRetry) throw err;
|
|
124
|
-
|
|
125
|
-
// Exponential backoff: 1s, 2s, 4s...
|
|
126
|
-
await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(2, attempt)));
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
|
|
131
54
|
// Lookup table for the 6 named entities the previous chained-replace
|
|
132
55
|
// handled. Hoisted out of safeDecodeXmlEntities so the object isn't
|
|
133
56
|
// reallocated per call.
|
|
@@ -337,157 +260,6 @@ function shouldAnalyzeContentType(contentType) {
|
|
|
337
260
|
return textTypes.some(type => normalizedType.startsWith(type));
|
|
338
261
|
}
|
|
339
262
|
|
|
340
|
-
/**
|
|
341
|
-
* Creates a curl-based URL handler for downloading and optionally searching content
|
|
342
|
-
* @param {object} config - Configuration object containing all necessary parameters
|
|
343
|
-
* @returns {Function} URL handler function for curl-based content analysis
|
|
344
|
-
*/
|
|
345
|
-
function createCurlHandler(config) {
|
|
346
|
-
const {
|
|
347
|
-
searchStrings,
|
|
348
|
-
searchStringsAnd,
|
|
349
|
-
hasSearchStringAnd,
|
|
350
|
-
regexes,
|
|
351
|
-
matchedDomains,
|
|
352
|
-
addMatchedDomain, // Helper function for adding domains
|
|
353
|
-
currentUrl,
|
|
354
|
-
perSiteSubDomains,
|
|
355
|
-
ignoreDomains,
|
|
356
|
-
matchesIgnoreDomain,
|
|
357
|
-
getRootDomain,
|
|
358
|
-
siteConfig,
|
|
359
|
-
dumpUrls,
|
|
360
|
-
matchedUrlsLogFile,
|
|
361
|
-
forceDebug,
|
|
362
|
-
userAgent,
|
|
363
|
-
resourceType, // Resource type from request
|
|
364
|
-
hasSearchString
|
|
365
|
-
} = config;
|
|
366
|
-
|
|
367
|
-
// Hoisted: currentUrl doesn't change for this handler's lifetime, so
|
|
368
|
-
// parsing its hostname once at handler-creation eliminates the
|
|
369
|
-
// per-request URL allocation.
|
|
370
|
-
let currentUrlHostname = '';
|
|
371
|
-
try { currentUrlHostname = new URL(currentUrl).hostname; } catch (_) {}
|
|
372
|
-
|
|
373
|
-
return async function curlHandler(requestUrl) {
|
|
374
|
-
// Regex check FIRST — cheap filter that skips ~99% of requests.
|
|
375
|
-
// Previously this ran AFTER a URL parse + domain-cache lookup;
|
|
376
|
-
// the parse is the expensive bit, so doing it after the cheap
|
|
377
|
-
// gate moves the cost off the hot path.
|
|
378
|
-
const matchesRegex = regexes.some(re => re.test(requestUrl));
|
|
379
|
-
if (!matchesRegex) return;
|
|
380
|
-
|
|
381
|
-
// Parse requestUrl ONCE and reuse. Was parsed 2-3 times.
|
|
382
|
-
let requestHostname;
|
|
383
|
-
try { requestHostname = new URL(requestUrl).hostname; } catch (_) { return; }
|
|
384
|
-
const reqDomain = perSiteSubDomains ? requestHostname : getRootDomain(requestUrl);
|
|
385
|
-
|
|
386
|
-
if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(reqDomain)) {
|
|
387
|
-
if (forceDebug) {
|
|
388
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} Skipping already detected domain: ${reqDomain}`));
|
|
389
|
-
}
|
|
390
|
-
return;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
const isFirstParty = currentUrlHostname === requestHostname;
|
|
394
|
-
|
|
395
|
-
// Apply first-party/third-party filtering
|
|
396
|
-
if (isFirstParty && siteConfig.firstParty === false) {
|
|
397
|
-
if (forceDebug) {
|
|
398
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} Skipping first-party request (firstParty=false): ${requestUrl}`));
|
|
399
|
-
}
|
|
400
|
-
return;
|
|
401
|
-
}
|
|
402
|
-
|
|
403
|
-
if (!isFirstParty && siteConfig.thirdParty === false) {
|
|
404
|
-
if (forceDebug) {
|
|
405
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} Skipping third-party request (thirdParty=false): ${requestUrl}`));
|
|
406
|
-
}
|
|
407
|
-
return;
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
try {
|
|
411
|
-
if (forceDebug) {
|
|
412
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} Downloading content from: ${requestUrl}`));
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
// If NO searchstring is defined, match immediately (like browser behavior)
|
|
416
|
-
if (!hasSearchString && !hasSearchStringAnd) {
|
|
417
|
-
if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
418
|
-
return;
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
addMatchedDomain(reqDomain, resourceType);
|
|
422
|
-
const simplifiedUrl = getRootDomain(currentUrl);
|
|
423
|
-
|
|
424
|
-
if (siteConfig.verbose === 1) {
|
|
425
|
-
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
426
|
-
const resourceInfo = resourceType ? ` (${resourceType})` : '';
|
|
427
|
-
console.log(`[match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl) matched regex${resourceInfo}`);
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
if (dumpUrls) {
|
|
431
|
-
const timestamp = new Date().toISOString();
|
|
432
|
-
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
433
|
-
const resourceInfo = resourceType ? ` (${resourceType})` : '';
|
|
434
|
-
try {
|
|
435
|
-
fs.appendFileSync(matchedUrlsLogFile,
|
|
436
|
-
`${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl)${resourceInfo}\n`);
|
|
437
|
-
} catch (logErr) {
|
|
438
|
-
console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
|
|
439
|
-
}
|
|
440
|
-
}
|
|
441
|
-
return;
|
|
442
|
-
}
|
|
443
|
-
|
|
444
|
-
// If searchstring IS defined, download and search content
|
|
445
|
-
const content = await downloadWithRetry(requestUrl, userAgent, 30);
|
|
446
|
-
|
|
447
|
-
// Check if content contains search strings (OR or AND logic)
|
|
448
|
-
const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, '', requestUrl);
|
|
449
|
-
|
|
450
|
-
if (found) {
|
|
451
|
-
if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
452
|
-
return;
|
|
453
|
-
}
|
|
454
|
-
|
|
455
|
-
addMatchedDomain(reqDomain, resourceType);
|
|
456
|
-
const simplifiedUrl = getRootDomain(currentUrl);
|
|
457
|
-
|
|
458
|
-
if (siteConfig.verbose === 1) {
|
|
459
|
-
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
460
|
-
const resourceInfo = resourceType ? ` (${resourceType})` : '';
|
|
461
|
-
console.log(`[match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl) contains searchstring (${logicType}): "${matchedString}"${resourceInfo}`);
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
if (dumpUrls) {
|
|
465
|
-
const timestamp = new Date().toISOString();
|
|
466
|
-
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
467
|
-
const resourceInfo = resourceType ? ` (${resourceType})` : '';
|
|
468
|
-
try {
|
|
469
|
-
fs.appendFileSync(matchedUrlsLogFile,
|
|
470
|
-
`${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl, searchstring (${logicType}): "${matchedString}")${resourceInfo}\n`);
|
|
471
|
-
} catch (logErr) {
|
|
472
|
-
console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
} else if (forceDebug) {
|
|
476
|
-
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
477
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} ${requestUrl} (${partyType}) matched regex but no searchstring found`));
|
|
478
|
-
if (error) {
|
|
479
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} Search error: ${error}`));
|
|
480
|
-
}
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
} catch (err) {
|
|
484
|
-
if (forceDebug) {
|
|
485
|
-
console.log(formatLogMessage('debug', `${CURL_TAG} Failed to download content for ${requestUrl}: ${err.message}`));
|
|
486
|
-
}
|
|
487
|
-
}
|
|
488
|
-
};
|
|
489
|
-
}
|
|
490
|
-
|
|
491
263
|
/**
|
|
492
264
|
* Creates a response handler function for the given configuration
|
|
493
265
|
* @param {object} config - Configuration object containing all necessary parameters
|
|
@@ -758,14 +530,20 @@ function validateSearchString(searchstring, searchstringAnd) {
|
|
|
758
530
|
return { isValid: true, error: null };
|
|
759
531
|
}
|
|
760
532
|
|
|
533
|
+
// Public surface used by nwss.js (parseSearchStrings, createResponseHandler)
|
|
534
|
+
// and lib/validate_rules.js (validateSearchString). searchContent,
|
|
535
|
+
// safeDecodeXmlEntities, and shouldAnalyzeContentType stay exported as
|
|
536
|
+
// reasonable internal-helper API surface even though current external
|
|
537
|
+
// consumers don't import them. createCurlHandler + downloadWithCurl +
|
|
538
|
+
// downloadWithRetry were removed entirely — createCurlHandler had no
|
|
539
|
+
// external invocations (nwss.js imported the name but never called it,
|
|
540
|
+
// using lib/curl.js's version instead), and the download helpers were
|
|
541
|
+
// only consumed by createCurlHandler.
|
|
761
542
|
module.exports = {
|
|
762
543
|
parseSearchStrings,
|
|
763
544
|
searchContent,
|
|
764
545
|
safeDecodeXmlEntities,
|
|
765
546
|
shouldAnalyzeContentType,
|
|
766
547
|
createResponseHandler,
|
|
767
|
-
|
|
768
|
-
downloadWithCurl,
|
|
769
|
-
validateSearchString,
|
|
770
|
-
downloadWithRetry
|
|
548
|
+
validateSearchString
|
|
771
549
|
};
|