@fanboynz/network-scanner 2.0.33 → 2.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/nwss.js +107 -68
- package/package.json +1 -1
package/nwss.js
CHANGED
|
@@ -210,7 +210,7 @@ const dryRunMode = args.includes('--dry-run');
|
|
|
210
210
|
const compressLogs = args.includes('--compress-logs');
|
|
211
211
|
const removeTempFiles = args.includes('--remove-tempfiles');
|
|
212
212
|
const validateConfig = args.includes('--validate-config');
|
|
213
|
-
|
|
213
|
+
let validateRules = args.includes('--validate-rules');
|
|
214
214
|
const testValidation = args.includes('--test-validation');
|
|
215
215
|
let cleanRules = args.includes('--clean-rules');
|
|
216
216
|
const clearCache = args.includes('--clear-cache');
|
|
@@ -694,6 +694,11 @@ const {
|
|
|
694
694
|
...otherGlobalConfig
|
|
695
695
|
} = config;
|
|
696
696
|
|
|
697
|
+
// Pre-compile global blocked regexes ONCE (used in every processUrl call)
|
|
698
|
+
const globalBlockedRegexes = Array.isArray(globalBlocked)
|
|
699
|
+
? globalBlocked.map(pattern => new RegExp(pattern))
|
|
700
|
+
: [];
|
|
701
|
+
|
|
697
702
|
// Apply global configuration overrides with validation
|
|
698
703
|
// Priority: Command line args > config.json > defaults
|
|
699
704
|
const MAX_CONCURRENT_SITES = (() => {
|
|
@@ -943,6 +948,39 @@ if (dumpUrls) {
|
|
|
943
948
|
}
|
|
944
949
|
}
|
|
945
950
|
|
|
951
|
+
// --- Buffered Log Writer ---
|
|
952
|
+
// Avoids blocking I/O on every intercepted request in debug/dumpurls mode
|
|
953
|
+
const _logBuffers = new Map(); // filePath -> string[]
|
|
954
|
+
const LOG_FLUSH_INTERVAL = 2000; // Flush every 2 seconds
|
|
955
|
+
let _logFlushTimer = null;
|
|
956
|
+
|
|
957
|
+
function bufferedLogWrite(filePath, entry) {
|
|
958
|
+
if (!filePath) return;
|
|
959
|
+
if (!_logBuffers.has(filePath)) {
|
|
960
|
+
_logBuffers.set(filePath, []);
|
|
961
|
+
}
|
|
962
|
+
_logBuffers.get(filePath).push(entry);
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
function flushLogBuffers() {
|
|
966
|
+
for (const [filePath, entries] of _logBuffers) {
|
|
967
|
+
if (entries.length > 0) {
|
|
968
|
+
try {
|
|
969
|
+
fs.appendFileSync(filePath, entries.join(''));
|
|
970
|
+
} catch (err) {
|
|
971
|
+
console.warn(formatLogMessage('warn', `Failed to flush log buffer to ${filePath}: ${err.message}`));
|
|
972
|
+
}
|
|
973
|
+
entries.length = 0; // Clear buffer
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
// Start periodic flush if any logging is enabled
|
|
979
|
+
if (forceDebug || dumpUrls) {
|
|
980
|
+
_logFlushTimer = setInterval(flushLogBuffers, LOG_FLUSH_INTERVAL);
|
|
981
|
+
_logFlushTimer.unref(); // Don't keep process alive just for flushing
|
|
982
|
+
}
|
|
983
|
+
|
|
946
984
|
// Log comments if debug mode is enabled and comments exist
|
|
947
985
|
if (forceDebug && globalComments) {
|
|
948
986
|
const commentList = Array.isArray(globalComments) ? globalComments : [globalComments];
|
|
@@ -1047,15 +1085,21 @@ function shouldBypassCacheForUrl(url, siteConfig) {
|
|
|
1047
1085
|
return siteConfig.bypass_cache === true;
|
|
1048
1086
|
}
|
|
1049
1087
|
|
|
1050
|
-
// ability to use
|
|
1088
|
+
// ability to use wildcards in ignoreDomains
|
|
1089
|
+
// Cache compiled wildcard regexes to avoid recompilation on every request
|
|
1090
|
+
const _wildcardRegexCache = new Map();
|
|
1051
1091
|
function matchesIgnoreDomain(domain, ignorePatterns) {
|
|
1052
1092
|
return ignorePatterns.some(pattern => {
|
|
1053
1093
|
if (pattern.includes('*')) {
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1094
|
+
let compiled = _wildcardRegexCache.get(pattern);
|
|
1095
|
+
if (!compiled) {
|
|
1096
|
+
const regexPattern = pattern
|
|
1097
|
+
.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // Escape all regex specials including *
|
|
1098
|
+
.replace(/\\\*/g, '.*'); // Convert escaped \* back to .*
|
|
1099
|
+
compiled = new RegExp(`^${regexPattern}$`);
|
|
1100
|
+
_wildcardRegexCache.set(pattern, compiled);
|
|
1101
|
+
}
|
|
1102
|
+
return compiled.test(domain);
|
|
1059
1103
|
}
|
|
1060
1104
|
return domain.endsWith(pattern);
|
|
1061
1105
|
});
|
|
@@ -1065,14 +1109,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1065
1109
|
// Track active frames and clear on navigation to prevent detached frame access
|
|
1066
1110
|
let activeFrames = new Set(); // Use Set to track frame references
|
|
1067
1111
|
|
|
1068
|
-
// Clear frame tracking on navigation to prevent stale references
|
|
1069
|
-
page.on('framenavigated', (frame) => {
|
|
1070
|
-
if (frame === page.mainFrame()) {
|
|
1071
|
-
// Main frame navigated - clear all tracked frames
|
|
1072
|
-
activeFrames.clear();
|
|
1073
|
-
}
|
|
1074
|
-
});
|
|
1075
|
-
|
|
1076
1112
|
// Handle frame creation with error suppression
|
|
1077
1113
|
page.on('frameattached', async (frame) => {
|
|
1078
1114
|
// Enhanced frame handling with detached frame protection
|
|
@@ -1182,10 +1218,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1182
1218
|
}
|
|
1183
1219
|
}
|
|
1184
1220
|
});
|
|
1185
|
-
// Handle frame navigations
|
|
1221
|
+
// Handle frame navigations - clear stale tracking and monitor activity
|
|
1186
1222
|
page.on('framenavigated', (frame) => {
|
|
1187
1223
|
|
|
1188
|
-
//
|
|
1224
|
+
// Main frame navigated - clear all tracked frames to prevent stale references
|
|
1225
|
+
if (frame === page.mainFrame()) {
|
|
1226
|
+
activeFrames.clear();
|
|
1227
|
+
return;
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
// Skip child frames not in our active set
|
|
1189
1231
|
if (!activeFrames.has(frame)) return;
|
|
1190
1232
|
|
|
1191
1233
|
let frameUrl;
|
|
@@ -1311,7 +1353,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1311
1353
|
'--disable-client-side-phishing-detection',
|
|
1312
1354
|
'--enable-features=NetworkService',
|
|
1313
1355
|
// Disk space controls - 50MB cache limits
|
|
1314
|
-
'--disable-features=VizDisplayCompositor',
|
|
1315
1356
|
`--disk-cache-size=${CACHE_LIMITS.DISK_CACHE_SIZE}`, // 50MB disk cache
|
|
1316
1357
|
`--media-cache-size=${CACHE_LIMITS.MEDIA_CACHE_SIZE}`, // 50MB media cache
|
|
1317
1358
|
'--disable-application-cache',
|
|
@@ -1324,7 +1365,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1324
1365
|
'--disable-ipc-flooding-protection',
|
|
1325
1366
|
'--aggressive-cache-discard',
|
|
1326
1367
|
'--memory-pressure-off',
|
|
1327
|
-
'--max_old_space_size=2048',
|
|
1368
|
+
'--max_old_space_size=2048', // V8 heap limit
|
|
1328
1369
|
'--disable-prompt-on-repost', // Fixes form popup on page reload
|
|
1329
1370
|
'--disable-background-networking',
|
|
1330
1371
|
'--no-sandbox',
|
|
@@ -1339,7 +1380,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1339
1380
|
'--disable-extensions',
|
|
1340
1381
|
'--no-default-browser-check',
|
|
1341
1382
|
'--safebrowsing-disable-auto-update',
|
|
1342
|
-
'--max_old_space_size=1024',
|
|
1343
1383
|
'--ignore-ssl-errors',
|
|
1344
1384
|
'--ignore-certificate-errors',
|
|
1345
1385
|
'--ignore-certificate-errors-spki-list',
|
|
@@ -1401,12 +1441,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1401
1441
|
// Set up cleanup on process termination
|
|
1402
1442
|
process.on('SIGINT', async () => {
|
|
1403
1443
|
if (forceDebug) console.log(formatLogMessage('debug', 'SIGINT received, performing cleanup...'));
|
|
1444
|
+
flushLogBuffers();
|
|
1445
|
+
if (_logFlushTimer) clearInterval(_logFlushTimer);
|
|
1404
1446
|
await performEmergencyCleanup();
|
|
1405
1447
|
process.exit(0);
|
|
1406
1448
|
});
|
|
1407
1449
|
|
|
1408
1450
|
process.on('SIGTERM', async () => {
|
|
1409
1451
|
if (forceDebug) console.log(formatLogMessage('debug', 'SIGTERM received, performing cleanup...'));
|
|
1452
|
+
flushLogBuffers();
|
|
1453
|
+
if (_logFlushTimer) clearInterval(_logFlushTimer);
|
|
1410
1454
|
await performEmergencyCleanup();
|
|
1411
1455
|
process.exit(0);
|
|
1412
1456
|
});
|
|
@@ -2197,11 +2241,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2197
2241
|
? siteConfig.blocked.map(pattern => new RegExp(pattern))
|
|
2198
2242
|
: [];
|
|
2199
2243
|
|
|
2200
|
-
//
|
|
2201
|
-
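const globalBlockedRegexes = Array.isArray(globalBlocked)
|
|
2202
|
-
? globalBlocked.map(pattern => new RegExp(pattern))
|
|
2203
|
-
: [];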
|
|
2204
|
-
const allBlockedRegexes = [...blockedRegexes, ...globalBlockedRegexes];
|
|
2244
|
+
// Combine site-specific with pre-compiled global blocked patterns
|
|
2245
|
+
const allBlockedRegexes = blockedRegexes.length > 0
|
|
2246
|
+
? [...blockedRegexes, ...globalBlockedRegexes]
|
|
2247
|
+
: globalBlockedRegexes; // Avoid spread when no site-specific patterns
|
|
2205
2248
|
|
|
2206
2249
|
/**
|
|
2207
2250
|
* Helper function to add domain to matched collection
|
|
@@ -2331,12 +2374,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2331
2374
|
// - URL matching against blocklists (`blockedRegexes`).
|
|
2332
2375
|
// - URL matching against filter patterns (`regexes`) for domain extraction.
|
|
2333
2376
|
// - Global `ignoreDomains` list.
|
|
2377
|
+
// Pre-compute values that are constant for this URL
|
|
2378
|
+
const simplifiedCurrentUrl = getRootDomain(currentUrl);
|
|
2379
|
+
|
|
2334
2380
|
page.on('request', request => {
|
|
2335
2381
|
const checkedUrl = request.url();
|
|
2336
|
-
const
|
|
2337
|
-
const checkedRootDomain = safeGetDomain(checkedUrl, false);
|
|
2382
|
+
const fullSubdomain = safeGetDomain(checkedUrl, true); // Full hostname for cache
|
|
2383
|
+
const checkedRootDomain = safeGetDomain(checkedUrl, false);
|
|
2338
2384
|
// Check against ALL first-party domains (original + all redirects)
|
|
2339
|
-
// This prevents redirect destinations from being marked as third-party
|
|
2340
2385
|
const isFirstParty = checkedRootDomain && firstPartyDomains.has(checkedRootDomain);
|
|
2341
2386
|
|
|
2342
2387
|
// Block infinite iframe loops - safely access frame URL
|
|
@@ -2349,9 +2394,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2349
2394
|
}
|
|
2350
2395
|
})();
|
|
2351
2396
|
if (frameUrl && frameUrl.includes('creative.dmzjmp.com') &&
|
|
2352
|
-
|
|
2397
|
+
checkedUrl.includes('go.dmzjmp.com/api/models')) {
|
|
2353
2398
|
if (forceDebug) {
|
|
2354
|
-
console.log(formatLogMessage('debug', `Blocking potential infinite iframe loop: ${
|
|
2399
|
+
console.log(formatLogMessage('debug', `Blocking potential infinite iframe loop: ${checkedUrl}`));
|
|
2355
2400
|
}
|
|
2356
2401
|
request.abort();
|
|
2357
2402
|
return;
|
|
@@ -2359,19 +2404,19 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2359
2404
|
|
|
2360
2405
|
// Enhanced debug logging to show which frame the request came from
|
|
2361
2406
|
if (forceDebug) {
|
|
2362
|
-
let
|
|
2407
|
+
let debugFrameUrl = 'unknown-frame';
|
|
2363
2408
|
let isMainFrame = false;
|
|
2364
2409
|
|
|
2365
2410
|
try {
|
|
2366
2411
|
const frame = request.frame();
|
|
2367
2412
|
if (frame) {
|
|
2368
|
-
|
|
2413
|
+
debugFrameUrl = frame.url();
|
|
2369
2414
|
isMainFrame = frame === page.mainFrame();
|
|
2370
2415
|
}
|
|
2371
2416
|
} catch (frameErr) {
|
|
2372
|
-
|
|
2417
|
+
debugFrameUrl = 'detached-frame';
|
|
2373
2418
|
}
|
|
2374
|
-
console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${
|
|
2419
|
+
console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${debugFrameUrl} → ${checkedUrl}`));
|
|
2375
2420
|
}
|
|
2376
2421
|
|
|
2377
2422
|
// Apply adblock rules BEFORE expensive regex checks for better performance
|
|
@@ -2397,46 +2442,36 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2397
2442
|
|
|
2398
2443
|
// Show --debug output and the url while its scanning
|
|
2399
2444
|
if (forceDebug) {
|
|
2400
|
-
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2401
2445
|
const timestamp = new Date().toISOString();
|
|
2402
|
-
const logEntry = `${timestamp} [debug req][${
|
|
2446
|
+
const logEntry = `${timestamp} [debug req][${simplifiedCurrentUrl}] ${checkedUrl}\n`;
|
|
2403
2447
|
|
|
2404
2448
|
// Output to console
|
|
2405
|
-
console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[${
|
|
2449
|
+
console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[${simplifiedCurrentUrl}] ${checkedUrl}`));
|
|
2406
2450
|
|
|
2407
|
-
// Output to file
|
|
2408
|
-
|
|
2409
|
-
try {
|
|
2410
|
-
fs.appendFileSync(debugLogFile, logEntry + '\n');
|
|
2411
|
-
} catch (logErr) {
|
|
2412
|
-
console.warn(formatLogMessage('warn', `Failed to write to debug log file: ${logErr.message}`));
|
|
2413
|
-
}
|
|
2414
|
-
}
|
|
2451
|
+
// Output to file (buffered)
|
|
2452
|
+
bufferedLogWrite(debugLogFile, logEntry);
|
|
2415
2453
|
}
|
|
2416
|
-
const reqUrl =
|
|
2454
|
+
const reqUrl = checkedUrl;
|
|
2417
2455
|
|
|
2418
|
-
// ALWAYS extract the FULL subdomain for cache checking to preserve unique subdomains
|
|
2419
|
-
const fullSubdomain = safeGetDomain(reqUrl, true); // Always get full subdomain for cache
|
|
2420
2456
|
const reqDomain = safeGetDomain(reqUrl, perSiteSubDomains); // Output domain based on config
|
|
2421
2457
|
|
|
2422
2458
|
if (allBlockedRegexes.some(re => re.test(reqUrl))) {
|
|
2423
2459
|
if (forceDebug) {
|
|
2424
|
-
// Find which specific pattern matched
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
|
|
2429
|
-
|
|
2430
|
-
|
|
2431
|
-
|
|
2432
|
-
if (debugLogFile) {
|
|
2433
|
-
try {
|
|
2434
|
-
const timestamp = new Date().toISOString();
|
|
2435
|
-
fs.appendFileSync(debugLogFile, `${timestamp} [blocked][${simplifiedUrl}] ${reqUrl} (${patternSource} pattern: ${matchedPattern})\n`);
|
|
2436
|
-
} catch (logErr) {
|
|
2437
|
-
console.warn(formatLogMessage('warn', `Failed to write blocked domain to debug log: ${logErr.message}`));
|
|
2460
|
+
// Find which specific pattern matched using already-compiled regexes
|
|
2461
|
+
let matchedPattern = '(unknown)';
|
|
2462
|
+
let patternSource = 'global';
|
|
2463
|
+
for (let i = 0; i < allBlockedRegexes.length; i++) {
|
|
2464
|
+
if (allBlockedRegexes[i].test(reqUrl)) {
|
|
2465
|
+
matchedPattern = allBlockedRegexes[i].source;
|
|
2466
|
+
patternSource = i < blockedRegexes.length ? 'site' : 'global';
|
|
2467
|
+
break;
|
|
2438
2468
|
}
|
|
2439
2469
|
}
|
|
2470
|
+
console.log(formatLogMessage('debug', `${messageColors.blocked('[blocked]')}[${simplifiedCurrentUrl}] ${reqUrl} blocked by ${patternSource} pattern: ${matchedPattern}`));
|
|
2471
|
+
|
|
2472
|
+
// Also log to file (buffered)
|
|
2473
|
+
const timestamp = new Date().toISOString();
|
|
2474
|
+
bufferedLogWrite(debugLogFile, `${timestamp} [blocked][${simplifiedCurrentUrl}] ${reqUrl} (${patternSource} pattern: ${matchedPattern})\n`);
|
|
2440
2475
|
}
|
|
2441
2476
|
|
|
2442
2477
|
// NEW: Check if even_blocked is enabled and this URL matches filter regex
|
|
@@ -2463,15 +2498,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2463
2498
|
addMatchedDomain(reqDomain, resourceType, fullSubdomain);
|
|
2464
2499
|
}
|
|
2465
2500
|
|
|
2466
|
-
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2467
2501
|
if (siteConfig.verbose === 1) {
|
|
2468
2502
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2469
|
-
console.log(formatLogMessage('match', `[${
|
|
2503
|
+
console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2470
2504
|
}
|
|
2471
2505
|
if (dumpUrls) {
|
|
2472
2506
|
const timestamp = new Date().toISOString();
|
|
2473
2507
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2474
|
-
|
|
2508
|
+
bufferedLogWrite(matchedUrlsLogFile, `${timestamp} [match][${simplifiedCurrentUrl}] ${reqUrl} (resourceType: ${resourceType})${resourceInfo} [BLOCKED BUT ADDED]\n`);
|
|
2475
2509
|
}
|
|
2476
2510
|
break; // Only match once per URL
|
|
2477
2511
|
}
|
|
@@ -2627,15 +2661,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2627
2661
|
} else {
|
|
2628
2662
|
addMatchedDomain(reqDomain, resourceType);
|
|
2629
2663
|
}
|
|
2630
|
-
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2631
2664
|
if (siteConfig.verbose === 1) {
|
|
2632
2665
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2633
|
-
console.log(formatLogMessage('match', `[${
|
|
2666
|
+
console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2634
2667
|
}
|
|
2635
2668
|
if (dumpUrls) {
|
|
2636
2669
|
const timestamp = new Date().toISOString();
|
|
2637
2670
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2638
|
-
|
|
2671
|
+
bufferedLogWrite(matchedUrlsLogFile, `${timestamp} [match][${simplifiedCurrentUrl}] ${reqUrl} (resourceType: ${resourceType})${resourceInfo}\n`);
|
|
2639
2672
|
}
|
|
2640
2673
|
} else if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
|
|
2641
2674
|
// If nettools are configured (whois/dig), perform checks on the domain
|
|
@@ -4119,6 +4152,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4119
4152
|
}
|
|
4120
4153
|
}
|
|
4121
4154
|
|
|
4155
|
+
// Flush any remaining buffered log entries before compression/exit
|
|
4156
|
+
flushLogBuffers();
|
|
4157
|
+
if (_logFlushTimer) {
|
|
4158
|
+
clearInterval(_logFlushTimer);
|
|
4159
|
+
}
|
|
4160
|
+
|
|
4122
4161
|
// Compress log files if --compress-logs is enabled
|
|
4123
4162
|
if (compressLogs && dumpUrls && !dryRunMode) {
|
|
4124
4163
|
// Collect all existing log files for compression
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.33",
|
|
3
|
+
"version": "2.0.35",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|