@fanboynz/network-scanner 1.0.97 → 1.0.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -1
- package/lib/browserhealth.js +330 -17
- package/nwss.1 +267 -7
- package/nwss.js +67 -11
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -151,7 +151,10 @@ Example:
|
|
|
151
151
|
| `blocked` | Array | - | Domains or regexes to block during scanning |
|
|
152
152
|
| `even_blocked` | Boolean | `false` | Add matching rules even if requests are blocked |
|
|
153
153
|
| `bypass_cache` | Boolean | `false` | Skip all caching for this site's URLs |
|
|
154
|
-
| `window_cleanup` | Boolean | `false` | Close
|
|
154
|
+
| `window_cleanup` | Boolean or String | `false` | Close old/unused browser windows/tabs after entire URL group completes |
|
|
155
|
+
|
|
156
|
+
**Window cleanup modes:** `false` (disabled), `true` (conservative - closes obvious leftovers), `"all"` (aggressive - closes all content pages). Both active modes preserve the main Puppeteer window and wait 15 seconds before cleanup to avoid interfering with active operations.
|
|
157
|
+
|
|
155
158
|
|
|
156
159
|
### Redirect Handling Options
|
|
157
160
|
|
|
@@ -320,6 +323,39 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
|
|
|
320
323
|
|
|
321
324
|
### Stealth Configuration Examples
|
|
322
325
|
|
|
326
|
+
#### Memory Management with Window Cleanup
|
|
327
|
+
```json
|
|
328
|
+
{
|
|
329
|
+
"url": [
|
|
330
|
+
"https://popup-heavy-site1.com",
|
|
331
|
+
"https://popup-heavy-site2.com",
|
|
332
|
+
"https://popup-heavy-site3.com"
|
|
333
|
+
],
|
|
334
|
+
"filterRegex": "\\.(space|website|tech)\\b",
|
|
335
|
+
"window_cleanup": "all",
|
|
336
|
+
"interact": true,
|
|
337
|
+
"reload": 2,
|
|
338
|
+
"resourceTypes": ["script", "fetch"],
|
|
339
|
+
"comments": "Aggressive cleanup for sites that open many popups"
|
|
340
|
+
}
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
#### Conservative Memory Management
|
|
344
|
+
```json
|
|
345
|
+
{
|
|
346
|
+
"url": "https://complex-site.com",
|
|
347
|
+
"filterRegex": "analytics|tracking",
|
|
348
|
+
"window_cleanup": true,
|
|
349
|
+
"interact": true,
|
|
350
|
+
"delay": 8000,
|
|
351
|
+
"reload": 3,
|
|
352
|
+
"comments": [
|
|
353
|
+
"Conservative cleanup preserves potentially active content",
|
|
354
|
+
"Good for sites with complex iframe structures"
|
|
355
|
+
]
|
|
356
|
+
}
|
|
357
|
+
```
|
|
358
|
+
|
|
323
359
|
#### E-commerce Site Scanning
|
|
324
360
|
```json
|
|
325
361
|
{
|
|
@@ -368,6 +404,22 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
|
|
|
368
404
|
|
|
369
405
|
---
|
|
370
406
|
|
|
407
|
+
## Memory Management
|
|
408
|
+
|
|
409
|
+
The scanner includes intelligent window management to prevent memory accumulation during long scans:
|
|
410
|
+
|
|
411
|
+
- **Conservative cleanup** (`window_cleanup: true`): Selectively closes pages that appear to be leftovers from previous scans
|
|
412
|
+
- **Aggressive cleanup** (`window_cleanup: "all"`): Closes all content pages from previous operations for maximum memory recovery
|
|
413
|
+
- **Main window preservation**: Both modes always preserve the main Puppeteer browser window to maintain stability
|
|
414
|
+
- **Popup window handling**: Automatically detects and closes popup windows created by previous site scans
|
|
415
|
+
- **Timing protection**: 15-second delay ensures no active operations are interrupted during cleanup
|
|
416
|
+
- **Active page protection**: Never affects pages currently being processed by concurrent scanning operations
|
|
417
|
+
- **Memory reporting**: Reports estimated memory freed from closed windows for performance monitoring
|
|
418
|
+
|
|
419
|
+
Use aggressive cleanup for sites that open many popups or when processing large numbers of URLs. Use conservative cleanup when you want to preserve potentially active content but still free obvious leftovers.
|
|
420
|
+
|
|
421
|
+
---
|
|
422
|
+
|
|
371
423
|
## INSTALL
|
|
372
424
|
|
|
373
425
|
#### (Ubuntu as example). NOTE: Use Chrome and not Chromium for best compatibility.
|
package/lib/browserhealth.js
CHANGED
|
@@ -7,7 +7,19 @@ const { formatLogMessage, messageColors } = require('./colorize');
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
// Window cleanup delay constant
|
|
10
|
-
const WINDOW_CLEANUP_DELAY_MS =
|
|
10
|
+
const WINDOW_CLEANUP_DELAY_MS = 15000;
|
|
11
|
+
// Realtime window cleanup settings
|
|
12
|
+
const REALTIME_CLEANUP_BUFFER_MS = 15000; // Additional buffer time after site delay. Delay increased to fix missing hits.
|
|
13
|
+
const REALTIME_CLEANUP_THRESHOLD = 8; // Default number of pages to keep
|
|
14
|
+
const REALTIME_CLEANUP_MIN_PAGES = 3; // Minimum pages before cleanup kicks in
|
|
15
|
+
|
|
16
|
+
// Track page creation order for realtime cleanup
|
|
17
|
+
const pageCreationTracker = new Map(); // Maps page -> creation sequence number (from pageCreationCounter), not a wall-clock timestamp
|
|
18
|
+
let pageCreationCounter = 0;
|
|
19
|
+
|
|
20
|
+
// Track page usage for realtime cleanup safety
|
|
21
|
+
const pageUsageTracker = new Map(); // Maps page -> { lastActivity: timestamp, isProcessing: boolean }
|
|
22
|
+
const PAGE_IDLE_THRESHOLD = 15000; // 15 seconds of inactivity before considering page safe to clean
|
|
11
23
|
|
|
12
24
|
/**
|
|
13
25
|
* Performs group-level window cleanup after all URLs in a site group complete
|
|
@@ -15,33 +27,78 @@ const WINDOW_CLEANUP_DELAY_MS = 16000;
|
|
|
15
27
|
* @param {import('puppeteer').Browser} browserInstance - Browser instance
|
|
16
28
|
* @param {string} groupDescription - Description of the group for logging
|
|
17
29
|
* @param {boolean} forceDebug - Debug logging flag
|
|
30
|
+
* @param {string|boolean} cleanupMode - Cleanup mode: true/"default" (conservative), "all" (aggressive)
|
|
18
31
|
* @returns {Promise<Object>} Cleanup results
|
|
19
32
|
*/
|
|
20
|
-
async function performGroupWindowCleanup(browserInstance, groupDescription, forceDebug) {
|
|
33
|
+
async function performGroupWindowCleanup(browserInstance, groupDescription, forceDebug, cleanupMode = true) {
|
|
21
34
|
try {
|
|
22
35
|
// Wait before cleanup to allow any final operations to complete
|
|
36
|
+
const modeText = cleanupMode === "all" ? "aggressive cleanup of old windows" : "conservative cleanup of extra windows"
|
|
23
37
|
if (forceDebug) {
|
|
24
|
-
console.log(formatLogMessage('debug', `[group_window_cleanup] Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before
|
|
38
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before ${modeText} for group: ${groupDescription}`));
|
|
39
|
+
|
|
25
40
|
}
|
|
26
41
|
await new Promise(resolve => setTimeout(resolve, WINDOW_CLEANUP_DELAY_MS));
|
|
27
42
|
|
|
28
43
|
const allPages = await browserInstance.pages();
|
|
29
|
-
|
|
30
|
-
|
|
44
|
+
// Identify the main Puppeteer window (should be about:blank or the initial page)
|
|
45
|
+
let mainPuppeteerPage = null;
|
|
46
|
+
let pagesToClose = [];
|
|
31
47
|
|
|
32
|
-
|
|
48
|
+
// Find the main page - typically the first page that's about:blank or has been there longest
|
|
49
|
+
for (const page of allPages) {
|
|
50
|
+
const url = page.url();
|
|
51
|
+
if (url === 'about:blank' || url === '' || url.startsWith('chrome://')) {
|
|
52
|
+
if (!mainPuppeteerPage) {
|
|
53
|
+
mainPuppeteerPage = page; // First blank page is likely the main window
|
|
54
|
+
} else {
|
|
55
|
+
pagesToClose.push(page); // Additional blank pages can be closed
|
|
56
|
+
}
|
|
57
|
+
} else {
|
|
58
|
+
// Any page with actual content should be evaluated for closure
|
|
59
|
+
if (cleanupMode === "all") {
|
|
60
|
+
// Aggressive mode: close all content pages
|
|
61
|
+
pagesToClose.push(page);
|
|
62
|
+
} else {
|
|
63
|
+
// Conservative mode: only close pages that look like leftovers from previous scans
|
|
64
|
+
// Keep pages that might still be actively used
|
|
65
|
+
const isOldPage = await isPageFromPreviousScan(page, forceDebug);
|
|
66
|
+
if (isOldPage) {
|
|
67
|
+
pagesToClose.push(page);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Ensure we always have a main page
|
|
74
|
+
if (!mainPuppeteerPage && allPages.length > 0) {
|
|
75
|
+
mainPuppeteerPage = allPages[0]; // Fallback to first page
|
|
76
|
+
pagesToClose = allPages.slice(1);
|
|
33
77
|
if (forceDebug) {
|
|
34
|
-
console.log(formatLogMessage('debug', `[group_window_cleanup] No
|
|
78
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] No blank page found, using first page as main: ${mainPuppeteerPage.url()}`));
|
|
35
79
|
}
|
|
36
|
-
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (pagesToClose.length === 0) {
|
|
83
|
+
if (forceDebug) {
|
|
84
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] No windows to close for group: ${groupDescription}`));
|
|
85
|
+
}
|
|
86
|
+
return {
|
|
87
|
+
success: true,
|
|
88
|
+
closedCount: 0,
|
|
89
|
+
totalPages: allPages.length,
|
|
90
|
+
estimatedMemoryFreed: 0,
|
|
91
|
+
mainPagePreserved: true,
|
|
92
|
+
cleanupMode: cleanupMode === "all" ? "all" : "default"
|
|
93
|
+
};
|
|
37
94
|
}
|
|
38
95
|
|
|
39
96
|
// Estimate memory usage before closing
|
|
40
97
|
let totalEstimatedMemory = 0;
|
|
41
98
|
const pageMemoryEstimates = [];
|
|
42
99
|
|
|
43
|
-
for (let i = 0; i <
|
|
44
|
-
const page =
|
|
100
|
+
for (let i = 0; i < pagesToClose.length; i++) {
|
|
101
|
+
const page = pagesToClose[i];
|
|
45
102
|
let pageMemoryEstimate = 0;
|
|
46
103
|
|
|
47
104
|
try {
|
|
@@ -78,17 +135,20 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
|
|
|
78
135
|
totalEstimatedMemory += pageMemoryEstimate;
|
|
79
136
|
}
|
|
80
137
|
|
|
81
|
-
// Close
|
|
82
|
-
const closePromises =
|
|
138
|
+
// Close identified old/unused pages
|
|
139
|
+
const closePromises = pagesToClose.map(async (page, index) => {
|
|
83
140
|
try {
|
|
84
141
|
if (!page.isClosed()) {
|
|
142
|
+
if (forceDebug) {
|
|
143
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] Closing page: ${page.url()}`));
|
|
144
|
+
}
|
|
85
145
|
await page.close();
|
|
86
146
|
return { success: true, url: page.url() || `page-${index}`, estimatedMemory: pageMemoryEstimates[index] };
|
|
87
147
|
}
|
|
88
148
|
return { success: false, reason: 'already_closed', estimatedMemory: 0 };
|
|
89
149
|
} catch (closeErr) {
|
|
90
150
|
if (forceDebug) {
|
|
91
|
-
console.log(formatLogMessage('debug', `[group_window_cleanup] Failed to close page ${index + 1}: ${closeErr.message}`));
|
|
151
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] Failed to close old page ${index + 1}: ${closeErr.message}`));
|
|
92
152
|
}
|
|
93
153
|
return { success: false, error: closeErr.message, estimatedMemory: 0 };
|
|
94
154
|
}
|
|
@@ -114,18 +174,22 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
|
|
|
114
174
|
};
|
|
115
175
|
|
|
116
176
|
if (forceDebug) {
|
|
117
|
-
console.log(formatLogMessage('debug', `[group_window_cleanup] Closed ${successfulCloses}/${
|
|
177
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] Closed ${successfulCloses}/${pagesToClose.length} old windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`));
|
|
118
178
|
console.log(formatLogMessage('debug', `[group_window_cleanup] Estimated memory freed: ${formatMemory(actualMemoryFreed)}`));
|
|
179
|
+
if (mainPuppeteerPage) {
|
|
180
|
+
console.log(formatLogMessage('debug', `[group_window_cleanup] Main Puppeteer window preserved: ${mainPuppeteerPage.url()}`));
|
|
181
|
+
}
|
|
119
182
|
}
|
|
120
183
|
|
|
121
184
|
return {
|
|
122
185
|
success: true,
|
|
123
186
|
closedCount: successfulCloses,
|
|
124
187
|
totalPages: allPages.length,
|
|
125
|
-
|
|
188
|
+
mainPagePreserved: mainPuppeteerPage && !mainPuppeteerPage.isClosed(),
|
|
126
189
|
delayUsed: WINDOW_CLEANUP_DELAY_MS,
|
|
127
190
|
estimatedMemoryFreed: actualMemoryFreed,
|
|
128
|
-
estimatedMemoryFreedFormatted: formatMemory(actualMemoryFreed)
|
|
191
|
+
estimatedMemoryFreedFormatted: formatMemory(actualMemoryFreed),
|
|
192
|
+
cleanupMode: cleanupMode === "all" ? "all" : "default"
|
|
129
193
|
};
|
|
130
194
|
} catch (cleanupErr) {
|
|
131
195
|
if (forceDebug) {
|
|
@@ -135,6 +199,237 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
|
|
|
135
199
|
}
|
|
136
200
|
}
|
|
137
201
|
|
|
202
|
+
/**
|
|
203
|
+
* Checks if a page is safe to close (not actively processing)
|
|
204
|
+
* @param {import('puppeteer').Page} page - Page to check
|
|
205
|
+
* @param {boolean} forceDebug - Debug logging flag
|
|
206
|
+
* @returns {Promise<boolean>} True if page is safe to close
|
|
207
|
+
*/
|
|
208
|
+
async function isPageSafeToClose(page, forceDebug) {
|
|
209
|
+
try {
|
|
210
|
+
if (page.isClosed()) {
|
|
211
|
+
return true; // Already closed
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
const usage = pageUsageTracker.get(page);
|
|
215
|
+
if (!usage) {
|
|
216
|
+
// No usage data - the page was never tracked, so treat it as safe to close (its age is not checked)
|
|
217
|
+
return true;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// Check if page is actively processing
|
|
221
|
+
if (usage.isProcessing) {
|
|
222
|
+
if (forceDebug) {
|
|
223
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Page still processing: ${page.url().substring(0, 50)}...`));
|
|
224
|
+
}
|
|
225
|
+
return false;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
// Check if page has been idle long enough
|
|
229
|
+
const idleTime = Date.now() - usage.lastActivity;
|
|
230
|
+
const isSafe = idleTime >= PAGE_IDLE_THRESHOLD;
|
|
231
|
+
|
|
232
|
+
if (!isSafe && forceDebug) {
|
|
233
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Page not idle long enough: ${Math.round(idleTime/1000)}s < ${PAGE_IDLE_THRESHOLD/1000}s`));
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
return isSafe;
|
|
237
|
+
} catch (err) {
|
|
238
|
+
if (forceDebug) {
|
|
239
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Error checking page safety: ${err.message}`));
|
|
240
|
+
}
|
|
241
|
+
return true; // Assume safe if we can't check
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
/**
|
|
246
|
+
* Updates page usage tracking
|
|
247
|
+
* @param {import('puppeteer').Page} page - Page to update
|
|
248
|
+
* @param {boolean} isProcessing - Whether page is actively processing
|
|
249
|
+
*/
|
|
250
|
+
function updatePageUsage(page, isProcessing = false) {
|
|
251
|
+
try {
|
|
252
|
+
if (!page.isClosed()) {
|
|
253
|
+
pageUsageTracker.set(page, {
|
|
254
|
+
lastActivity: Date.now(),
|
|
255
|
+
isProcessing: isProcessing
|
|
256
|
+
});
|
|
257
|
+
}
|
|
258
|
+
} catch (err) {
|
|
259
|
+
// Ignore errors in usage tracking
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
/**
|
|
264
|
+
* Performs realtime window cleanup - removes oldest pages when threshold is exceeded
|
|
265
|
+
* Waits for site delay + REALTIME_CLEANUP_BUFFER_MS (15 seconds) before cleanup to ensure delayed requests are captured
|
|
266
|
+
* @param {import('puppeteer').Browser} browserInstance - Browser instance
|
|
267
|
+
* @param {number} threshold - Maximum number of pages to keep (default: 8)
|
|
268
|
+
* @param {boolean} forceDebug - Debug logging flag
|
|
269
|
+
* @param {number} siteDelay - Current site's delay value in milliseconds (default: 4000)
|
|
270
|
+
* @returns {Promise<Object>} Cleanup results
|
|
271
|
+
*/
|
|
272
|
+
async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIME_CLEANUP_THRESHOLD, forceDebug, siteDelay = 4000) {
|
|
273
|
+
try {
|
|
274
|
+
const allPages = await browserInstance.pages();
|
|
275
|
+
|
|
276
|
+
// Skip cleanup if we don't have enough pages to warrant it
|
|
277
|
+
if (allPages.length <= Math.max(threshold, REALTIME_CLEANUP_MIN_PAGES)) {
|
|
278
|
+
if (forceDebug) {
|
|
279
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Only ${allPages.length} pages open, threshold is ${threshold} - no cleanup needed`));
|
|
280
|
+
}
|
|
281
|
+
return { success: true, closedCount: 0, totalPages: allPages.length, reason: 'below_threshold' };
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
// Calculate cleanup delay: site delay + REALTIME_CLEANUP_BUFFER_MS (15 second buffer)
|
|
285
|
+
const cleanupDelay = siteDelay + REALTIME_CLEANUP_BUFFER_MS;
|
|
286
|
+
|
|
287
|
+
if (forceDebug) {
|
|
288
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Waiting ${cleanupDelay}ms (site delay: ${siteDelay}ms + ${REALTIME_CLEANUP_BUFFER_MS}ms buffer) before cleanup (threshold: ${threshold})`));
|
|
289
|
+
}
|
|
290
|
+
await new Promise(resolve => setTimeout(resolve, cleanupDelay));
|
|
291
|
+
|
|
292
|
+
const allPagesAfterDelay = await browserInstance.pages();
|
|
293
|
+
|
|
294
|
+
// Find main Puppeteer page (usually about:blank)
|
|
295
|
+
let mainPage = allPagesAfterDelay.find(page => {
|
|
296
|
+
const url = page.url();
|
|
297
|
+
return url === 'about:blank' || url === '' || url.startsWith('chrome://');
|
|
298
|
+
}) || allPagesAfterDelay[0]; // Fallback to first page
|
|
299
|
+
|
|
300
|
+
// Get pages sorted by creation time (oldest first)
|
|
301
|
+
const sortedPages = allPagesAfterDelay
|
|
302
|
+
.filter(page => page !== mainPage && !page.isClosed())
|
|
303
|
+
.sort((a, b) => {
|
|
304
|
+
const timeA = pageCreationTracker.get(a) || 0;
|
|
305
|
+
const timeB = pageCreationTracker.get(b) || 0;
|
|
306
|
+
return timeA - timeB; // Oldest first
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
// Calculate how many pages to close
|
|
310
|
+
const pagesToKeep = threshold - 1; // -1 for main page
|
|
311
|
+
const pagesToClose = sortedPages.slice(0, Math.max(0, sortedPages.length - pagesToKeep));
|
|
312
|
+
|
|
313
|
+
// Filter out pages that are still being used
|
|
314
|
+
const safetyChecks = await Promise.all(
|
|
315
|
+
pagesToClose.map(page => isPageSafeToClose(page, forceDebug))
|
|
316
|
+
);
|
|
317
|
+
|
|
318
|
+
const safePagesToClose = pagesToClose.filter((page, index) => safetyChecks[index]);
|
|
319
|
+
const unsafePagesCount = pagesToClose.length - safePagesToClose.length;
|
|
320
|
+
|
|
321
|
+
if (unsafePagesCount > 0 && forceDebug) {
|
|
322
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Skipping ${unsafePagesCount} active pages for safety`));
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
if (safePagesToClose.length === 0) {
|
|
326
|
+
if (forceDebug) {
|
|
327
|
+
const reason = pagesToClose.length === 0 ?
|
|
328
|
+
`${sortedPages.length} content pages, keeping ${pagesToKeep}` :
|
|
329
|
+
`${pagesToClose.length} pages still active`;
|
|
330
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] No pages need closing (${reason})`));
|
|
331
|
+
}
|
|
332
|
+
return { success: true, closedCount: 0, totalPages: allPagesAfterDelay.length, reason: 'no_cleanup_needed' };
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Close oldest pages
|
|
336
|
+
let closedCount = 0;
|
|
337
|
+
for (const page of safePagesToClose) {
|
|
338
|
+
try {
|
|
339
|
+
if (!page.isClosed()) {
|
|
340
|
+
const pageUrl = page.url();
|
|
341
|
+
await page.close();
|
|
342
|
+
pageCreationTracker.delete(page); // Remove from tracker
|
|
343
|
+
closedCount++;
|
|
344
|
+
|
|
345
|
+
if (forceDebug) {
|
|
346
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Closed old page: ${pageUrl.substring(0, 50)}...`));
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
} catch (closeErr) {
|
|
350
|
+
if (forceDebug) {
|
|
351
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Failed to close page: ${closeErr.message}`));
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
const remainingPages = allPagesAfterDelay.length - closedCount;
|
|
357
|
+
|
|
358
|
+
if (forceDebug) {
|
|
359
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Closed ${closedCount}/${pagesToClose.length} oldest pages (${unsafePagesCount} skipped for safety), ${remainingPages} pages remaining`));
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
return {
|
|
363
|
+
success: true,
|
|
364
|
+
closedCount,
|
|
365
|
+
totalPages: allPagesAfterDelay.length,
|
|
366
|
+
remainingPages,
|
|
367
|
+
threshold,
|
|
368
|
+
cleanupDelay,
|
|
369
|
+
reason: 'cleanup_completed'
|
|
370
|
+
};
|
|
371
|
+
} catch (cleanupErr) {
|
|
372
|
+
if (forceDebug) {
|
|
373
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Cleanup failed: ${cleanupErr.message}`));
|
|
374
|
+
}
|
|
375
|
+
return { success: false, error: cleanupErr.message, closedCount: 0 };
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
/**
|
|
380
|
+
* Determines if a page appears to be from a previous scan and can be safely closed
|
|
381
|
+
* @param {import('puppeteer').Page} page - Page to evaluate
|
|
382
|
+
* @param {boolean} forceDebug - Debug logging flag
|
|
383
|
+
* @returns {Promise<boolean>} True if page appears to be from previous scan
|
|
384
|
+
*/
|
|
385
|
+
async function isPageFromPreviousScan(page, forceDebug) {
|
|
386
|
+
try {
|
|
387
|
+
const url = page.url();
|
|
388
|
+
|
|
389
|
+
// Blank, internal and data: pages are never reported as old by this check
|
|
390
|
+
if (url === 'about:blank' ||
|
|
391
|
+
url === '' ||
|
|
392
|
+
url.startsWith('chrome://') ||
|
|
393
|
+
url.startsWith('chrome-error://') ||
|
|
394
|
+
url.startsWith('data:')) {
|
|
395
|
+
return false; // Don't close blank pages here, handled separately
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
// Inspect the page title (idle time and navigation history are not measured here)
|
|
399
|
+
// This is a heuristic - pages with an error-like or empty title are treated as leftovers from previous scans
|
|
400
|
+
try {
|
|
401
|
+
const title = await page.title();
|
|
402
|
+
// Pages with generic titles or error states are likely old
|
|
403
|
+
if (title.includes('404') ||
|
|
404
|
+
title.includes('Error') ||
|
|
405
|
+
title.includes('Not Found') ||
|
|
406
|
+
title === '') {
|
|
407
|
+
return true;
|
|
408
|
+
}
|
|
409
|
+
} catch (titleErr) {
|
|
410
|
+
// If we can't get title, page might be in bad state
|
|
411
|
+
return true;
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
// Default: any other content page is assumed to be possibly still in use and is kept
|
|
415
|
+
return false; // Conservative - don't close unless we're sure
|
|
416
|
+
} catch (err) {
|
|
417
|
+
if (forceDebug) {
|
|
418
|
+
console.log(formatLogMessage('debug', `[isPageFromPreviousScan] Error evaluating page ${page.url()}: ${err.message}`));
|
|
419
|
+
}
|
|
420
|
+
return false; // Conservative - don't close if we can't evaluate
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
/**
|
|
425
|
+
* Tracks a new page for realtime cleanup purposes
|
|
426
|
+
* @param {import('puppeteer').Page} page - Page to track
|
|
427
|
+
*/
|
|
428
|
+
function trackPageForRealtime(page) {
|
|
429
|
+
pageCreationTracker.set(page, ++pageCreationCounter);
|
|
430
|
+
updatePageUsage(page, false); // Initialize usage tracking
|
|
431
|
+
}
|
|
432
|
+
|
|
138
433
|
/**
|
|
139
434
|
* Quick browser responsiveness test for use during page setup
|
|
140
435
|
* Designed to catch browser degradation between operations
|
|
@@ -672,10 +967,28 @@ module.exports = {
|
|
|
672
967
|
checkBrowserMemory,
|
|
673
968
|
testBrowserConnectivity,
|
|
674
969
|
performGroupWindowCleanup,
|
|
970
|
+
performRealtimeWindowCleanup,
|
|
971
|
+
trackPageForRealtime,
|
|
675
972
|
testNetworkCapability,
|
|
676
973
|
isQuicklyResponsive,
|
|
677
974
|
performHealthAssessment,
|
|
678
975
|
monitorBrowserHealth,
|
|
679
976
|
isBrowserHealthy,
|
|
680
|
-
isCriticalProtocolError
|
|
977
|
+
isCriticalProtocolError,
|
|
978
|
+
updatePageUsage
|
|
681
979
|
};
|
|
980
|
+
|
|
981
|
+
// Clean up tracking maps when pages are closed
|
|
982
|
+
const originalPageClose = require('puppeteer').Page.prototype.close;
|
|
983
|
+
if (originalPageClose) {
|
|
984
|
+
require('puppeteer').Page.prototype.close = async function(...args) {
|
|
985
|
+
try {
|
|
986
|
+
// Clean up tracking data
|
|
987
|
+
pageCreationTracker.delete(this);
|
|
988
|
+
pageUsageTracker.delete(this);
|
|
989
|
+
} catch (err) {
|
|
990
|
+
// Ignore cleanup errors
|
|
991
|
+
}
|
|
992
|
+
return originalPageClose.apply(this, args);
|
|
993
|
+
};
|
|
994
|
+
}
|
package/nwss.1
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
.TH NWSS-SCRIPT 1 "2025" "scanner-script v1.0.
|
|
1
|
+
.TH NWSS-SCRIPT 1 "2025" "scanner-script v1.0.99" "User Commands"
|
|
2
2
|
.SH NAME
|
|
3
3
|
NWSS scanner-script \- Network scanner for malware detection and domain analysis with advanced similarity filtering
|
|
4
4
|
|
|
@@ -27,6 +27,10 @@ Remove rules that already exist in \fIFILE\fR before output (requires \fB\-o\fR)
|
|
|
27
27
|
.B \--append
|
|
28
28
|
Append new rules to output file instead of overwriting (requires \fB\-o\fR).
|
|
29
29
|
|
|
30
|
+
.TP
|
|
31
|
+
.BR \--color ", " \--colour
|
|
32
|
+
Enable colored console output for status messages.
|
|
33
|
+
|
|
30
34
|
.SS Output Format Options
|
|
31
35
|
.TP
|
|
32
36
|
.B \--localhost
|
|
@@ -125,6 +129,14 @@ Console output only: show matching regex, titles, whois/dig/searchstring results
|
|
|
125
129
|
.B \--remove-tempfiles
|
|
126
130
|
Remove Chrome/Puppeteer temporary files before exit.
|
|
127
131
|
|
|
132
|
+
.TP
|
|
133
|
+
.BR \--max-concurrent " \fINUMBER\fR"
|
|
134
|
+
Maximum concurrent site processing (1-50, overrides config/default).
|
|
135
|
+
|
|
136
|
+
.TP
|
|
137
|
+
.BR \--cleanup-interval " \fINUMBER\fR"
|
|
138
|
+
Browser restart interval in URLs processed (1-1000, overrides config/default).
|
|
139
|
+
|
|
128
140
|
.TP
|
|
129
141
|
.BR \-h ", " \--help
|
|
130
142
|
Show help message and exit.
|
|
@@ -134,6 +146,10 @@ Show help message and exit.
|
|
|
134
146
|
Show version information and exit.
|
|
135
147
|
|
|
136
148
|
.SS Validation Options
|
|
149
|
+
.TP
|
|
150
|
+
.B \--cache-requests
|
|
151
|
+
Cache HTTP requests to avoid re-requesting same URLs within scan.
|
|
152
|
+
|
|
137
153
|
.TP
|
|
138
154
|
.B \--validate-config
|
|
139
155
|
Validate config.json file and exit.
|
|
@@ -150,6 +166,14 @@ Clean rule files by removing invalid lines and optionally duplicates (uses \fB\-
|
|
|
150
166
|
.B \--test-validation
|
|
151
167
|
Run domain validation tests and exit.
|
|
152
168
|
|
|
169
|
+
.TP
|
|
170
|
+
.B \--clear-cache
|
|
171
|
+
Clear persistent cache before scanning (improves fresh start performance).
|
|
172
|
+
|
|
173
|
+
.TP
|
|
174
|
+
.B \--ignore-cache
|
|
175
|
+
Bypass all smart caching functionality during scanning.
|
|
176
|
+
|
|
153
177
|
.SH CONFIGURATION
|
|
154
178
|
|
|
155
179
|
Configuration is provided via JSON files. The default configuration file is \fBconfig.json\fR.
|
|
@@ -184,6 +208,30 @@ Number. Similarity threshold percentage for ignore_similar (default: 80).
|
|
|
184
208
|
.B ignore_similar_ignored_domains
|
|
185
209
|
Boolean. Ignore domains similar to ignoreDomains list (default: true).
|
|
186
210
|
|
|
211
|
+
.TP
|
|
212
|
+
.B max_concurrent_sites
|
|
213
|
+
Number. Maximum concurrent site processing (1-50, default: 6).
|
|
214
|
+
|
|
215
|
+
.TP
|
|
216
|
+
.B resource_cleanup_interval
|
|
217
|
+
Number. Browser restart interval in URLs processed (1-1000, default: 80).
|
|
218
|
+
|
|
219
|
+
.TP
|
|
220
|
+
.B cache_path
|
|
221
|
+
String. Directory path for persistent cache storage (default: ".cache").
|
|
222
|
+
|
|
223
|
+
.TP
|
|
224
|
+
.B cache_max_size
|
|
225
|
+
Number. Maximum number of entries in cache (default: 5000).
|
|
226
|
+
|
|
227
|
+
.TP
|
|
228
|
+
.B cache_autosave_minutes
|
|
229
|
+
Number. Interval for automatic cache saves in minutes (default: 1).
|
|
230
|
+
|
|
231
|
+
.TP
|
|
232
|
+
.B cache_requests
|
|
233
|
+
Boolean. Enable HTTP request response caching (default: false).
|
|
234
|
+
|
|
187
235
|
.SS Per-Site Configuration Options
|
|
188
236
|
|
|
189
237
|
.TP
|
|
@@ -194,6 +242,10 @@ Single URL string or array of URLs to scan.
|
|
|
194
242
|
.B filterRegex
|
|
195
243
|
Regex pattern(s) to match suspicious requests.
|
|
196
244
|
|
|
245
|
+
.TP
|
|
246
|
+
.B regex_and
|
|
247
|
+
Boolean. Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL (default: false).
|
|
248
|
+
|
|
197
249
|
.TP
|
|
198
250
|
.B comments
|
|
199
251
|
Documentation strings or notes - completely ignored by the scanner. Can be a single string or array of strings. Used for adding context, URLs, timestamps, or any documentation notes to configuration files.
|
|
@@ -234,6 +286,10 @@ Spoof User-Agent: \fB"chrome"\fR, \fB"firefox"\fR, or \fB"safari"\fR.
|
|
|
234
286
|
.B interact
|
|
235
287
|
Boolean. Simulate mouse movements and clicks.
|
|
236
288
|
|
|
289
|
+
.TP
|
|
290
|
+
.B interact_intensity
|
|
291
|
+
String. Interaction simulation intensity: \fB"low"\fR, \fB"medium"\fR, \fB"high"\fR (default: "medium").
|
|
292
|
+
|
|
237
293
|
.TP
|
|
238
294
|
.B delay
|
|
239
295
|
Milliseconds to wait after page load (default: 4000).
|
|
@@ -242,6 +298,10 @@ Milliseconds to wait after page load (default: 4000).
|
|
|
242
298
|
.B reload
|
|
243
299
|
Number of times to reload the page (default: 1).
|
|
244
300
|
|
|
301
|
+
.TP
|
|
302
|
+
.B forcereload
|
|
303
|
+
Boolean. Force an additional reload after the normal reloads have completed.
|
|
304
|
+
|
|
245
305
|
.TP
|
|
246
306
|
.B timeout
|
|
247
307
|
Request timeout in milliseconds (default: 30000).
|
|
@@ -258,6 +318,14 @@ Boolean. Allow third-party request matching (default: true).
|
|
|
258
318
|
.B fingerprint_protection
|
|
259
319
|
Boolean or \fB"random"\fR. Enable browser fingerprint spoofing.
|
|
260
320
|
|
|
321
|
+
.TP
|
|
322
|
+
.B referrer_headers
|
|
323
|
+
String, Array, or Object. Set referrer header for realistic traffic sources.
|
|
324
|
+
|
|
325
|
+
.TP
|
|
326
|
+
.B custom_headers
|
|
327
|
+
Object. Add custom HTTP headers to requests.
|
|
328
|
+
|
|
261
329
|
.TP
|
|
262
330
|
.B ignore_similar
|
|
263
331
|
Boolean. Override global ignore_similar setting for this site.
|
|
@@ -274,6 +342,10 @@ Boolean. Override global ignore_similar_ignored_domains for this site.
|
|
|
274
342
|
.B even_blocked
|
|
275
343
|
Boolean. Add matching rules even if requests are blocked (default: false).
|
|
276
344
|
|
|
345
|
+
.TP
|
|
346
|
+
.B bypass_cache
|
|
347
|
+
Boolean. Skip all caching for this site's URLs (default: false).
|
|
348
|
+
|
|
277
349
|
.TP
|
|
278
350
|
.B whois
|
|
279
351
|
Array of terms that must ALL be found in WHOIS data (AND logic).
|
|
@@ -353,10 +425,6 @@ Object. Custom page.goto() options for Puppeteer navigation. Available options:
|
|
|
353
425
|
.RE
|
|
354
426
|
Example: \fB{"waitUntil": "networkidle2", "timeout": 60000}\fR
|
|
355
427
|
|
|
356
|
-
.TP
|
|
357
|
-
.B forcereload
|
|
358
|
-
Boolean. Force an additional reload with cache disabled after normal reloads.
|
|
359
|
-
|
|
360
428
|
.TP
|
|
361
429
|
.B clear_sitedata
|
|
362
430
|
Boolean. Clear all cookies, cache, and storage before each page load (default: false).
|
|
@@ -389,6 +457,45 @@ Boolean. Launch browser with GUI for this specific site.
|
|
|
389
457
|
.B adblock_rules
|
|
390
458
|
Boolean. Generate adblock filter rules with resource types for this site.
|
|
391
459
|
|
|
460
|
+
.TP
|
|
461
|
+
.B window_cleanup
|
|
462
|
+
Boolean or String. Close old/unused browser windows and tabs after the entire URL group completes (default: false).
|
|
463
|
+
.RS
|
|
464
|
+
.IP \(bu 4
|
|
465
|
+
\fBfalse\fR - No cleanup performed
|
|
466
|
+
.IP \(bu 4
|
|
467
|
+
\fBtrue\fR - Conservative cleanup: closes pages that appear to be from previous scans while preserving main Puppeteer window
|
|
468
|
+
.IP \(bu 4
|
|
469
|
+
\fB"all"\fR - Aggressive cleanup: closes all content pages from previous operations while preserving main Puppeteer window
|
|
470
|
+
.RE
|
|
471
|
+
Both modes wait 16 seconds before cleanup to allow final operations to complete, and always preserve the main browser window to maintain browser instance stability. Cleanup targets popup windows, extra tabs, and pages from previously completed site groups, but never affects actively processing pages.
|
|
472
|
+
|
|
473
|
+
.\" end of window_cleanup entry
|
|
474
|
+
|
|
475
|
+
.SS Redirect Handling Options
|
|
476
|
+
|
|
477
|
+
.TP
|
|
478
|
+
.B follow_redirects
|
|
479
|
+
Boolean. Follow redirects to new domains (default: true).
|
|
480
|
+
|
|
481
|
+
.TP
|
|
482
|
+
.B max_redirects
|
|
483
|
+
Number. Maximum number of redirects to follow (default: 10).
|
|
484
|
+
|
|
485
|
+
.TP
|
|
486
|
+
.B js_redirect_timeout
|
|
487
|
+
Milliseconds. Time to wait for JavaScript redirects (default: 5000).
|
|
488
|
+
|
|
489
|
+
.TP
|
|
490
|
+
.B detect_js_patterns
|
|
491
|
+
Boolean. Analyze page source for redirect patterns (default: true).
|
|
492
|
+
|
|
493
|
+
.TP
|
|
494
|
+
.B redirect_timeout_multiplier
|
|
495
|
+
Number. Increase timeout for redirected URLs (default: 1.5).
|
|
496
|
+
|
|
497
|
+
.SS Cloudflare Protection Options
|
|
498
|
+
|
|
392
499
|
.TP
|
|
393
500
|
.B cloudflare_phish
|
|
394
501
|
Boolean. Auto-click through Cloudflare phishing warnings (default: false).
|
|
@@ -397,6 +504,24 @@ Boolean. Auto-click through Cloudflare phishing warnings (default: false).
|
|
|
397
504
|
.B cloudflare_bypass
|
|
398
505
|
Boolean. Auto-solve Cloudflare "Verify you are human" challenges (default: false).
|
|
399
506
|
|
|
507
|
+
.TP
|
|
508
|
+
.B cloudflare_parallel_detection
|
|
509
|
+
Boolean. Use parallel detection for faster Cloudflare checks (default: true).
|
|
510
|
+
|
|
511
|
+
.TP
|
|
512
|
+
.B cloudflare_max_retries
|
|
513
|
+
Number. Maximum retry attempts for Cloudflare operations (default: 3).
|
|
514
|
+
|
|
515
|
+
.TP
|
|
516
|
+
.B cloudflare_cache_ttl
|
|
517
|
+
Milliseconds. TTL for Cloudflare detection cache (default: 300000 - 5 minutes).
|
|
518
|
+
|
|
519
|
+
.TP
|
|
520
|
+
.B cloudflare_retry_on_error
|
|
521
|
+
Boolean. Enable retry logic for Cloudflare operations (default: true).
|
|
522
|
+
|
|
523
|
+
.SS FlowProxy Protection Options
|
|
524
|
+
|
|
400
525
|
.TP
|
|
401
526
|
.B flowproxy_detection
|
|
402
527
|
Boolean. Enable flowProxy protection detection and handling (default: false).
|
|
@@ -421,6 +546,24 @@ Milliseconds. Delay for rate limiting (default: 30000).
|
|
|
421
546
|
.B flowproxy_additional_delay
|
|
422
547
|
Milliseconds. Additional processing delay (default: 5000).
|
|
423
548
|
|
|
549
|
+
.SS Advanced Options
|
|
550
|
+
|
|
551
|
+
.TP
|
|
552
|
+
.B interact_duration
|
|
553
|
+
Milliseconds. Duration of interaction simulation (default: 2000).
|
|
554
|
+
|
|
555
|
+
.TP
|
|
556
|
+
.B interact_scrolling
|
|
557
|
+
Boolean. Enable scrolling simulation (default: true).
|
|
558
|
+
|
|
559
|
+
.TP
|
|
560
|
+
.B interact_clicks
|
|
561
|
+
Boolean. Enable element clicking simulation (default: false).
|
|
562
|
+
|
|
563
|
+
.TP
|
|
564
|
+
.B interact_typing
|
|
565
|
+
Boolean. Enable typing simulation (default: false).
|
|
566
|
+
|
|
424
567
|
.TP
|
|
425
568
|
.B verbose
|
|
426
569
|
Boolean. Enable verbose output for this specific site.
|
|
@@ -461,6 +604,28 @@ Boolean. Force Pi-hole regex output format for this site.
|
|
|
461
604
|
.B plain
|
|
462
605
|
Boolean. Force plain domain output for this site.
|
|
463
606
|
|
|
607
|
+
.SS Referrer Header Options
|
|
608
|
+
|
|
609
|
+
.B Simple formats:
|
|
610
|
+
.RS
|
|
611
|
+
.IP \(bu 4
|
|
612
|
+
\fB"referrer_headers": "https://google.com/search?q=example"\fR
|
|
613
|
+
.IP \(bu 4
|
|
614
|
+
\fB"referrer_headers": ["url1", "url2"]\fR
|
|
615
|
+
.RE
|
|
616
|
+
|
|
617
|
+
.B Smart modes:
|
|
618
|
+
.RS
|
|
619
|
+
.IP \(bu 4
|
|
620
|
+
\fB"referrer_headers": {"mode": "random_search", "search_terms": ["reviews"]}\fR
|
|
621
|
+
.IP \(bu 4
|
|
622
|
+
\fB"referrer_headers": {"mode": "social_media"}\fR
|
|
623
|
+
.IP \(bu 4
|
|
624
|
+
\fB"referrer_headers": {"mode": "direct_navigation"}\fR
|
|
625
|
+
.IP \(bu 4
|
|
626
|
+
\fB"referrer_headers": {"mode": "custom", "custom": ["https://news.ycombinator.com/"]}\fR
|
|
627
|
+
.RE
|
|
628
|
+
|
|
464
629
|
.SH SIMILARITY FILTERING
|
|
465
630
|
|
|
466
631
|
The scanner includes advanced similarity filtering to reduce noise and improve detection accuracy by automatically ignoring domains that are very similar to ones already found or explicitly ignored.
|
|
@@ -530,6 +695,16 @@ With default settings (\fBignore_similar_threshold: 80\fR):
|
|
|
530
695
|
}
|
|
531
696
|
.EE
|
|
532
697
|
|
|
698
|
+
.SS Configuration with regex AND logic:
|
|
699
|
+
.EX
|
|
700
|
+
{
|
|
701
|
+
"url": "https://ad-network.com",
|
|
702
|
+
"filterRegex": ["tracking", "analytics"],
|
|
703
|
+
"regex_and": true,
|
|
704
|
+
"resourceTypes": ["script", "fetch"]
|
|
705
|
+
}
|
|
706
|
+
.EE
|
|
707
|
+
|
|
533
708
|
.SS Configuration with similarity filtering:
|
|
534
709
|
.EX
|
|
535
710
|
{
|
|
@@ -661,6 +836,36 @@ node nwss.js -o dnsmasq.conf --dnsmasq --titles
|
|
|
661
836
|
node nwss.js -o pihole_regex.txt --pihole --debug
|
|
662
837
|
.EE
|
|
663
838
|
|
|
839
|
+
.SS Performance tuning with concurrency:
|
|
840
|
+
.EX
|
|
841
|
+
node nwss.js --max-concurrent 12 --cleanup-interval 100 -o rules.txt
|
|
842
|
+
.EE
|
|
843
|
+
|
|
844
|
+
.SS Cache management:
|
|
845
|
+
.EX
|
|
846
|
+
node nwss.js --clear-cache --cache-requests -o rules.txt
|
|
847
|
+
node nwss.js --ignore-cache --debug -o rules.txt
|
|
848
|
+
.EE
|
|
849
|
+
|
|
850
|
+
.SS Stealth configuration with referrer headers:
|
|
851
|
+
.EX
|
|
852
|
+
{
|
|
853
|
+
"url": "https://e-commerce-site.com",
|
|
854
|
+
"userAgent": "chrome",
|
|
855
|
+
"fingerprint_protection": "random",
|
|
856
|
+
"referrer_headers": {
|
|
857
|
+
"mode": "random_search",
|
|
858
|
+
"search_terms": ["product reviews", "best deals"]
|
|
859
|
+
},
|
|
860
|
+
"custom_headers": {
|
|
861
|
+
"Accept-Language": "en-US,en;q=0.9"
|
|
862
|
+
},
|
|
863
|
+
"interact": true,
|
|
864
|
+
"interact_intensity": "high",
|
|
865
|
+
"filterRegex": "analytics|tracking|ads"
|
|
866
|
+
}
|
|
867
|
+
.EE
|
|
868
|
+
|
|
664
869
|
.SS Cloudflare bypass and fingerprint spoofing:
|
|
665
870
|
.EX
|
|
666
871
|
{
|
|
@@ -688,6 +893,35 @@ node nwss.js -o pihole_regex.txt --pihole --debug
|
|
|
688
893
|
}
|
|
689
894
|
.EE
|
|
690
895
|
|
|
896
|
+
.SS Window cleanup for memory management:
|
|
897
|
+
.EX
|
|
898
|
+
{
|
|
899
|
+
"url": [
|
|
900
|
+
"https://site1.com",
|
|
901
|
+
"https://site2.com",
|
|
902
|
+
"https://site3.com"
|
|
903
|
+
],
|
|
904
|
+
"filterRegex": "\\\\.(space|website)\\\\b",
|
|
905
|
+
"window_cleanup": "all",
|
|
906
|
+
"resourceTypes": ["script", "fetch"]
|
|
907
|
+
}
|
|
908
|
+
.EE
|
|
909
|
+
|
|
910
|
+
.SS Conservative window cleanup:
|
|
911
|
+
.EX
|
|
912
|
+
{
|
|
913
|
+
"url": "https://popup-heavy-site.com",
|
|
914
|
+
"filterRegex": "\\\\.(top|buzz)\\\\b",
|
|
915
|
+
"window_cleanup": true,
|
|
916
|
+
"interact": true,
|
|
917
|
+
"reload": 2,
|
|
918
|
+
"comments": [
|
|
919
|
+
"Site opens many popup windows",
|
|
920
|
+
"Conservative cleanup preserves potentially active content"
|
|
921
|
+
]
|
|
922
|
+
}
|
|
923
|
+
.EE
|
|
924
|
+
|
|
691
925
|
.SH OUTPUT FORMATS
|
|
692
926
|
|
|
693
927
|
The scanner supports multiple output formats for different blocking systems:
|
|
@@ -740,6 +974,10 @@ Default configuration file containing scan targets and rules.
|
|
|
740
974
|
.B logs/
|
|
741
975
|
Directory created for debug and matched URL logs when \fB\--debug\fR or \fB\--dumpurls\fR is used.
|
|
742
976
|
|
|
977
|
+
.TP
|
|
978
|
+
.B .cache/
|
|
979
|
+
Default cache directory for smart caching functionality.
|
|
980
|
+
|
|
743
981
|
.TP
|
|
744
982
|
.B user.action
|
|
745
983
|
Common Privoxy action file when using \fB\--privoxy\fR output.
|
|
@@ -747,7 +985,7 @@ Common Privoxy action file when using \fB\--privoxy\fR output.
|
|
|
747
985
|
.SH DETECTION METHODS
|
|
748
986
|
|
|
749
987
|
.SS URL Pattern Matching
|
|
750
|
-
Uses regex patterns to identify suspicious domains and request URLs.
|
|
988
|
+
Uses regex patterns to identify suspicious domains and request URLs with support for AND/OR logic.
|
|
751
989
|
|
|
752
990
|
.SS Content Analysis
|
|
753
991
|
Downloads page content with curl and searches for malicious strings using JavaScript or grep.
|
|
@@ -784,6 +1022,28 @@ Detects and handles FlowProxy protection systems.
|
|
|
784
1022
|
.SS Intelligent Domain Filtering
|
|
785
1023
|
Advanced similarity algorithms prevent duplicate detection across international domains and variations.
|
|
786
1024
|
|
|
1025
|
+
.SS Smart Caching
|
|
1026
|
+
Persistent caching system for improved performance across multiple scans.
|
|
1027
|
+
|
|
1028
|
+
.SS Window Management
|
|
1029
|
+
Intelligent browser window and tab cleanup to prevent memory accumulation:
|
|
1030
|
+
.RS
|
|
1031
|
+
.IP \(bu 4
|
|
1032
|
+
\fBConservative mode\fR (\fBwindow_cleanup: true\fR): Selectively closes pages that appear to be leftovers from previous scans based on URL patterns and page state analysis
|
|
1033
|
+
.IP \(bu 4
|
|
1034
|
+
\fBAggressive mode\fR (\fBwindow_cleanup: "all"\fR): Closes all content pages from previous operations for maximum memory recovery
|
|
1035
|
+
.IP \(bu 4
|
|
1036
|
+
\fBMain window preservation\fR: Both modes always preserve the main Puppeteer browser window (typically about:blank) to maintain browser instance stability
|
|
1037
|
+
.IP \(bu 4
|
|
1038
|
+
\fBPopup window handling\fR: Automatically detects and closes popup windows created by previous site scans
|
|
1039
|
+
.IP \(bu 4
|
|
1040
|
+
\fBTiming protection\fR: 16-second delay ensures no active operations are interrupted during cleanup
|
|
1041
|
+
.IP \(bu 4
|
|
1042
|
+
\fBMemory estimation\fR: Reports estimated memory freed from closed windows for performance monitoring
|
|
1043
|
+
.IP \(bu 4
|
|
1044
|
+
\fBActive page protection\fR: Never affects pages currently being processed by concurrent scanning operations
|
|
1045
|
+
.RE
|
|
1046
|
+
|
|
787
1047
|
.SH EXIT STATUS
|
|
788
1048
|
.TP
|
|
789
1049
|
.B 0
|
|
@@ -821,4 +1081,4 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
821
1081
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
822
1082
|
|
|
823
1083
|
You should have received a copy of the GNU General Public License along with
|
|
824
|
-
this program. If not, see <https://www.gnu.org/licenses/>.
|
|
1084
|
+
this program. If not, see <https://www.gnu.org/licenses/>.
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.99 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -82,6 +82,8 @@ const CONCURRENCY_LIMITS = {
|
|
|
82
82
|
HIGH_CONCURRENCY_THRESHOLD: 12 // Auto-enable aggressive caching above this
|
|
83
83
|
};
|
|
84
84
|
|
|
85
|
+
const REALTIME_CLEANUP_THRESHOLD = 8; // Default pages to keep for realtime cleanup
|
|
86
|
+
|
|
85
87
|
/**
|
|
86
88
|
* Detects the installed Puppeteer version dynamically
|
|
87
89
|
* @returns {Object} Version info and compatibility settings
|
|
@@ -122,10 +124,10 @@ function detectPuppeteerVersion() {
|
|
|
122
124
|
// Enhanced redirect handling
|
|
123
125
|
const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
|
|
124
126
|
// Ensure web browser is working correctly
|
|
125
|
-
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup } = require('./lib/browserhealth');
|
|
127
|
+
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage } = require('./lib/browserhealth');
|
|
126
128
|
|
|
127
129
|
// --- Script Configuration & Constants ---
|
|
128
|
-
const VERSION = '1.0.
|
|
130
|
+
const VERSION = '1.0.99'; // Script version
|
|
129
131
|
|
|
130
132
|
// get startTime
|
|
131
133
|
const startTime = Date.now();
|
|
@@ -571,7 +573,11 @@ Advanced Options:
|
|
|
571
573
|
dig_subdomain: true/false Use subdomain for dig lookup instead of root domain (default: false)
|
|
572
574
|
digRecordType: "A" DNS record type for dig (default: A)
|
|
573
575
|
|
|
574
|
-
window_cleanup: true/false
|
|
576
|
+
window_cleanup: true/false/"realtime"/"all" Window cleanup mode:
|
|
577
|
+
true/false - Close extra windows after URL group completes (default: false)
|
|
578
|
+
"realtime" - Continuously cleanup oldest pages when threshold exceeded
|
|
579
|
+
"all" - Aggressive cleanup of all content pages after group
|
|
580
|
+
window_cleanup_threshold: <number> For realtime mode: max pages to keep open (default: 8)
|
|
575
581
|
|
|
576
582
|
Referrer Header Options:
|
|
577
583
|
referrer_headers: "https://google.com" Single referrer URL
|
|
@@ -1474,7 +1480,27 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1474
1480
|
throw new Error('Failed to create valid page instance');
|
|
1475
1481
|
}
|
|
1476
1482
|
|
|
1477
|
-
|
|
1483
|
+
// Track page for realtime cleanup
|
|
1484
|
+
trackPageForRealtime(page);
|
|
1485
|
+
|
|
1486
|
+
// Mark page as actively processing
|
|
1487
|
+
updatePageUsage(page, true);
|
|
1488
|
+
|
|
1489
|
+
// Perform realtime cleanup if enabled
|
|
1490
|
+
if (siteConfig.window_cleanup === "realtime") {
|
|
1491
|
+
const threshold = typeof siteConfig.window_cleanup_threshold === 'number'
|
|
1492
|
+
? siteConfig.window_cleanup_threshold
|
|
1493
|
+
: REALTIME_CLEANUP_THRESHOLD;
|
|
1494
|
+
|
|
1495
|
+
// Get the site's delay value for cleanup timing
|
|
1496
|
+
const siteDelay = siteConfig.delay || 4000;
|
|
1497
|
+
|
|
1498
|
+
const realtimeResult = await performRealtimeWindowCleanup(browserInstance, threshold, forceDebug, siteDelay);
|
|
1499
|
+
if (realtimeResult.success && realtimeResult.closedCount > 0 && forceDebug) {
|
|
1500
|
+
console.log(formatLogMessage('debug', `[realtime_cleanup] Cleaned ${realtimeResult.closedCount} old pages, ${realtimeResult.remainingPages} remaining`));
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
|
|
1478
1504
|
// Set aggressive timeouts for problematic operations
|
|
1479
1505
|
// Optimized timeouts for Puppeteer 23.x responsiveness
|
|
1480
1506
|
page.setDefaultTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_PAGE_REDUCED));
|
|
@@ -1850,9 +1876,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1850
1876
|
let curlUserAgent = '';
|
|
1851
1877
|
if (useCurl && siteConfig.userAgent) {
|
|
1852
1878
|
const userAgents = {
|
|
1853
|
-
chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
1854
|
-
firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:
|
|
1855
|
-
safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.
|
|
1879
|
+
chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
|
|
1880
|
+
firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
|
1881
|
+
safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"
|
|
1856
1882
|
};
|
|
1857
1883
|
curlUserAgent = userAgents[siteConfig.userAgent.toLowerCase()] || '';
|
|
1858
1884
|
}
|
|
@@ -2612,6 +2638,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2612
2638
|
request.continue();
|
|
2613
2639
|
});
|
|
2614
2640
|
|
|
2641
|
+
// Mark page as actively processing network requests
|
|
2642
|
+
updatePageUsage(page, true);
|
|
2643
|
+
|
|
2615
2644
|
// Add response handler ONLY if searchstring/searchstring_and is defined AND neither curl nor grep is enabled
|
|
2616
2645
|
if ((hasSearchString || hasSearchStringAnd) && !useCurl && !useGrep) {
|
|
2617
2646
|
const responseHandler = createResponseHandler({
|
|
@@ -2649,6 +2678,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2649
2678
|
// Create optimized interaction configuration for this site
|
|
2650
2679
|
const interactionConfig = createInteractionConfig(currentUrl, siteConfig);
|
|
2651
2680
|
|
|
2681
|
+
// Mark page as actively processing interactions
|
|
2682
|
+
updatePageUsage(page, true);
|
|
2683
|
+
|
|
2652
2684
|
// --- Runtime CSS Element Blocking (Fallback) ---
|
|
2653
2685
|
// Apply CSS blocking after page load as a fallback in case evaluateOnNewDocument didn't work
|
|
2654
2686
|
if (cssBlockedSelectors && Array.isArray(cssBlockedSelectors) && cssBlockedSelectors.length > 0) {
|
|
@@ -2875,6 +2907,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2875
2907
|
console.log(formatLogMessage('info', `${messageColors.loaded('Loaded:')} (${siteCounter}/${totalUrls}) ${currentUrl}`));
|
|
2876
2908
|
await page.evaluate(() => { console.log('Safe to evaluate on loaded page.'); });
|
|
2877
2909
|
|
|
2910
|
+
// Mark page as processing frames
|
|
2911
|
+
updatePageUsage(page, true);
|
|
2912
|
+
|
|
2878
2913
|
// Wait for iframes to load and log them
|
|
2879
2914
|
if (forceDebug) {
|
|
2880
2915
|
try {
|
|
@@ -2899,6 +2934,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2899
2934
|
console.log(formatLogMessage('debug', `Frame debugging failed: ${frameDebugErr.message}`));
|
|
2900
2935
|
}
|
|
2901
2936
|
}
|
|
2937
|
+
|
|
2938
|
+
// Page finished initial loading - mark as idle
|
|
2939
|
+
updatePageUsage(page, false);
|
|
2902
2940
|
} catch (err) {
|
|
2903
2941
|
// Enhanced error handling for redirect timeouts using redirect module
|
|
2904
2942
|
const timeoutResult = await handleRedirectTimeout(page, currentUrl, err, safeGetDomain, forceDebug, formatLogMessage);
|
|
@@ -2916,6 +2954,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2916
2954
|
|
|
2917
2955
|
if (interactEnabled && !disableInteract) {
|
|
2918
2956
|
if (forceDebug) console.log(formatLogMessage('debug', `interaction simulation enabled for ${currentUrl}`));
|
|
2957
|
+
|
|
2958
|
+
// Mark page as processing during interactions
|
|
2959
|
+
updatePageUsage(page, true);
|
|
2919
2960
|
// Use enhanced interaction module
|
|
2920
2961
|
await performPageInteraction(page, currentUrl, interactionConfig, forceDebug);
|
|
2921
2962
|
}
|
|
@@ -2945,6 +2986,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2945
2986
|
// Use fast timeout helper for consistent Puppeteer 23.x compatibility
|
|
2946
2987
|
|
|
2947
2988
|
// Handle reloads - use force reload mechanism if forcereload is enabled
|
|
2989
|
+
// Mark page as processing during reloads
|
|
2990
|
+
updatePageUsage(page, true);
|
|
2991
|
+
|
|
2948
2992
|
const totalReloads = (siteConfig.reload || 1) - 1; // Subtract 1 because initial load counts as first
|
|
2949
2993
|
const useForceReload = siteConfig.forcereload === true;
|
|
2950
2994
|
|
|
@@ -3049,6 +3093,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3049
3093
|
await fastTimeout(delayMs);
|
|
3050
3094
|
}
|
|
3051
3095
|
}
|
|
3096
|
+
|
|
3097
|
+
// Mark page as idle after all processing complete
|
|
3098
|
+
updatePageUsage(page, false);
|
|
3052
3099
|
|
|
3053
3100
|
if (dryRunMode) {
|
|
3054
3101
|
// Get page title for dry run output
|
|
@@ -3172,6 +3219,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3172
3219
|
// Guaranteed resource cleanup - this runs regardless of success or failure
|
|
3173
3220
|
|
|
3174
3221
|
if (cdpSessionManager) {
|
|
3222
|
+
// Mark page as idle when cleanup starts
|
|
3223
|
+
if (page && !page.isClosed()) {
|
|
3224
|
+
updatePageUsage(page, false);
|
|
3225
|
+
}
|
|
3175
3226
|
await cdpSessionManager.cleanup();
|
|
3176
3227
|
}
|
|
3177
3228
|
|
|
@@ -3405,14 +3456,19 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3405
3456
|
for (const [siteKey, siteTasks] of tasksBySite) {
|
|
3406
3457
|
const siteConfig = siteTasks[0].config; // All tasks in group have same config
|
|
3407
3458
|
|
|
3408
|
-
if (siteConfig.window_cleanup === true) {
|
|
3459
|
+
if (siteConfig.window_cleanup === true || siteConfig.window_cleanup === "all" || siteConfig.window_cleanup === "realtime") {
|
|
3409
3460
|
const urlCount = siteTasks.length;
|
|
3410
3461
|
const groupDescription = `${urlCount} URLs from site group ${++siteGroupIndex}`;
|
|
3462
|
+
const cleanupMode = siteConfig.window_cleanup === "realtime" ? true : siteConfig.window_cleanup; // Pass through the exact value, but don't pass "realtime" to group cleanup
|
|
3411
3463
|
|
|
3412
3464
|
try {
|
|
3413
|
-
const groupCleanupResult = await performGroupWindowCleanup(browser, groupDescription, forceDebug);
|
|
3465
|
+
const groupCleanupResult = await performGroupWindowCleanup(browser, groupDescription, forceDebug, cleanupMode);
|
|
3414
3466
|
if (!silentMode && groupCleanupResult.success && groupCleanupResult.closedCount > 0) {
|
|
3415
|
-
|
|
3467
|
+
const modeText = cleanupMode === "all" ? "(aggressive)" : "(conservative)";
|
|
3468
|
+
console.log(`🗑️ Group cleanup: ${groupCleanupResult.closedCount} old windows closed ${modeText} after completing ${groupDescription}`);
|
|
3469
|
+
if (groupCleanupResult.mainPagePreserved) {
|
|
3470
|
+
console.log(`✅ Main Puppeteer window preserved during cleanup`);
|
|
3471
|
+
}
|
|
3416
3472
|
}
|
|
3417
3473
|
} catch (groupCleanupErr) {
|
|
3418
3474
|
if (forceDebug) console.log(formatLogMessage('debug', `Group window cleanup failed: ${groupCleanupErr.message}`));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.99",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|