@fanboynz/network-scanner 1.0.96 → 1.0.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -151,6 +151,9 @@ Example:
151
151
  | `blocked` | Array | - | Domains or regexes to block during scanning |
152
152
  | `even_blocked` | Boolean | `false` | Add matching rules even if requests are blocked |
153
153
  | `bypass_cache` | Boolean | `false` | Skip all caching for this site's URLs |
154
+ | `window_cleanup` | Boolean or String | `false` | Close old/unused browser windows/tabs after the entire URL group completes |
155
+
156
+ **Window cleanup modes:** `false` (disabled), `true` (conservative - closes obvious leftovers), `"all"` (aggressive - closes all content pages). Both active modes preserve the main Puppeteer window and wait 16 seconds before cleanup to avoid interfering with active operations.
154
157
 
155
158
 
156
159
  ### Redirect Handling Options
@@ -320,6 +323,39 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
320
323
 
321
324
  ### Stealth Configuration Examples
322
325
 
326
+ #### Memory Management with Window Cleanup
327
+ ```json
328
+ {
329
+ "url": [
330
+ "https://popup-heavy-site1.com",
331
+ "https://popup-heavy-site2.com",
332
+ "https://popup-heavy-site3.com"
333
+ ],
334
+ "filterRegex": "\\.(space|website|tech)\\b",
335
+ "window_cleanup": "all",
336
+ "interact": true,
337
+ "reload": 2,
338
+ "resourceTypes": ["script", "fetch"],
339
+ "comments": "Aggressive cleanup for sites that open many popups"
340
+ }
341
+ ```
342
+
343
+ #### Conservative Memory Management
344
+ ```json
345
+ {
346
+ "url": "https://complex-site.com",
347
+ "filterRegex": "analytics|tracking",
348
+ "window_cleanup": true,
349
+ "interact": true,
350
+ "delay": 8000,
351
+ "reload": 3,
352
+ "comments": [
353
+ "Conservative cleanup preserves potentially active content",
354
+ "Good for sites with complex iframe structures"
355
+ ]
356
+ }
357
+ ```
358
+
323
359
  #### E-commerce Site Scanning
324
360
  ```json
325
361
  {
@@ -368,6 +404,22 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
368
404
 
369
405
  ---
370
406
 
407
+ ## Memory Management
408
+
409
+ The scanner includes intelligent window management to prevent memory accumulation during long scans:
410
+
411
+ - **Conservative cleanup** (`window_cleanup: true`): Selectively closes pages that appear to be leftovers from previous scans
412
+ - **Aggressive cleanup** (`window_cleanup: "all"`): Closes all content pages from previous operations for maximum memory recovery
413
+ - **Main window preservation**: Both modes always preserve the main Puppeteer browser window to maintain stability
414
+ - **Popup window handling**: Automatically detects and closes popup windows created by previous site scans
415
+ - **Timing protection**: 16-second delay ensures no active operations are interrupted during cleanup
416
+ - **Active page protection**: Never affects pages currently being processed by concurrent scanning operations
417
+ - **Memory reporting**: Reports estimated memory freed from closed windows for performance monitoring
418
+
419
+ Use aggressive cleanup for sites that open many popups or when processing large numbers of URLs. Use conservative cleanup when you want to preserve potentially active content but still free obvious leftovers.
420
+
421
+ ---
422
+
371
423
  ## INSTALL
372
424
 
373
425
  #### (Ubuntu as example). NOTE: Use Chrome and not Chromium for best compatibility.
@@ -5,6 +5,430 @@
5
5
 
6
6
  const { formatLogMessage, messageColors } = require('./colorize');
7
7
 
8
+
9
+ // Window cleanup delay constant
10
+ const WINDOW_CLEANUP_DELAY_MS = 16000;
11
+ const REALTIME_CLEANUP_BUFFER_MS = 3000; // Additional buffer time after site delay
12
+ const REALTIME_CLEANUP_THRESHOLD = 8; // Default number of pages to keep
13
+ const REALTIME_CLEANUP_MIN_PAGES = 3; // Minimum pages before cleanup kicks in
14
+
15
+ // Track page creation order for realtime cleanup
16
+ const pageCreationTracker = new Map(); // Maps page -> creation sequence number (monotonic counter, not a timestamp)
17
+ let pageCreationCounter = 0;
18
+
19
+ // Track page usage for realtime cleanup safety
20
+ const pageUsageTracker = new Map(); // Maps page -> { lastActivity: timestamp, isProcessing: boolean }
21
+ const PAGE_IDLE_THRESHOLD = 10000; // 10 seconds of inactivity before considering page safe to clean
22
+
23
+ /**
24
+ * Performs group-level window cleanup after all URLs in a site group complete
25
+ * Closes extra windows except the main browser window: every content page in "all" mode, only likely leftovers otherwise
26
+ * @param {import('puppeteer').Browser} browserInstance - Browser instance
27
+ * @param {string} groupDescription - Description of the group for logging
28
+ * @param {boolean} forceDebug - Debug logging flag
29
+ * @param {string|boolean} cleanupMode - Cleanup mode: true/"default" (conservative), "all" (aggressive)
30
+ * @returns {Promise<Object>} Cleanup results
31
+ */
32
+ async function performGroupWindowCleanup(browserInstance, groupDescription, forceDebug, cleanupMode = true) {
33
+ try {
34
+ // Wait before cleanup to allow any final operations to complete
35
+ const modeText = cleanupMode === "all" ? "aggressive cleanup of old windows" : "conservative cleanup of extra windows"
36
+ if (forceDebug) {
37
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before ${modeText} for group: ${groupDescription}`));
38
+
39
+ }
40
+ await new Promise(resolve => setTimeout(resolve, WINDOW_CLEANUP_DELAY_MS));
41
+
42
+ const allPages = await browserInstance.pages();
43
+ // Identify the main Puppeteer window (should be about:blank or the initial page)
44
+ let mainPuppeteerPage = null;
45
+ let pagesToClose = [];
46
+
47
+ // Find the main page - the first page whose URL is about:blank, empty, or chrome://
48
+ for (const page of allPages) {
49
+ const url = page.url();
50
+ if (url === 'about:blank' || url === '' || url.startsWith('chrome://')) {
51
+ if (!mainPuppeteerPage) {
52
+ mainPuppeteerPage = page; // First blank page is likely the main window
53
+ } else {
54
+ pagesToClose.push(page); // Additional blank pages can be closed
55
+ }
56
+ } else {
57
+ // Any page with actual content should be evaluated for closure
58
+ if (cleanupMode === "all") {
59
+ // Aggressive mode: close all content pages
60
+ pagesToClose.push(page);
61
+ } else {
62
+ // Conservative mode: only close pages that look like leftovers from previous scans
63
+ // Keep pages that might still be actively used
64
+ const isOldPage = await isPageFromPreviousScan(page, forceDebug);
65
+ if (isOldPage) {
66
+ pagesToClose.push(page);
67
+ }
68
+ }
69
+ }
70
+ }
71
+
72
+ // Ensure we always have a main page
73
+ if (!mainPuppeteerPage && allPages.length > 0) {
74
+ mainPuppeteerPage = allPages[0]; // Fallback to first page
75
+ pagesToClose = allPages.slice(1);
76
+ if (forceDebug) {
77
+ console.log(formatLogMessage('debug', `[group_window_cleanup] No blank page found, using first page as main: ${mainPuppeteerPage.url()}`));
78
+ }
79
+ }
80
+
81
+ if (pagesToClose.length === 0) {
82
+ if (forceDebug) {
83
+ console.log(formatLogMessage('debug', `[group_window_cleanup] No windows to close for group: ${groupDescription}`));
84
+ }
85
+ return {
86
+ success: true,
87
+ closedCount: 0,
88
+ totalPages: allPages.length,
89
+ estimatedMemoryFreed: 0,
90
+ mainPagePreserved: true,
91
+ cleanupMode: cleanupMode === "all" ? "all" : "default"
92
+ };
93
+ }
94
+
95
+ // Estimate memory usage before closing
96
+ let totalEstimatedMemory = 0;
97
+ const pageMemoryEstimates = [];
98
+
99
+ for (let i = 0; i < pagesToClose.length; i++) {
100
+ const page = pagesToClose[i];
101
+ let pageMemoryEstimate = 0;
102
+
103
+ try {
104
+ if (!page.isClosed()) {
105
+ // Get page metrics if available
106
+ const metrics = await Promise.race([
107
+ page.metrics(),
108
+ new Promise((_, reject) => setTimeout(() => reject(new Error('metrics timeout')), 1000))
109
+ ]);
110
+
111
+ // Calculate memory estimate based on page metrics
112
+ if (metrics) {
113
+ // Puppeteer metrics provide various memory-related values
114
+ pageMemoryEstimate = (
115
+ (metrics.JSHeapUsedSize || 0) + // JavaScript heap
116
+ (metrics.JSHeapTotalSize || 0) * 0.1 + // Estimated overhead
117
+ (metrics.Nodes || 0) * 100 + // DOM nodes (rough estimate)
118
+ (metrics.JSEventListeners || 0) * 50 // Event listeners
119
+ );
120
+ } else {
121
+ // Fallback: rough estimate based on page complexity
122
+ pageMemoryEstimate = 8 * 1024 * 1024; // 8MB default estimate per page
123
+ }
124
+ }
125
+ } catch (metricsErr) {
126
+ // Fallback estimate if metrics fail
127
+ pageMemoryEstimate = 8 * 1024 * 1024; // 8MB default
128
+ if (forceDebug) {
129
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Could not get metrics for page ${i + 1}, using default estimate: ${metricsErr.message}`));
130
+ }
131
+ }
132
+
133
+ pageMemoryEstimates.push(pageMemoryEstimate);
134
+ totalEstimatedMemory += pageMemoryEstimate;
135
+ }
136
+
137
+ // Close identified old/unused pages
138
+ const closePromises = pagesToClose.map(async (page, index) => {
139
+ try {
140
+ if (!page.isClosed()) {
141
+ if (forceDebug) {
142
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Closing page: ${page.url()}`));
143
+ }
144
+ await page.close();
145
+ return { success: true, url: page.url() || `page-${index}`, estimatedMemory: pageMemoryEstimates[index] };
146
+ }
147
+ return { success: false, reason: 'already_closed', estimatedMemory: 0 };
148
+ } catch (closeErr) {
149
+ if (forceDebug) {
150
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Failed to close old page ${index + 1}: ${closeErr.message}`));
151
+ }
152
+ return { success: false, error: closeErr.message, estimatedMemory: 0 };
153
+ }
154
+ });
155
+
156
+ const closeResults = await Promise.all(closePromises);
157
+ const successfulCloses = closeResults.filter(result => result.success === true).length;
158
+ const actualMemoryFreed = closeResults
159
+ .filter(result => result.success === true)
160
+ .reduce((sum, result) => sum + (result.estimatedMemory || 0), 0);
161
+
162
+ // Format memory for human readability
163
+ const formatMemory = (bytes) => {
164
+ if (bytes >= 1024 * 1024 * 1024) {
165
+ return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)}GB`;
166
+ } else if (bytes >= 1024 * 1024) {
167
+ return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
168
+ } else if (bytes >= 1024) {
169
+ return `${(bytes / 1024).toFixed(1)}KB`;
170
+ } else {
171
+ return `${bytes}B`;
172
+ }
173
+ };
174
+
175
+ if (forceDebug) {
176
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Closed ${successfulCloses}/${pagesToClose.length} old windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`));
177
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Estimated memory freed: ${formatMemory(actualMemoryFreed)}`));
178
+ if (mainPuppeteerPage) {
179
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Main Puppeteer window preserved: ${mainPuppeteerPage.url()}`));
180
+ }
181
+ }
182
+
183
+ return {
184
+ success: true,
185
+ closedCount: successfulCloses,
186
+ totalPages: allPages.length,
187
+ mainPagePreserved: mainPuppeteerPage && !mainPuppeteerPage.isClosed(),
188
+ delayUsed: WINDOW_CLEANUP_DELAY_MS,
189
+ estimatedMemoryFreed: actualMemoryFreed,
190
+ estimatedMemoryFreedFormatted: formatMemory(actualMemoryFreed),
191
+ cleanupMode: cleanupMode === "all" ? "all" : "default"
192
+ };
193
+ } catch (cleanupErr) {
194
+ if (forceDebug) {
195
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Group cleanup failed for ${groupDescription}: ${cleanupErr.message}`));
196
+ }
197
+ return { success: false, error: cleanupErr.message, estimatedMemoryFreed: 0 };
198
+ }
199
+ }
200
+
201
+ /**
202
+ * Checks if a page is safe to close (not actively processing)
203
+ * @param {import('puppeteer').Page} page - Page to check
204
+ * @param {boolean} forceDebug - Debug logging flag
205
+ * @returns {Promise<boolean>} True if page is safe to close
206
+ */
207
+ async function isPageSafeToClose(page, forceDebug) {
208
+ try {
209
+ if (page.isClosed()) {
210
+ return true; // Already closed
211
+ }
212
+
213
+ const usage = pageUsageTracker.get(page);
214
+ if (!usage) {
215
+ // No usage data - the page was never tracked, so treat it as safe to close
216
+ return true;
217
+ }
218
+
219
+ // Check if page is actively processing
220
+ if (usage.isProcessing) {
221
+ if (forceDebug) {
222
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Page still processing: ${page.url().substring(0, 50)}...`));
223
+ }
224
+ return false;
225
+ }
226
+
227
+ // Check if page has been idle long enough
228
+ const idleTime = Date.now() - usage.lastActivity;
229
+ const isSafe = idleTime >= PAGE_IDLE_THRESHOLD;
230
+
231
+ if (!isSafe && forceDebug) {
232
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Page not idle long enough: ${Math.round(idleTime/1000)}s < ${PAGE_IDLE_THRESHOLD/1000}s`));
233
+ }
234
+
235
+ return isSafe;
236
+ } catch (err) {
237
+ if (forceDebug) {
238
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Error checking page safety: ${err.message}`));
239
+ }
240
+ return true; // Assume safe if we can't check
241
+ }
242
+ }
243
+
244
+ /**
245
+ * Updates page usage tracking
246
+ * @param {import('puppeteer').Page} page - Page to update
247
+ * @param {boolean} isProcessing - Whether page is actively processing
248
+ */
249
+ function updatePageUsage(page, isProcessing = false) {
250
+ try {
251
+ if (!page.isClosed()) {
252
+ pageUsageTracker.set(page, {
253
+ lastActivity: Date.now(),
254
+ isProcessing: isProcessing
255
+ });
256
+ }
257
+ } catch (err) {
258
+ // Ignore errors in usage tracking
259
+ }
260
+ }
261
+
262
+ /**
263
+ * Performs realtime window cleanup - removes oldest pages when threshold is exceeded
264
+ * Waits for site delay + 3 seconds before cleanup to ensure delayed requests are captured
265
+ * @param {import('puppeteer').Browser} browserInstance - Browser instance
266
+ * @param {number} threshold - Maximum number of pages to keep (default: 8)
267
+ * @param {boolean} forceDebug - Debug logging flag
268
+ * @param {number} siteDelay - Current site's delay value in milliseconds (default: 4000)
269
+ * @returns {Promise<Object>} Cleanup results
270
+ */
271
+ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIME_CLEANUP_THRESHOLD, forceDebug, siteDelay = 4000) {
272
+ try {
273
+ const allPages = await browserInstance.pages();
274
+
275
+ // Skip cleanup if we don't have enough pages to warrant it
276
+ if (allPages.length <= Math.max(threshold, REALTIME_CLEANUP_MIN_PAGES)) {
277
+ if (forceDebug) {
278
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Only ${allPages.length} pages open, threshold is ${threshold} - no cleanup needed`));
279
+ }
280
+ return { success: true, closedCount: 0, totalPages: allPages.length, reason: 'below_threshold' };
281
+ }
282
+
283
+ // Calculate cleanup delay: site delay + 3 second buffer
284
+ const cleanupDelay = siteDelay + REALTIME_CLEANUP_BUFFER_MS;
285
+
286
+ if (forceDebug) {
287
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Waiting ${cleanupDelay}ms (site delay: ${siteDelay}ms + ${REALTIME_CLEANUP_BUFFER_MS}ms buffer) before cleanup (threshold: ${threshold})`));
288
+ }
289
+ await new Promise(resolve => setTimeout(resolve, cleanupDelay));
290
+
291
+ const allPagesAfterDelay = await browserInstance.pages();
292
+
293
+ // Find main Puppeteer page (usually about:blank)
294
+ let mainPage = allPagesAfterDelay.find(page => {
295
+ const url = page.url();
296
+ return url === 'about:blank' || url === '' || url.startsWith('chrome://');
297
+ }) || allPagesAfterDelay[0]; // Fallback to first page
298
+
299
+ // Get pages sorted by creation time (oldest first)
300
+ const sortedPages = allPagesAfterDelay
301
+ .filter(page => page !== mainPage && !page.isClosed())
302
+ .sort((a, b) => {
303
+ const timeA = pageCreationTracker.get(a) || 0;
304
+ const timeB = pageCreationTracker.get(b) || 0;
305
+ return timeA - timeB; // Oldest first
306
+ });
307
+
308
+ // Calculate how many pages to close
309
+ const pagesToKeep = threshold - 1; // -1 for main page
310
+ const pagesToClose = sortedPages.slice(0, Math.max(0, sortedPages.length - pagesToKeep));
311
+
312
+ // Filter out pages that are still being used
313
+ const safetyChecks = await Promise.all(
314
+ pagesToClose.map(page => isPageSafeToClose(page, forceDebug))
315
+ );
316
+
317
+ const safePagesToClose = pagesToClose.filter((page, index) => safetyChecks[index]);
318
+ const unsafePagesCount = pagesToClose.length - safePagesToClose.length;
319
+
320
+ if (unsafePagesCount > 0 && forceDebug) {
321
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Skipping ${unsafePagesCount} active pages for safety`));
322
+ }
323
+
324
+ if (safePagesToClose.length === 0) {
325
+ if (forceDebug) {
326
+ const reason = pagesToClose.length === 0 ?
327
+ `${sortedPages.length} content pages, keeping ${pagesToKeep}` :
328
+ `${pagesToClose.length} pages still active`;
329
+ console.log(formatLogMessage('debug', `[realtime_cleanup] No pages need closing (${reason})`));
330
+ }
331
+ return { success: true, closedCount: 0, totalPages: allPagesAfterDelay.length, reason: 'no_cleanup_needed' };
332
+ }
333
+
334
+ // Close oldest pages
335
+ let closedCount = 0;
336
+ for (const page of safePagesToClose) {
337
+ try {
338
+ if (!page.isClosed()) {
339
+ const pageUrl = page.url();
340
+ await page.close();
341
+ pageCreationTracker.delete(page); // Remove from tracker
342
+ closedCount++;
343
+
344
+ if (forceDebug) {
345
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Closed old page: ${pageUrl.substring(0, 50)}...`));
346
+ }
347
+ }
348
+ } catch (closeErr) {
349
+ if (forceDebug) {
350
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Failed to close page: ${closeErr.message}`));
351
+ }
352
+ }
353
+ }
354
+
355
+ const remainingPages = allPagesAfterDelay.length - closedCount;
356
+
357
+ if (forceDebug) {
358
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Closed ${closedCount}/${pagesToClose.length} oldest pages (${unsafePagesCount} skipped for safety), ${remainingPages} pages remaining`));
359
+ }
360
+
361
+ return {
362
+ success: true,
363
+ closedCount,
364
+ totalPages: allPagesAfterDelay.length,
365
+ remainingPages,
366
+ threshold,
367
+ cleanupDelay,
368
+ reason: 'cleanup_completed'
369
+ };
370
+ } catch (cleanupErr) {
371
+ if (forceDebug) {
372
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Cleanup failed: ${cleanupErr.message}`));
373
+ }
374
+ return { success: false, error: cleanupErr.message, closedCount: 0 };
375
+ }
376
+ }
377
+
378
+ /**
379
+ * Determines if a page appears to be from a previous scan and can be safely closed
380
+ * @param {import('puppeteer').Page} page - Page to evaluate
381
+ * @param {boolean} forceDebug - Debug logging flag
382
+ * @returns {Promise<boolean>} True if page appears to be from previous scan
383
+ */
384
+ async function isPageFromPreviousScan(page, forceDebug) {
385
+ try {
386
+ const url = page.url();
387
+
388
+ // Blank, internal, error and data: pages are never flagged as old here
389
+ if (url === 'about:blank' ||
390
+ url === '' ||
391
+ url.startsWith('chrome://') ||
392
+ url.startsWith('chrome-error://') ||
393
+ url.startsWith('data:')) {
394
+ return false; // Don't close blank pages here, handled separately
395
+ }
396
+
397
+ // Inspect the page title for signs of a dead or leftover page
398
+ // This is a heuristic - no navigation or idle-time data is consulted here
399
+ try {
400
+ const title = await page.title();
401
+ // Pages with generic titles or error states are likely old
402
+ if (title.includes('404') ||
403
+ title.includes('Error') ||
404
+ title.includes('Not Found') ||
405
+ title === '') {
406
+ return true;
407
+ }
408
+ } catch (titleErr) {
409
+ // If we can't get title, page might be in bad state
410
+ return true;
411
+ }
412
+
413
+ // Default: assume remaining content pages may still be in use in conservative mode
414
+ return false; // Conservative - don't close unless we're sure
415
+ } catch (err) {
416
+ if (forceDebug) {
417
+ console.log(formatLogMessage('debug', `[isPageFromPreviousScan] Error evaluating page ${page.url()}: ${err.message}`));
418
+ }
419
+ return false; // Conservative - don't close if we can't evaluate
420
+ }
421
+ }
422
+
423
+ /**
424
+ * Tracks a new page for realtime cleanup purposes
425
+ * @param {import('puppeteer').Page} page - Page to track
426
+ */
427
+ function trackPageForRealtime(page) {
428
+ pageCreationTracker.set(page, ++pageCreationCounter);
429
+ updatePageUsage(page, false); // Initialize usage tracking
430
+ }
431
+
8
432
  /**
9
433
  * Quick browser responsiveness test for use during page setup
10
434
  * Designed to catch browser degradation between operations
@@ -541,10 +965,29 @@ module.exports = {
541
965
  checkBrowserHealth,
542
966
  checkBrowserMemory,
543
967
  testBrowserConnectivity,
968
+ performGroupWindowCleanup,
969
+ performRealtimeWindowCleanup,
970
+ trackPageForRealtime,
544
971
  testNetworkCapability,
545
972
  isQuicklyResponsive,
546
973
  performHealthAssessment,
547
974
  monitorBrowserHealth,
548
975
  isBrowserHealthy,
549
- isCriticalProtocolError
976
+ isCriticalProtocolError,
977
+ updatePageUsage
550
978
  };
979
+
980
+ // Clean up tracking maps when pages are closed
981
+ const originalPageClose = require('puppeteer').Page.prototype.close;
982
+ if (originalPageClose) {
983
+ require('puppeteer').Page.prototype.close = async function(...args) {
984
+ try {
985
+ // Clean up tracking data
986
+ pageCreationTracker.delete(this);
987
+ pageUsageTracker.delete(this);
988
+ } catch (err) {
989
+ // Ignore cleanup errors
990
+ }
991
+ return originalPageClose.apply(this, args);
992
+ };
993
+ }
package/nwss.1 CHANGED
@@ -1,4 +1,4 @@
1
- .TH NWSS-SCRIPT 1 "2025" "scanner-script v1.0.32" "User Commands"
1
+ .TH NWSS-SCRIPT 1 "2025" "scanner-script v1.0.98" "User Commands"
2
2
  .SH NAME
3
3
  NWSS scanner-script \- Network scanner for malware detection and domain analysis with advanced similarity filtering
4
4
 
@@ -27,6 +27,10 @@ Remove rules that already exist in \fIFILE\fR before output (requires \fB\-o\fR)
27
27
  .B \--append
28
28
  Append new rules to output file instead of overwriting (requires \fB\-o\fR).
29
29
 
30
+ .TP
31
+ .BR \--color ", " \--colour
32
+ Enable colored console output for status messages.
33
+
30
34
  .SS Output Format Options
31
35
  .TP
32
36
  .B \--localhost
@@ -125,6 +129,14 @@ Console output only: show matching regex, titles, whois/dig/searchstring results
125
129
  .B \--remove-tempfiles
126
130
  Remove Chrome/Puppeteer temporary files before exit.
127
131
 
132
+ .TP
133
+ .BR \--max-concurrent " \fINUMBER\fR"
134
+ Maximum concurrent site processing (1-50, overrides config/default).
135
+
136
+ .TP
137
+ .BR \--cleanup-interval " \fINUMBER\fR"
138
+ Browser restart interval in URLs processed (1-1000, overrides config/default).
139
+
128
140
  .TP
129
141
  .BR \-h ", " \--help
130
142
  Show help message and exit.
@@ -134,6 +146,10 @@ Show help message and exit.
134
146
  Show version information and exit.
135
147
 
136
148
  .SS Validation Options
149
+ .TP
150
+ .B \--cache-requests
151
+ Cache HTTP requests to avoid re-requesting same URLs within scan.
152
+
137
153
  .TP
138
154
  .B \--validate-config
139
155
  Validate config.json file and exit.
@@ -150,6 +166,14 @@ Clean rule files by removing invalid lines and optionally duplicates (uses \fB\-
150
166
  .B \--test-validation
151
167
  Run domain validation tests and exit.
152
168
 
169
+ .TP
170
+ .B \--clear-cache
171
+ Clear persistent cache before scanning (improves fresh start performance).
172
+
173
+ .TP
174
+ .B \--ignore-cache
175
+ Bypass all smart caching functionality during scanning.
176
+
153
177
  .SH CONFIGURATION
154
178
 
155
179
  Configuration is provided via JSON files. The default configuration file is \fBconfig.json\fR.
@@ -184,6 +208,30 @@ Number. Similarity threshold percentage for ignore_similar (default: 80).
184
208
  .B ignore_similar_ignored_domains
185
209
  Boolean. Ignore domains similar to ignoreDomains list (default: true).
186
210
 
211
+ .TP
212
+ .B max_concurrent_sites
213
+ Number. Maximum concurrent site processing (1-50, default: 6).
214
+
215
+ .TP
216
+ .B resource_cleanup_interval
217
+ Number. Browser restart interval in URLs processed (1-1000, default: 80).
218
+
219
+ .TP
220
+ .B cache_path
221
+ String. Directory path for persistent cache storage (default: ".cache").
222
+
223
+ .TP
224
+ .B cache_max_size
225
+ Number. Maximum number of entries in cache (default: 5000).
226
+
227
+ .TP
228
+ .B cache_autosave_minutes
229
+ Number. Interval for automatic cache saves in minutes (default: 1).
230
+
231
+ .TP
232
+ .B cache_requests
233
+ Boolean. Enable HTTP request response caching (default: false).
234
+
187
235
  .SS Per-Site Configuration Options
188
236
 
189
237
  .TP
@@ -194,6 +242,10 @@ Single URL string or array of URLs to scan.
194
242
  .B filterRegex
195
243
  Regex pattern(s) to match suspicious requests.
196
244
 
245
+ .TP
246
+ .B regex_and
247
+ Boolean. Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL (default: false).
248
+
197
249
  .TP
198
250
  .B comments
199
251
  Documentation strings or notes - completely ignored by the scanner. Can be a single string or array of strings. Used for adding context, URLs, timestamps, or any documentation notes to configuration files.
@@ -234,6 +286,10 @@ Spoof User-Agent: \fB"chrome"\fR, \fB"firefox"\fR, or \fB"safari"\fR.
234
286
  .B interact
235
287
  Boolean. Simulate mouse movements and clicks.
236
288
 
289
+ .TP
290
+ .B interact_intensity
291
+ String. Interaction simulation intensity: \fB"low"\fR, \fB"medium"\fR, \fB"high"\fR (default: "medium").
292
+
237
293
  .TP
238
294
  .B delay
239
295
  Milliseconds to wait after page load (default: 4000).
@@ -242,6 +298,10 @@ Milliseconds to wait after page load (default: 4000).
242
298
  .B reload
243
299
  Number of times to reload the page (default: 1).
244
300
 
301
+ .TP
302
+ .B forcereload
303
+ Boolean. Force an additional reload with cache disabled after normal reloads.
304
+
245
305
  .TP
246
306
  .B timeout
247
307
  Request timeout in milliseconds (default: 30000).
@@ -258,6 +318,14 @@ Boolean. Allow third-party request matching (default: true).
258
318
  .B fingerprint_protection
259
319
  Boolean or \fB"random"\fR. Enable browser fingerprint spoofing.
260
320
 
321
+ .TP
322
+ .B referrer_headers
323
+ String, Array, or Object. Set referrer header for realistic traffic sources.
324
+
325
+ .TP
326
+ .B custom_headers
327
+ Object. Add custom HTTP headers to requests.
328
+
261
329
  .TP
262
330
  .B ignore_similar
263
331
  Boolean. Override global ignore_similar setting for this site.
@@ -274,6 +342,10 @@ Boolean. Override global ignore_similar_ignored_domains for this site.
274
342
  .B even_blocked
275
343
  Boolean. Add matching rules even if requests are blocked (default: false).
276
344
 
345
+ .TP
346
+ .B bypass_cache
347
+ Boolean. Skip all caching for this site's URLs (default: false).
348
+
277
349
  .TP
278
350
  .B whois
279
351
  Array of terms that must ALL be found in WHOIS data (AND logic).
@@ -353,10 +425,6 @@ Object. Custom page.goto() options for Puppeteer navigation. Available options:
353
425
  .RE
354
426
  Example: \fB{"waitUntil": "networkidle2", "timeout": 60000}\fR
355
427
 
356
- .TP
357
- .B forcereload
358
- Boolean. Force an additional reload with cache disabled after normal reloads.
359
-
360
428
  .TP
361
429
  .B clear_sitedata
362
430
  Boolean. Clear all cookies, cache, and storage before each page load (default: false).
@@ -389,6 +457,45 @@ Boolean. Launch browser with GUI for this specific site.
389
457
  .B adblock_rules
390
458
  Boolean. Generate adblock filter rules with resource types for this site.
391
459
 
460
+ .TP
461
+ .B window_cleanup
462
+ Boolean or String. Close old/unused browser windows and tabs after entire URL group completes (default: false).
463
+ .RS
464
+ .IP \(bu 4
465
+ \fBfalse\fR - No cleanup performed
466
+ .IP \(bu 4
467
+ \fBtrue\fR - Conservative cleanup: closes pages that appear to be from previous scans while preserving main Puppeteer window
468
+ .IP \(bu 4
469
+ \fB"all"\fR - Aggressive cleanup: closes all content pages from previous operations while preserving main Puppeteer window
470
+ .RE
471
+ Both modes wait 16 seconds before cleanup to allow final operations to complete, and always preserve the main browser window to maintain browser instance stability. Cleanup targets popup windows, extra tabs, and pages from previously completed site groups, but never affects actively processing pages.
472
+
473
+ .TP
474
+
475
+ .SS Redirect Handling Options
476
+
477
+ .TP
478
+ .B follow_redirects
479
+ Boolean. Follow redirects to new domains (default: true).
480
+
481
+ .TP
482
+ .B max_redirects
483
+ Number. Maximum number of redirects to follow (default: 10).
484
+
485
+ .TP
486
+ .B js_redirect_timeout
487
+ Milliseconds. Time to wait for JavaScript redirects (default: 5000).
488
+
489
+ .TP
490
+ .B detect_js_patterns
491
+ Boolean. Analyze page source for redirect patterns (default: true).
492
+
493
+ .TP
494
+ .B redirect_timeout_multiplier
495
+ Number. Increase timeout for redirected URLs (default: 1.5).
496
+
497
+ .SS Cloudflare Protection Options
498
+
392
499
  .TP
393
500
  .B cloudflare_phish
394
501
  Boolean. Auto-click through Cloudflare phishing warnings (default: false).
@@ -397,6 +504,24 @@ Boolean. Auto-click through Cloudflare phishing warnings (default: false).
397
504
  .B cloudflare_bypass
398
505
  Boolean. Auto-solve Cloudflare "Verify you are human" challenges (default: false).
399
506
 
507
+ .TP
508
+ .B cloudflare_parallel_detection
509
+ Boolean. Use parallel detection for faster Cloudflare checks (default: true).
510
+
511
+ .TP
512
+ .B cloudflare_max_retries
513
+ Number. Maximum retry attempts for Cloudflare operations (default: 3).
514
+
515
+ .TP
516
+ .B cloudflare_cache_ttl
517
+ Milliseconds. TTL for Cloudflare detection cache (default: 300000 - 5 minutes).
518
+
519
+ .TP
520
+ .B cloudflare_retry_on_error
521
+ Boolean. Enable retry logic for Cloudflare operations (default: true).
522
+
523
+ .SS FlowProxy Protection Options
524
+
400
525
  .TP
401
526
  .B flowproxy_detection
402
527
  Boolean. Enable flowProxy protection detection and handling (default: false).
@@ -421,6 +546,24 @@ Milliseconds. Delay for rate limiting (default: 30000).
421
546
  .B flowproxy_additional_delay
422
547
  Milliseconds. Additional processing delay (default: 5000).
423
548
 
549
+ .SS Advanced Options
550
+
551
+ .TP
552
+ .B interact_duration
553
+ Milliseconds. Duration of interaction simulation (default: 2000).
554
+
555
+ .TP
556
+ .B interact_scrolling
557
+ Boolean. Enable scrolling simulation (default: true).
558
+
559
+ .TP
560
+ .B interact_clicks
561
+ Boolean. Enable element clicking simulation (default: false).
562
+
563
+ .TP
564
+ .B interact_typing
565
+ Boolean. Enable typing simulation (default: false).
566
+
424
567
  .TP
425
568
  .B verbose
426
569
  Boolean. Enable verbose output for this specific site.
@@ -461,6 +604,28 @@ Boolean. Force Pi-hole regex output format for this site.
461
604
  .B plain
462
605
  Boolean. Force plain domain output for this site.
463
606
 
607
+ .SS Referrer Header Options
608
+
609
+ .B Simple formats:
610
+ .RS
611
+ .IP \(bu 4
612
+ \fB"referrer_headers": "https://google.com/search?q=example"\fR
613
+ .IP \(bu 4
614
+ \fB"referrer_headers": ["url1", "url2"]\fR
615
+ .RE
616
+
617
+ .B Smart modes:
618
+ .RS
619
+ .IP \(bu 4
620
+ \fB"referrer_headers": {"mode": "random_search", "search_terms": ["reviews"]}\fR
621
+ .IP \(bu 4
622
+ \fB"referrer_headers": {"mode": "social_media"}\fR
623
+ .IP \(bu 4
624
+ \fB"referrer_headers": {"mode": "direct_navigation"}\fR
625
+ .IP \(bu 4
626
+ \fB"referrer_headers": {"mode": "custom", "custom": ["https://news.ycombinator.com/"]}\fR
627
+ .RE
628
+
464
629
  .SH SIMILARITY FILTERING
465
630
 
466
631
  The scanner includes advanced similarity filtering to reduce noise and improve detection accuracy by automatically ignoring domains that are very similar to ones already found or explicitly ignored.
@@ -530,6 +695,16 @@ With default settings (\fBignore_similar_threshold: 80\fR):
530
695
  }
531
696
  .EE
532
697
 
698
+ .SS Configuration with regex AND logic:
699
+ .EX
700
+ {
701
+ "url": "https://ad-network.com",
702
+ "filterRegex": ["tracking", "analytics"],
703
+ "regex_and": true,
704
+ "resourceTypes": ["script", "fetch"]
705
+ }
706
+ .EE
707
+
533
708
  .SS Configuration with similarity filtering:
534
709
  .EX
535
710
  {
@@ -661,6 +836,36 @@ node nwss.js -o dnsmasq.conf --dnsmasq --titles
661
836
  node nwss.js -o pihole_regex.txt --pihole --debug
662
837
  .EE
663
838
 
839
+ .SS Performance tuning with concurrency:
840
+ .EX
841
+ node nwss.js --max-concurrent 12 --cleanup-interval 100 -o rules.txt
842
+ .EE
843
+
844
+ .SS Cache management:
845
+ .EX
846
+ node nwss.js --clear-cache --cache-requests -o rules.txt
847
+ node nwss.js --ignore-cache --debug -o rules.txt
848
+ .EE
849
+
850
+ .SS Stealth configuration with referrer headers:
851
+ .EX
852
+ {
853
+ "url": "https://e-commerce-site.com",
854
+ "userAgent": "chrome",
855
+ "fingerprint_protection": "random",
856
+ "referrer_headers": {
857
+ "mode": "random_search",
858
+ "search_terms": ["product reviews", "best deals"]
859
+ },
860
+ "custom_headers": {
861
+ "Accept-Language": "en-US,en;q=0.9"
862
+ },
863
+ "interact": true,
864
+ "interact_intensity": "high",
865
+ "filterRegex": "analytics|tracking|ads"
866
+ }
867
+ .EE
868
+
664
869
  .SS Cloudflare bypass and fingerprint spoofing:
665
870
  .EX
666
871
  {
@@ -688,6 +893,35 @@ node nwss.js -o pihole_regex.txt --pihole --debug
688
893
  }
689
894
  .EE
690
895
 
896
+ .SS Window cleanup for memory management:
897
+ .EX
898
+ {
899
+ "url": [
900
+ "https://site1.com",
901
+ "https://site2.com",
902
+ "https://site3.com"
903
+ ],
904
+ "filterRegex": "\\\\.(space|website)\\\\b",
905
+ "window_cleanup": "all",
906
+ "resourceTypes": ["script", "fetch"]
907
+ }
908
+ .EE
909
+
910
+ .SS Conservative window cleanup:
911
+ .EX
912
+ {
913
+ "url": "https://popup-heavy-site.com",
914
+ "filterRegex": "\\\\.(top|buzz)\\\\b",
915
+ "window_cleanup": true,
916
+ "interact": true,
917
+ "reload": 2,
918
+ "comments": [
919
+ "Site opens many popup windows",
920
+ "Conservative cleanup preserves potentially active content"
921
+ ]
922
+ }
923
+ .EE
924
+
691
925
  .SH OUTPUT FORMATS
692
926
 
693
927
  The scanner supports multiple output formats for different blocking systems:
@@ -740,6 +974,10 @@ Default configuration file containing scan targets and rules.
740
974
  .B logs/
741
975
  Directory created for debug and matched URL logs when \fB\--debug\fR or \fB\--dumpurls\fR is used.
742
976
 
977
+ .TP
978
+ .B .cache/
979
+ Default cache directory for smart caching functionality.
980
+
743
981
  .TP
744
982
  .B user.action
745
983
  Common Privoxy action file when using \fB\--privoxy\fR output.
@@ -747,7 +985,7 @@ Common Privoxy action file when using \fB\--privoxy\fR output.
747
985
  .SH DETECTION METHODS
748
986
 
749
987
  .SS URL Pattern Matching
750
- Uses regex patterns to identify suspicious domains and request URLs.
988
+ Uses regex patterns to identify suspicious domains and request URLs with support for AND/OR logic.
751
989
 
752
990
  .SS Content Analysis
753
991
  Downloads page content with curl and searches for malicious strings using JavaScript or grep.
@@ -784,6 +1022,28 @@ Detects and handles FlowProxy protection systems.
784
1022
  .SS Intelligent Domain Filtering
785
1023
  Advanced similarity algorithms prevent duplicate detection across international domains and variations.
786
1024
 
1025
+ .SS Smart Caching
1026
+ Persistent caching system for improved performance across multiple scans.
1027
+
1028
+ .SS Window Management
1029
+ Intelligent browser window and tab cleanup to prevent memory accumulation:
1030
+ .RS
1031
+ .IP \(bu 4
1032
+ \fBConservative mode\fR (\fBwindow_cleanup: true\fR): Selectively closes pages that appear to be leftovers from previous scans based on URL patterns and page state analysis
1033
+ .IP \(bu 4
1034
+ \fBAggressive mode\fR (\fBwindow_cleanup: "all"\fR): Closes all content pages from previous operations for maximum memory recovery
1035
+ .IP \(bu 4
1036
+ \fBMain window preservation\fR: Both modes always preserve the main Puppeteer browser window (typically about:blank) to maintain browser instance stability
1037
+ .IP \(bu 4
1038
+ \fBPopup window handling\fR: Automatically detects and closes popup windows created by previous site scans
1039
+ .IP \(bu 4
1040
+ \fBTiming protection\fR: 16-second delay ensures no active operations are interrupted during cleanup
1041
+ .IP \(bu 4
1042
+ \fBMemory estimation\fR: Reports estimated memory freed from closed windows for performance monitoring
1043
+ .IP \(bu 4
1044
+ \fBActive page protection\fR: Never affects pages currently being processed by concurrent scanning operations
1045
+ .RE
1046
+
787
1047
  .SH EXIT STATUS
788
1048
  .TP
789
1049
  .B 0
@@ -821,4 +1081,4 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
821
1081
  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
822
1082
 
823
1083
  You should have received a copy of the GNU General Public License along with
824
- this program. If not, see <https://www.gnu.org/licenses/>.
1084
+ this program. If not, see <https://www.gnu.org/licenses/>.
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.96 ===
1
+ // === Network scanner script (nwss.js) v1.0.98 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -64,7 +64,7 @@ const TIMEOUTS = {
64
64
  EMERGENCY_RESTART_DELAY: 2000,
65
65
  BROWSER_STABILIZE_DELAY: 1000,
66
66
  CURL_HANDLER_DELAY: 3000,
67
- PROTOCOL_TIMEOUT: 60000,
67
+ PROTOCOL_TIMEOUT: 120000,
68
68
  REDIRECT_JS_TIMEOUT: 5000
69
69
  };
70
70
 
@@ -82,6 +82,8 @@ const CONCURRENCY_LIMITS = {
82
82
  HIGH_CONCURRENCY_THRESHOLD: 12 // Auto-enable aggressive caching above this
83
83
  };
84
84
 
85
+ const REALTIME_CLEANUP_THRESHOLD = 8; // Default maximum number of pages kept open when window_cleanup is "realtime"
86
+
85
87
  /**
86
88
  * Detects the installed Puppeteer version dynamically
87
89
  * @returns {Object} Version info and compatibility settings
@@ -122,10 +124,10 @@ function detectPuppeteerVersion() {
122
124
  // Enhanced redirect handling
123
125
  const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
124
126
  // Ensure web browser is working correctly
125
- const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
127
+ const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage } = require('./lib/browserhealth');
126
128
 
127
129
  // --- Script Configuration & Constants ---
128
- const VERSION = '1.0.96'; // Script version
130
+ const VERSION = '1.0.98'; // Script version
129
131
 
130
132
  // get startTime
131
133
  const startTime = Date.now();
@@ -571,6 +573,12 @@ Advanced Options:
571
573
  dig_subdomain: true/false Use subdomain for dig lookup instead of root domain (default: false)
572
574
  digRecordType: "A" DNS record type for dig (default: A)
573
575
 
576
+ window_cleanup: true/false/"realtime"/"all" Window cleanup mode:
577
+ true/false - Close extra windows after URL group completes (default: false)
578
+ "realtime" - Continuously cleanup oldest pages when threshold exceeded
579
+ "all" - Aggressive cleanup of all content pages after group
580
+ window_cleanup_threshold: <number> For realtime mode: max pages to keep open (default: 8)
581
+
574
582
  Referrer Header Options:
575
583
  referrer_headers: "https://google.com" Single referrer URL
576
584
  referrer_headers: ["url1", "url2"] Random selection from array
@@ -1472,7 +1480,27 @@ function setupFrameHandling(page, forceDebug) {
1472
1480
  throw new Error('Failed to create valid page instance');
1473
1481
  }
1474
1482
 
1475
-
1483
+ // Track page for realtime cleanup
1484
+ trackPageForRealtime(page);
1485
+
1486
+ // Mark page as actively processing
1487
+ updatePageUsage(page, true);
1488
+
1489
+ // Perform realtime cleanup if enabled
1490
+ if (siteConfig.window_cleanup === "realtime") {
1491
+ const threshold = typeof siteConfig.window_cleanup_threshold === 'number'
1492
+ ? siteConfig.window_cleanup_threshold
1493
+ : REALTIME_CLEANUP_THRESHOLD;
1494
+
1495
+ // Get the site's delay value for cleanup timing
1496
+ const siteDelay = siteConfig.delay || 4000;
1497
+
1498
+ const realtimeResult = await performRealtimeWindowCleanup(browserInstance, threshold, forceDebug, siteDelay);
1499
+ if (realtimeResult.success && realtimeResult.closedCount > 0 && forceDebug) {
1500
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Cleaned ${realtimeResult.closedCount} old pages, ${realtimeResult.remainingPages} remaining`));
1501
+ }
1502
+ }
1503
+
1476
1504
  // Set aggressive timeouts for problematic operations
1477
1505
  // Optimized timeouts for Puppeteer 23.x responsiveness
1478
1506
  page.setDefaultTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_PAGE_REDUCED));
@@ -2610,6 +2638,9 @@ function setupFrameHandling(page, forceDebug) {
2610
2638
  request.continue();
2611
2639
  });
2612
2640
 
2641
+ // Mark page as actively processing network requests
2642
+ updatePageUsage(page, true);
2643
+
2613
2644
  // Add response handler ONLY if searchstring/searchstring_and is defined AND neither curl nor grep is enabled
2614
2645
  if ((hasSearchString || hasSearchStringAnd) && !useCurl && !useGrep) {
2615
2646
  const responseHandler = createResponseHandler({
@@ -2647,6 +2678,9 @@ function setupFrameHandling(page, forceDebug) {
2647
2678
  // Create optimized interaction configuration for this site
2648
2679
  const interactionConfig = createInteractionConfig(currentUrl, siteConfig);
2649
2680
 
2681
+ // Mark page as actively processing interactions
2682
+ updatePageUsage(page, true);
2683
+
2650
2684
  // --- Runtime CSS Element Blocking (Fallback) ---
2651
2685
  // Apply CSS blocking after page load as a fallback in case evaluateOnNewDocument didn't work
2652
2686
  if (cssBlockedSelectors && Array.isArray(cssBlockedSelectors) && cssBlockedSelectors.length > 0) {
@@ -2873,6 +2907,9 @@ function setupFrameHandling(page, forceDebug) {
2873
2907
  console.log(formatLogMessage('info', `${messageColors.loaded('Loaded:')} (${siteCounter}/${totalUrls}) ${currentUrl}`));
2874
2908
  await page.evaluate(() => { console.log('Safe to evaluate on loaded page.'); });
2875
2909
 
2910
+ // Mark page as processing frames
2911
+ updatePageUsage(page, true);
2912
+
2876
2913
  // Wait for iframes to load and log them
2877
2914
  if (forceDebug) {
2878
2915
  try {
@@ -2897,6 +2934,9 @@ function setupFrameHandling(page, forceDebug) {
2897
2934
  console.log(formatLogMessage('debug', `Frame debugging failed: ${frameDebugErr.message}`));
2898
2935
  }
2899
2936
  }
2937
+
2938
+ // Page finished initial loading - mark as idle
2939
+ updatePageUsage(page, false);
2900
2940
  } catch (err) {
2901
2941
  // Enhanced error handling for redirect timeouts using redirect module
2902
2942
  const timeoutResult = await handleRedirectTimeout(page, currentUrl, err, safeGetDomain, forceDebug, formatLogMessage);
@@ -2914,6 +2954,9 @@ function setupFrameHandling(page, forceDebug) {
2914
2954
 
2915
2955
  if (interactEnabled && !disableInteract) {
2916
2956
  if (forceDebug) console.log(formatLogMessage('debug', `interaction simulation enabled for ${currentUrl}`));
2957
+
2958
+ // Mark page as processing during interactions
2959
+ updatePageUsage(page, true);
2917
2960
  // Use enhanced interaction module
2918
2961
  await performPageInteraction(page, currentUrl, interactionConfig, forceDebug);
2919
2962
  }
@@ -2943,6 +2986,9 @@ function setupFrameHandling(page, forceDebug) {
2943
2986
  // Use fast timeout helper for consistent Puppeteer 23.x compatibility
2944
2987
 
2945
2988
  // Handle reloads - use force reload mechanism if forcereload is enabled
2989
+ // Mark page as processing during reloads
2990
+ updatePageUsage(page, true);
2991
+
2946
2992
  const totalReloads = (siteConfig.reload || 1) - 1; // Subtract 1 because initial load counts as first
2947
2993
  const useForceReload = siteConfig.forcereload === true;
2948
2994
 
@@ -3047,6 +3093,9 @@ function setupFrameHandling(page, forceDebug) {
3047
3093
  await fastTimeout(delayMs);
3048
3094
  }
3049
3095
  }
3096
+
3097
+ // Mark page as idle after all processing complete
3098
+ updatePageUsage(page, false);
3050
3099
 
3051
3100
  if (dryRunMode) {
3052
3101
  // Get page title for dry run output
@@ -3170,6 +3219,10 @@ function setupFrameHandling(page, forceDebug) {
3170
3219
  // Guaranteed resource cleanup - this runs regardless of success or failure
3171
3220
 
3172
3221
  if (cdpSessionManager) {
3222
+ // Mark page as idle when cleanup starts
3223
+ if (page && !page.isClosed()) {
3224
+ updatePageUsage(page, false);
3225
+ }
3173
3226
  await cdpSessionManager.cleanup();
3174
3227
  }
3175
3228
 
@@ -3230,9 +3283,21 @@ function setupFrameHandling(page, forceDebug) {
3230
3283
  }, 30000); // Check every 30 seconds
3231
3284
 
3232
3285
  // Process URLs in batches to maintain concurrency while allowing browser restarts
3286
+ let siteGroupIndex = 0;
3233
3287
  for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
3234
3288
  const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
3235
3289
  const currentBatch = allTasks.slice(batchStart, batchEnd);
3290
+
3291
+
3292
+ // Group tasks by their source site configuration for window cleanup
3293
+ const tasksBySite = new Map();
3294
+ currentBatch.forEach(task => {
3295
+ const siteKey = `site_${sites.indexOf(task.config)}`;
3296
+ if (!tasksBySite.has(siteKey)) {
3297
+ tasksBySite.set(siteKey, []);
3298
+ }
3299
+ tasksBySite.get(siteKey).push(task);
3300
+ });
3236
3301
 
3237
3302
  // IMPROVED: Only check health if we have indicators of problems
3238
3303
  let healthCheck = { shouldRestart: false, reason: null };
@@ -3386,6 +3451,30 @@ function setupFrameHandling(page, forceDebug) {
3386
3451
  }
3387
3452
 
3388
3453
  results.push(...batchResults);
3454
+
3455
+ // Perform group window cleanup for completed sites
3456
+ for (const [siteKey, siteTasks] of tasksBySite) {
3457
+ const siteConfig = siteTasks[0].config; // All tasks in a group share the same config
3458
+
3459
+ if (siteConfig.window_cleanup === true || siteConfig.window_cleanup === "all" || siteConfig.window_cleanup === "realtime") {
3460
+ const urlCount = siteTasks.length;
3461
+ const groupDescription = `${urlCount} URLs from site group ${++siteGroupIndex}`;
3462
+ const cleanupMode = siteConfig.window_cleanup === "realtime" ? true : siteConfig.window_cleanup; // Pass the configured value through unchanged, except "realtime", which is mapped to true (conservative) for group cleanup
3463
+
3464
+ try {
3465
+ const groupCleanupResult = await performGroupWindowCleanup(browser, groupDescription, forceDebug, cleanupMode);
3466
+ if (!silentMode && groupCleanupResult.success && groupCleanupResult.closedCount > 0) {
3467
+ const modeText = cleanupMode === "all" ? "(aggressive)" : "(conservative)";
3468
+ console.log(`🗑️ Group cleanup: ${groupCleanupResult.closedCount} old windows closed ${modeText} after completing ${groupDescription}`);
3469
+ if (groupCleanupResult.mainPagePreserved) {
3470
+ console.log(`✅ Main Puppeteer window preserved during cleanup`);
3471
+ }
3472
+ }
3473
+ } catch (groupCleanupErr) {
3474
+ if (forceDebug) console.log(formatLogMessage('debug', `Group window cleanup failed: ${groupCleanupErr.message}`));
3475
+ }
3476
+ }
3477
+ }
3389
3478
 
3390
3479
  processedUrlCount += batchSize;
3391
3480
  urlsSinceLastCleanup += batchSize;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.96",
3
+ "version": "1.0.98",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {