@fanboynz/network-scanner 1.0.97 → 1.0.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -151,7 +151,10 @@ Example:
151
151
  | `blocked` | Array | - | Domains or regexes to block during scanning |
152
152
  | `even_blocked` | Boolean | `false` | Add matching rules even if requests are blocked |
153
153
  | `bypass_cache` | Boolean | `false` | Skip all caching for this site's URLs |
154
- | `window_cleanup` | Boolean | `false` | Close extra browser windows/tabs after entire URL group completes with 16s delay |
154
+ | `window_cleanup` | Boolean or String | `false` | Close old/unused browser windows/tabs after entire URL group completes |
155
+
156
+ **Window cleanup modes:** `false` (disabled), `true` (conservative - closes obvious leftovers), `"all"` (aggressive - closes all content pages). Both active modes preserve the main Puppeteer window and wait 15 seconds before cleanup to avoid interfering with active operations.
157
+
155
158
 
156
159
  ### Redirect Handling Options
157
160
 
@@ -320,6 +323,39 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
320
323
 
321
324
  ### Stealth Configuration Examples
322
325
 
326
+ #### Memory Management with Window Cleanup
327
+ ```json
328
+ {
329
+ "url": [
330
+ "https://popup-heavy-site1.com",
331
+ "https://popup-heavy-site2.com",
332
+ "https://popup-heavy-site3.com"
333
+ ],
334
+ "filterRegex": "\\.(space|website|tech)\\b",
335
+ "window_cleanup": "all",
336
+ "interact": true,
337
+ "reload": 2,
338
+ "resourceTypes": ["script", "fetch"],
339
+ "comments": "Aggressive cleanup for sites that open many popups"
340
+ }
341
+ ```
342
+
343
+ #### Conservative Memory Management
344
+ ```json
345
+ {
346
+ "url": "https://complex-site.com",
347
+ "filterRegex": "analytics|tracking",
348
+ "window_cleanup": true,
349
+ "interact": true,
350
+ "delay": 8000,
351
+ "reload": 3,
352
+ "comments": [
353
+ "Conservative cleanup preserves potentially active content",
354
+ "Good for sites with complex iframe structures"
355
+ ]
356
+ }
357
+ ```
358
+
323
359
  #### E-commerce Site Scanning
324
360
  ```json
325
361
  {
@@ -368,6 +404,22 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
368
404
 
369
405
  ---
370
406
 
407
+ ## Memory Management
408
+
409
+ The scanner includes intelligent window management to prevent memory accumulation during long scans:
410
+
411
+ - **Conservative cleanup** (`window_cleanup: true`): Selectively closes pages that appear to be leftovers from previous scans
412
+ - **Aggressive cleanup** (`window_cleanup: "all"`): Closes all content pages from previous operations for maximum memory recovery
413
+ - **Main window preservation**: Both modes always preserve the main Puppeteer browser window to maintain stability
414
+ - **Popup window handling**: Automatically detects and closes popup windows created by previous site scans
415
+ - **Timing protection**: 15-second delay ensures no active operations are interrupted during cleanup
416
+ - **Active page protection**: Never affects pages currently being processed by concurrent scanning operations
417
+ - **Memory reporting**: Reports estimated memory freed from closed windows for performance monitoring
418
+
419
+ Use aggressive cleanup for sites that open many popups or when processing large numbers of URLs. Use conservative cleanup when you want to preserve potentially active content but still free obvious leftovers.
420
+
421
+ ---
422
+
371
423
  ## INSTALL
372
424
 
373
425
  #### (Ubuntu as example). NOTE: Use Chrome and not Chromium for best compatibility.
@@ -7,7 +7,19 @@ const { formatLogMessage, messageColors } = require('./colorize');
7
7
 
8
8
 
9
9
  // Window cleanup delay constant
10
- const WINDOW_CLEANUP_DELAY_MS = 16000;
10
+ const WINDOW_CLEANUP_DELAY_MS = 15000;
11
+ // Realtime window_cleanup tuning constants
12
+ const REALTIME_CLEANUP_BUFFER_MS = 15000; // Additional buffer time after site delay. Delay increased to fix missing hits.
13
+ const REALTIME_CLEANUP_THRESHOLD = 8; // Default number of pages to keep
14
+ const REALTIME_CLEANUP_MIN_PAGES = 3; // Minimum pages before cleanup kicks in
15
+
16
+ // Track page creation order for realtime cleanup
17
+ const pageCreationTracker = new Map(); // Maps page -> creation sequence number (from pageCreationCounter)
18
+ let pageCreationCounter = 0;
19
+
20
+ // Track page usage for realtime cleanup safety
21
+ const pageUsageTracker = new Map(); // Maps page -> { lastActivity: timestamp, isProcessing: boolean }
22
+ const PAGE_IDLE_THRESHOLD = 15000; // 15 seconds of inactivity before considering page safe to clean
11
23
 
12
24
  /**
13
25
  * Performs group-level window cleanup after all URLs in a site group complete
@@ -15,33 +27,78 @@ const WINDOW_CLEANUP_DELAY_MS = 16000;
15
27
  * @param {import('puppeteer').Browser} browserInstance - Browser instance
16
28
  * @param {string} groupDescription - Description of the group for logging
17
29
  * @param {boolean} forceDebug - Debug logging flag
30
+ * @param {string|boolean} cleanupMode - Cleanup mode: true/"default" (conservative), "all" (aggressive)
18
31
  * @returns {Promise<Object>} Cleanup results
19
32
  */
20
- async function performGroupWindowCleanup(browserInstance, groupDescription, forceDebug) {
33
+ async function performGroupWindowCleanup(browserInstance, groupDescription, forceDebug, cleanupMode = true) {
21
34
  try {
22
35
  // Wait before cleanup to allow any final operations to complete
36
+ const modeText = cleanupMode === "all" ? "aggressive cleanup of old windows" : "conservative cleanup of extra windows"
23
37
  if (forceDebug) {
24
- console.log(formatLogMessage('debug', `[group_window_cleanup] Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before cleanup for group: ${groupDescription}`));
38
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before ${modeText} for group: ${groupDescription}`));
39
+
25
40
  }
26
41
  await new Promise(resolve => setTimeout(resolve, WINDOW_CLEANUP_DELAY_MS));
27
42
 
28
43
  const allPages = await browserInstance.pages();
29
- const mainPage = allPages[0]; // Always keep the first page as main
30
- const extraPages = allPages.slice(1); // All other pages can be closed
44
+ // Identify the main Puppeteer window (should be about:blank or the initial page)
45
+ let mainPuppeteerPage = null;
46
+ let pagesToClose = [];
31
47
 
32
- if (extraPages.length === 0) {
48
+ // Find the main page - the first page whose URL is about:blank, empty, or chrome://
49
+ for (const page of allPages) {
50
+ const url = page.url();
51
+ if (url === 'about:blank' || url === '' || url.startsWith('chrome://')) {
52
+ if (!mainPuppeteerPage) {
53
+ mainPuppeteerPage = page; // First blank page is likely the main window
54
+ } else {
55
+ pagesToClose.push(page); // Additional blank pages can be closed
56
+ }
57
+ } else {
58
+ // Any page with actual content should be evaluated for closure
59
+ if (cleanupMode === "all") {
60
+ // Aggressive mode: close all content pages
61
+ pagesToClose.push(page);
62
+ } else {
63
+ // Conservative mode: only close pages that look like leftovers from previous scans
64
+ // Keep pages that might still be actively used
65
+ const isOldPage = await isPageFromPreviousScan(page, forceDebug);
66
+ if (isOldPage) {
67
+ pagesToClose.push(page);
68
+ }
69
+ }
70
+ }
71
+ }
72
+
73
+ // Ensure we always have a main page
74
+ if (!mainPuppeteerPage && allPages.length > 0) {
75
+ mainPuppeteerPage = allPages[0]; // Fallback to first page
76
+ pagesToClose = allPages.slice(1);
33
77
  if (forceDebug) {
34
- console.log(formatLogMessage('debug', `[group_window_cleanup] No extra windows to close for group: ${groupDescription}`));
78
+ console.log(formatLogMessage('debug', `[group_window_cleanup] No blank page found, using first page as main: ${mainPuppeteerPage.url()}`));
35
79
  }
36
- return { success: true, closedCount: 0, totalPages: allPages.length, estimatedMemoryFreed: 0 };
80
+ }
81
+
82
+ if (pagesToClose.length === 0) {
83
+ if (forceDebug) {
84
+ console.log(formatLogMessage('debug', `[group_window_cleanup] No windows to close for group: ${groupDescription}`));
85
+ }
86
+ return {
87
+ success: true,
88
+ closedCount: 0,
89
+ totalPages: allPages.length,
90
+ estimatedMemoryFreed: 0,
91
+ mainPagePreserved: true,
92
+ cleanupMode: cleanupMode === "all" ? "all" : "default"
93
+ };
37
94
  }
38
95
 
39
96
  // Estimate memory usage before closing
40
97
  let totalEstimatedMemory = 0;
41
98
  const pageMemoryEstimates = [];
42
99
 
43
- for (let i = 0; i < extraPages.length; i++) {
44
- const page = extraPages[i];
100
+ for (let i = 0; i < pagesToClose.length; i++) {
101
+ const page = pagesToClose[i];
45
102
  let pageMemoryEstimate = 0;
46
103
 
47
104
  try {
@@ -78,17 +135,20 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
78
135
  totalEstimatedMemory += pageMemoryEstimate;
79
136
  }
80
137
 
81
- // Close all extra pages since the entire group is complete
82
- const closePromises = extraPages.map(async (page, index) => {
138
+ // Close identified old/unused pages
139
+ const closePromises = pagesToClose.map(async (page, index) => {
83
140
  try {
84
141
  if (!page.isClosed()) {
142
+ if (forceDebug) {
143
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Closing page: ${page.url()}`));
144
+ }
85
145
  await page.close();
86
146
  return { success: true, url: page.url() || `page-${index}`, estimatedMemory: pageMemoryEstimates[index] };
87
147
  }
88
148
  return { success: false, reason: 'already_closed', estimatedMemory: 0 };
89
149
  } catch (closeErr) {
90
150
  if (forceDebug) {
91
- console.log(formatLogMessage('debug', `[group_window_cleanup] Failed to close page ${index + 1}: ${closeErr.message}`));
151
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Failed to close old page ${index + 1}: ${closeErr.message}`));
92
152
  }
93
153
  return { success: false, error: closeErr.message, estimatedMemory: 0 };
94
154
  }
@@ -114,18 +174,22 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
114
174
  };
115
175
 
116
176
  if (forceDebug) {
117
- console.log(formatLogMessage('debug', `[group_window_cleanup] Closed ${successfulCloses}/${extraPages.length} windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`));
177
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Closed ${successfulCloses}/${pagesToClose.length} old windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`));
118
178
  console.log(formatLogMessage('debug', `[group_window_cleanup] Estimated memory freed: ${formatMemory(actualMemoryFreed)}`));
179
+ if (mainPuppeteerPage) {
180
+ console.log(formatLogMessage('debug', `[group_window_cleanup] Main Puppeteer window preserved: ${mainPuppeteerPage.url()}`));
181
+ }
119
182
  }
120
183
 
121
184
  return {
122
185
  success: true,
123
186
  closedCount: successfulCloses,
124
187
  totalPages: allPages.length,
125
- mainPageKept: !mainPage.isClosed(),
188
+ mainPagePreserved: mainPuppeteerPage && !mainPuppeteerPage.isClosed(),
126
189
  delayUsed: WINDOW_CLEANUP_DELAY_MS,
127
190
  estimatedMemoryFreed: actualMemoryFreed,
128
- estimatedMemoryFreedFormatted: formatMemory(actualMemoryFreed)
191
+ estimatedMemoryFreedFormatted: formatMemory(actualMemoryFreed),
192
+ cleanupMode: cleanupMode === "all" ? "all" : "default"
129
193
  };
130
194
  } catch (cleanupErr) {
131
195
  if (forceDebug) {
@@ -135,6 +199,237 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
135
199
  }
136
200
  }
137
201
 
202
+ /**
203
+ * Checks if a page is safe to close (not actively processing)
204
+ * @param {import('puppeteer').Page} page - Page to check
205
+ * @param {boolean} forceDebug - Debug logging flag
206
+ * @returns {Promise<boolean>} True if page is safe to close
207
+ */
208
+ async function isPageSafeToClose(page, forceDebug) {
209
+ try {
210
+ if (page.isClosed()) {
211
+ return true; // Already closed
212
+ }
213
+
214
+ const usage = pageUsageTracker.get(page);
215
+ if (!usage) {
216
+ // No usage data recorded for this page - treat it as safe to close
217
+ return true;
218
+ }
219
+
220
+ // Check if page is actively processing
221
+ if (usage.isProcessing) {
222
+ if (forceDebug) {
223
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Page still processing: ${page.url().substring(0, 50)}...`));
224
+ }
225
+ return false;
226
+ }
227
+
228
+ // Check if page has been idle long enough
229
+ const idleTime = Date.now() - usage.lastActivity;
230
+ const isSafe = idleTime >= PAGE_IDLE_THRESHOLD;
231
+
232
+ if (!isSafe && forceDebug) {
233
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Page not idle long enough: ${Math.round(idleTime/1000)}s < ${PAGE_IDLE_THRESHOLD/1000}s`));
234
+ }
235
+
236
+ return isSafe;
237
+ } catch (err) {
238
+ if (forceDebug) {
239
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Error checking page safety: ${err.message}`));
240
+ }
241
+ return true; // Assume safe if we can't check
242
+ }
243
+ }
244
+
245
+ /**
246
+ * Updates page usage tracking
247
+ * @param {import('puppeteer').Page} page - Page to update
248
+ * @param {boolean} isProcessing - Whether page is actively processing
249
+ */
250
+ function updatePageUsage(page, isProcessing = false) {
251
+ try {
252
+ if (!page.isClosed()) {
253
+ pageUsageTracker.set(page, {
254
+ lastActivity: Date.now(),
255
+ isProcessing: isProcessing
256
+ });
257
+ }
258
+ } catch (err) {
259
+ // Ignore errors in usage tracking
260
+ }
261
+ }
262
+
263
+ /**
264
+ * Performs realtime window cleanup - removes oldest pages when threshold is exceeded
265
+ * Waits for site delay + REALTIME_CLEANUP_BUFFER_MS (15 seconds) before cleanup to ensure delayed requests are captured
266
+ * @param {import('puppeteer').Browser} browserInstance - Browser instance
267
+ * @param {number} threshold - Maximum number of pages to keep (default: 8)
268
+ * @param {boolean} forceDebug - Debug logging flag
269
+ * @param {number} siteDelay - Current site's delay value in milliseconds (default: 4000)
270
+ * @returns {Promise<Object>} Cleanup results
271
+ */
272
+ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIME_CLEANUP_THRESHOLD, forceDebug, siteDelay = 4000) {
273
+ try {
274
+ const allPages = await browserInstance.pages();
275
+
276
+ // Skip cleanup if we don't have enough pages to warrant it
277
+ if (allPages.length <= Math.max(threshold, REALTIME_CLEANUP_MIN_PAGES)) {
278
+ if (forceDebug) {
279
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Only ${allPages.length} pages open, threshold is ${threshold} - no cleanup needed`));
280
+ }
281
+ return { success: true, closedCount: 0, totalPages: allPages.length, reason: 'below_threshold' };
282
+ }
283
+
284
+ // Calculate cleanup delay: site delay + REALTIME_CLEANUP_BUFFER_MS (15 second) buffer
285
+ const cleanupDelay = siteDelay + REALTIME_CLEANUP_BUFFER_MS;
286
+
287
+ if (forceDebug) {
288
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Waiting ${cleanupDelay}ms (site delay: ${siteDelay}ms + ${REALTIME_CLEANUP_BUFFER_MS}ms buffer) before cleanup (threshold: ${threshold})`));
289
+ }
290
+ await new Promise(resolve => setTimeout(resolve, cleanupDelay));
291
+
292
+ const allPagesAfterDelay = await browserInstance.pages();
293
+
294
+ // Find main Puppeteer page (usually about:blank)
295
+ let mainPage = allPagesAfterDelay.find(page => {
296
+ const url = page.url();
297
+ return url === 'about:blank' || url === '' || url.startsWith('chrome://');
298
+ }) || allPagesAfterDelay[0]; // Fallback to first page
299
+
300
+ // Get pages sorted by creation time (oldest first)
301
+ const sortedPages = allPagesAfterDelay
302
+ .filter(page => page !== mainPage && !page.isClosed())
303
+ .sort((a, b) => {
304
+ const timeA = pageCreationTracker.get(a) || 0;
305
+ const timeB = pageCreationTracker.get(b) || 0;
306
+ return timeA - timeB; // Oldest first
307
+ });
308
+
309
+ // Calculate how many pages to close
310
+ const pagesToKeep = threshold - 1; // -1 for main page
311
+ const pagesToClose = sortedPages.slice(0, Math.max(0, sortedPages.length - pagesToKeep));
312
+
313
+ // Filter out pages that are still being used
314
+ const safetyChecks = await Promise.all(
315
+ pagesToClose.map(page => isPageSafeToClose(page, forceDebug))
316
+ );
317
+
318
+ const safePagesToClose = pagesToClose.filter((page, index) => safetyChecks[index]);
319
+ const unsafePagesCount = pagesToClose.length - safePagesToClose.length;
320
+
321
+ if (unsafePagesCount > 0 && forceDebug) {
322
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Skipping ${unsafePagesCount} active pages for safety`));
323
+ }
324
+
325
+ if (safePagesToClose.length === 0) {
326
+ if (forceDebug) {
327
+ const reason = pagesToClose.length === 0 ?
328
+ `${sortedPages.length} content pages, keeping ${pagesToKeep}` :
329
+ `${pagesToClose.length} pages still active`;
330
+ console.log(formatLogMessage('debug', `[realtime_cleanup] No pages need closing (${reason})`));
331
+ }
332
+ return { success: true, closedCount: 0, totalPages: allPagesAfterDelay.length, reason: 'no_cleanup_needed' };
333
+ }
334
+
335
+ // Close oldest pages
336
+ let closedCount = 0;
337
+ for (const page of safePagesToClose) {
338
+ try {
339
+ if (!page.isClosed()) {
340
+ const pageUrl = page.url();
341
+ await page.close();
342
+ pageCreationTracker.delete(page); // Remove from tracker
343
+ closedCount++;
344
+
345
+ if (forceDebug) {
346
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Closed old page: ${pageUrl.substring(0, 50)}...`));
347
+ }
348
+ }
349
+ } catch (closeErr) {
350
+ if (forceDebug) {
351
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Failed to close page: ${closeErr.message}`));
352
+ }
353
+ }
354
+ }
355
+
356
+ const remainingPages = allPagesAfterDelay.length - closedCount;
357
+
358
+ if (forceDebug) {
359
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Closed ${closedCount}/${pagesToClose.length} oldest pages (${unsafePagesCount} skipped for safety), ${remainingPages} pages remaining`));
360
+ }
361
+
362
+ return {
363
+ success: true,
364
+ closedCount,
365
+ totalPages: allPagesAfterDelay.length,
366
+ remainingPages,
367
+ threshold,
368
+ cleanupDelay,
369
+ reason: 'cleanup_completed'
370
+ };
371
+ } catch (cleanupErr) {
372
+ if (forceDebug) {
373
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Cleanup failed: ${cleanupErr.message}`));
374
+ }
375
+ return { success: false, error: cleanupErr.message, closedCount: 0 };
376
+ }
377
+ }
378
+
379
+ /**
380
+ * Determines if a page appears to be from a previous scan and can be safely closed
381
+ * @param {import('puppeteer').Page} page - Page to evaluate
382
+ * @param {boolean} forceDebug - Debug logging flag
383
+ * @returns {Promise<boolean>} True if page appears to be from previous scan
384
+ */
385
+ async function isPageFromPreviousScan(page, forceDebug) {
386
+ try {
387
+ const url = page.url();
388
+
389
+ // Blank, internal, and data: pages are never reported as old by this check
390
+ if (url === 'about:blank' ||
391
+ url === '' ||
392
+ url.startsWith('chrome://') ||
393
+ url.startsWith('chrome-error://') ||
394
+ url.startsWith('data:')) {
395
+ return false; // Don't close blank pages here, handled separately
396
+ }
397
+
398
+ // Use the page title as a heuristic for staleness
399
+ // Pages left over from previous scans often have an empty or error-like title
400
+ try {
401
+ const title = await page.title();
402
+ // Pages with generic titles or error states are likely old
403
+ if (title.includes('404') ||
404
+ title.includes('Error') ||
405
+ title.includes('Not Found') ||
406
+ title === '') {
407
+ return true;
408
+ }
409
+ } catch (titleErr) {
410
+ // If we can't get title, page might be in bad state
411
+ return true;
412
+ }
413
+
414
+ // Default: content pages without an empty or error-like title are kept
415
+ return false; // Conservative - don't close unless we're sure
416
+ } catch (err) {
417
+ if (forceDebug) {
418
+ console.log(formatLogMessage('debug', `[isPageFromPreviousScan] Error evaluating page ${page.url()}: ${err.message}`));
419
+ }
420
+ return false; // Conservative - don't close if we can't evaluate
421
+ }
422
+ }
423
+
424
+ /**
425
+ * Tracks a new page for realtime cleanup purposes
426
+ * @param {import('puppeteer').Page} page - Page to track
427
+ */
428
+ function trackPageForRealtime(page) {
429
+ pageCreationTracker.set(page, ++pageCreationCounter);
430
+ updatePageUsage(page, false); // Initialize usage tracking
431
+ }
432
+
138
433
  /**
139
434
  * Quick browser responsiveness test for use during page setup
140
435
  * Designed to catch browser degradation between operations
@@ -672,10 +967,28 @@ module.exports = {
672
967
  checkBrowserMemory,
673
968
  testBrowserConnectivity,
674
969
  performGroupWindowCleanup,
970
+ performRealtimeWindowCleanup,
971
+ trackPageForRealtime,
675
972
  testNetworkCapability,
676
973
  isQuicklyResponsive,
677
974
  performHealthAssessment,
678
975
  monitorBrowserHealth,
679
976
  isBrowserHealthy,
680
- isCriticalProtocolError
977
+ isCriticalProtocolError,
978
+ updatePageUsage
681
979
  };
980
+
981
+ // Clean up tracking maps when pages are closed
982
+ const originalPageClose = require('puppeteer').Page.prototype.close;
983
+ if (originalPageClose) {
984
+ require('puppeteer').Page.prototype.close = async function(...args) {
985
+ try {
986
+ // Clean up tracking data
987
+ pageCreationTracker.delete(this);
988
+ pageUsageTracker.delete(this);
989
+ } catch (err) {
990
+ // Ignore cleanup errors
991
+ }
992
+ return originalPageClose.apply(this, args);
993
+ };
994
+ }
package/nwss.1 CHANGED
@@ -1,4 +1,4 @@
1
- .TH NWSS-SCRIPT 1 "2025" "scanner-script v1.0.32" "User Commands"
1
+ .TH NWSS-SCRIPT 1 "2025" "scanner-script v1.0.98" "User Commands"
2
2
  .SH NAME
3
3
  NWSS scanner-script \- Network scanner for malware detection and domain analysis with advanced similarity filtering
4
4
 
@@ -27,6 +27,10 @@ Remove rules that already exist in \fIFILE\fR before output (requires \fB\-o\fR)
27
27
  .B \--append
28
28
  Append new rules to output file instead of overwriting (requires \fB\-o\fR).
29
29
 
30
+ .TP
31
+ .BR \--color ", " \--colour
32
+ Enable colored console output for status messages.
33
+
30
34
  .SS Output Format Options
31
35
  .TP
32
36
  .B \--localhost
@@ -125,6 +129,14 @@ Console output only: show matching regex, titles, whois/dig/searchstring results
125
129
  .B \--remove-tempfiles
126
130
  Remove Chrome/Puppeteer temporary files before exit.
127
131
 
132
+ .TP
133
+ .BR \--max-concurrent " \fINUMBER\fR"
134
+ Maximum concurrent site processing (1-50, overrides config/default).
135
+
136
+ .TP
137
+ .BR \--cleanup-interval " \fINUMBER\fR"
138
+ Browser restart interval in URLs processed (1-1000, overrides config/default).
139
+
128
140
  .TP
129
141
  .BR \-h ", " \--help
130
142
  Show help message and exit.
@@ -134,6 +146,10 @@ Show help message and exit.
134
146
  Show version information and exit.
135
147
 
136
148
  .SS Validation Options
149
+ .TP
150
+ .B \--cache-requests
151
+ Cache HTTP requests to avoid re-requesting same URLs within scan.
152
+
137
153
  .TP
138
154
  .B \--validate-config
139
155
  Validate config.json file and exit.
@@ -150,6 +166,14 @@ Clean rule files by removing invalid lines and optionally duplicates (uses \fB\-
150
166
  .B \--test-validation
151
167
  Run domain validation tests and exit.
152
168
 
169
+ .TP
170
+ .B \--clear-cache
171
+ Clear persistent cache before scanning (improves fresh start performance).
172
+
173
+ .TP
174
+ .B \--ignore-cache
175
+ Bypass all smart caching functionality during scanning.
176
+
153
177
  .SH CONFIGURATION
154
178
 
155
179
  Configuration is provided via JSON files. The default configuration file is \fBconfig.json\fR.
@@ -184,6 +208,30 @@ Number. Similarity threshold percentage for ignore_similar (default: 80).
184
208
  .B ignore_similar_ignored_domains
185
209
  Boolean. Ignore domains similar to ignoreDomains list (default: true).
186
210
 
211
+ .TP
212
+ .B max_concurrent_sites
213
+ Number. Maximum concurrent site processing (1-50, default: 6).
214
+
215
+ .TP
216
+ .B resource_cleanup_interval
217
+ Number. Browser restart interval in URLs processed (1-1000, default: 80).
218
+
219
+ .TP
220
+ .B cache_path
221
+ String. Directory path for persistent cache storage (default: ".cache").
222
+
223
+ .TP
224
+ .B cache_max_size
225
+ Number. Maximum number of entries in cache (default: 5000).
226
+
227
+ .TP
228
+ .B cache_autosave_minutes
229
+ Number. Interval for automatic cache saves in minutes (default: 1).
230
+
231
+ .TP
232
+ .B cache_requests
233
+ Boolean. Enable HTTP request response caching (default: false).
234
+
187
235
  .SS Per-Site Configuration Options
188
236
 
189
237
  .TP
@@ -194,6 +242,10 @@ Single URL string or array of URLs to scan.
194
242
  .B filterRegex
195
243
  Regex pattern(s) to match suspicious requests.
196
244
 
245
+ .TP
246
+ .B regex_and
247
+ Boolean. Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL (default: false).
248
+
197
249
  .TP
198
250
  .B comments
199
251
  Documentation strings or notes - completely ignored by the scanner. Can be a single string or array of strings. Used for adding context, URLs, timestamps, or any documentation notes to configuration files.
@@ -234,6 +286,10 @@ Spoof User-Agent: \fB"chrome"\fR, \fB"firefox"\fR, or \fB"safari"\fR.
234
286
  .B interact
235
287
  Boolean. Simulate mouse movements and clicks.
236
288
 
289
+ .TP
290
+ .B interact_intensity
291
+ String. Interaction simulation intensity: \fB"low"\fR, \fB"medium"\fR, \fB"high"\fR (default: "medium").
292
+
237
293
  .TP
238
294
  .B delay
239
295
  Milliseconds to wait after page load (default: 4000).
@@ -242,6 +298,10 @@ Milliseconds to wait after page load (default: 4000).
242
298
  .B reload
243
299
  Number of times to reload the page (default: 1).
244
300
 
301
+ .TP
302
+ .B forcereload
303
+ Boolean. Force one additional reload after the normal reloads have completed.
304
+
245
305
  .TP
246
306
  .B timeout
247
307
  Request timeout in milliseconds (default: 30000).
@@ -258,6 +318,14 @@ Boolean. Allow third-party request matching (default: true).
258
318
  .B fingerprint_protection
259
319
  Boolean or \fB"random"\fR. Enable browser fingerprint spoofing.
260
320
 
321
+ .TP
322
+ .B referrer_headers
323
+ String, Array, or Object. Set referrer header for realistic traffic sources.
324
+
325
+ .TP
326
+ .B custom_headers
327
+ Object. Add custom HTTP headers to requests.
328
+
261
329
  .TP
262
330
  .B ignore_similar
263
331
  Boolean. Override global ignore_similar setting for this site.
@@ -274,6 +342,10 @@ Boolean. Override global ignore_similar_ignored_domains for this site.
274
342
  .B even_blocked
275
343
  Boolean. Add matching rules even if requests are blocked (default: false).
276
344
 
345
+ .TP
346
+ .B bypass_cache
347
+ Boolean. Skip all caching for this site's URLs (default: false).
348
+
277
349
  .TP
278
350
  .B whois
279
351
  Array of terms that must ALL be found in WHOIS data (AND logic).
@@ -353,10 +425,6 @@ Object. Custom page.goto() options for Puppeteer navigation. Available options:
353
425
  .RE
354
426
  Example: \fB{"waitUntil": "networkidle2", "timeout": 60000}\fR
355
427
 
356
- .TP
357
- .B forcereload
358
- Boolean. Force an additional reload with cache disabled after normal reloads.
359
-
360
428
  .TP
361
429
  .B clear_sitedata
362
430
  Boolean. Clear all cookies, cache, and storage before each page load (default: false).
@@ -389,6 +457,45 @@ Boolean. Launch browser with GUI for this specific site.
389
457
  .B adblock_rules
390
458
  Boolean. Generate adblock filter rules with resource types for this site.
391
459
 
460
+ .TP
461
+ .B window_cleanup
462
+ Boolean or String. Close old/unused browser windows and tabs after entire URL group completes (default: false).
463
+ .RS
464
+ .IP \(bu 4
465
+ \fBfalse\fR - No cleanup performed
466
+ .IP \(bu 4
467
+ \fBtrue\fR - Conservative cleanup: closes pages that appear to be from previous scans while preserving main Puppeteer window
468
+ .IP \(bu 4
469
+ \fB"all"\fR - Aggressive cleanup: closes all content pages from previous operations while preserving main Puppeteer window
470
+ .RE
471
+ Both modes wait 15 seconds before cleanup to allow final operations to complete, and always preserve the main browser window to maintain browser instance stability. Cleanup targets popup windows, extra tabs, and pages from previously completed site groups, but never affects actively processing pages.
472
+
473
+ .TP
474
+
475
+ .SS Redirect Handling Options
476
+
477
+ .TP
478
+ .B follow_redirects
479
+ Boolean. Follow redirects to new domains (default: true).
480
+
481
+ .TP
482
+ .B max_redirects
483
+ Number. Maximum number of redirects to follow (default: 10).
484
+
485
+ .TP
486
+ .B js_redirect_timeout
487
+ Milliseconds. Time to wait for JavaScript redirects (default: 5000).
488
+
489
+ .TP
490
+ .B detect_js_patterns
491
+ Boolean. Analyze page source for redirect patterns (default: true).
492
+
493
+ .TP
494
+ .B redirect_timeout_multiplier
495
+ Number. Increase timeout for redirected URLs (default: 1.5).
496
+
497
+ .SS Cloudflare Protection Options
498
+
392
499
  .TP
393
500
  .B cloudflare_phish
394
501
  Boolean. Auto-click through Cloudflare phishing warnings (default: false).
@@ -397,6 +504,24 @@ Boolean. Auto-click through Cloudflare phishing warnings (default: false).
397
504
  .B cloudflare_bypass
398
505
  Boolean. Auto-solve Cloudflare "Verify you are human" challenges (default: false).
399
506
 
507
+ .TP
508
+ .B cloudflare_parallel_detection
509
+ Boolean. Use parallel detection for faster Cloudflare checks (default: true).
510
+
511
+ .TP
512
+ .B cloudflare_max_retries
513
+ Number. Maximum retry attempts for Cloudflare operations (default: 3).
514
+
515
+ .TP
516
+ .B cloudflare_cache_ttl
517
+ Milliseconds. TTL for Cloudflare detection cache (default: 300000 - 5 minutes).
518
+
519
+ .TP
520
+ .B cloudflare_retry_on_error
521
+ Boolean. Enable retry logic for Cloudflare operations (default: true).
522
+
523
+ .SS FlowProxy Protection Options
524
+
400
525
  .TP
401
526
  .B flowproxy_detection
402
527
  Boolean. Enable flowProxy protection detection and handling (default: false).
@@ -421,6 +546,24 @@ Milliseconds. Delay for rate limiting (default: 30000).
421
546
  .B flowproxy_additional_delay
422
547
  Milliseconds. Additional processing delay (default: 5000).
423
548
 
549
+ .SS Advanced Options
550
+
551
+ .TP
552
+ .B interact_duration
553
+ Milliseconds. Duration of interaction simulation (default: 2000).
554
+
555
+ .TP
556
+ .B interact_scrolling
557
+ Boolean. Enable scrolling simulation (default: true).
558
+
559
+ .TP
560
+ .B interact_clicks
561
+ Boolean. Enable element clicking simulation (default: false).
562
+
563
+ .TP
564
+ .B interact_typing
565
+ Boolean. Enable typing simulation (default: false).
566
+
424
567
  .TP
425
568
  .B verbose
426
569
  Boolean. Enable verbose output for this specific site.
@@ -461,6 +604,28 @@ Boolean. Force Pi-hole regex output format for this site.
461
604
  .B plain
462
605
  Boolean. Force plain domain output for this site.
463
606
 
607
+ .SS Referrer Header Options
608
+
609
+ .B Simple formats:
610
+ .RS
611
+ .IP \(bu 4
612
+ \fB"referrer_headers": "https://google.com/search?q=example"\fR
613
+ .IP \(bu 4
614
+ \fB"referrer_headers": ["url1", "url2"]\fR
615
+ .RE
616
+
617
+ .B Smart modes:
618
+ .RS
619
+ .IP \(bu 4
620
+ \fB"referrer_headers": {"mode": "random_search", "search_terms": ["reviews"]}\fR
621
+ .IP \(bu 4
622
+ \fB"referrer_headers": {"mode": "social_media"}\fR
623
+ .IP \(bu 4
624
+ \fB"referrer_headers": {"mode": "direct_navigation"}\fR
625
+ .IP \(bu 4
626
+ \fB"referrer_headers": {"mode": "custom", "custom": ["https://news.ycombinator.com/"]}\fR
627
+ .RE
628
+
464
629
  .SH SIMILARITY FILTERING
465
630
 
466
631
  The scanner includes advanced similarity filtering to reduce noise and improve detection accuracy by automatically ignoring domains that are very similar to ones already found or explicitly ignored.
@@ -530,6 +695,16 @@ With default settings (\fBignore_similar_threshold: 80\fR):
530
695
  }
531
696
  .EE
532
697
 
698
+ .SS Configuration with regex AND logic:
699
+ .EX
700
+ {
701
+ "url": "https://ad-network.com",
702
+ "filterRegex": ["tracking", "analytics"],
703
+ "regex_and": true,
704
+ "resourceTypes": ["script", "fetch"]
705
+ }
706
+ .EE
707
+
533
708
  .SS Configuration with similarity filtering:
534
709
  .EX
535
710
  {
@@ -661,6 +836,36 @@ node nwss.js -o dnsmasq.conf --dnsmasq --titles
661
836
  node nwss.js -o pihole_regex.txt --pihole --debug
662
837
  .EE
663
838
 
839
+ .SS Performance tuning with concurrency:
840
+ .EX
841
+ node nwss.js --max-concurrent 12 --cleanup-interval 100 -o rules.txt
842
+ .EE
843
+
844
+ .SS Cache management:
845
+ .EX
846
+ node nwss.js --clear-cache --cache-requests -o rules.txt
847
+ node nwss.js --ignore-cache --debug -o rules.txt
848
+ .EE
849
+
850
+ .SS Stealth configuration with referrer headers:
851
+ .EX
852
+ {
853
+ "url": "https://e-commerce-site.com",
854
+ "userAgent": "chrome",
855
+ "fingerprint_protection": "random",
856
+ "referrer_headers": {
857
+ "mode": "random_search",
858
+ "search_terms": ["product reviews", "best deals"]
859
+ },
860
+ "custom_headers": {
861
+ "Accept-Language": "en-US,en;q=0.9"
862
+ },
863
+ "interact": true,
864
+ "interact_intensity": "high",
865
+ "filterRegex": "analytics|tracking|ads"
866
+ }
867
+ .EE
868
+
664
869
  .SS Cloudflare bypass and fingerprint spoofing:
665
870
  .EX
666
871
  {
@@ -688,6 +893,35 @@ node nwss.js -o pihole_regex.txt --pihole --debug
688
893
  }
689
894
  .EE
690
895
 
896
+ .SS Window cleanup for memory management:
897
+ .EX
898
+ {
899
+ "url": [
900
+ "https://site1.com",
901
+ "https://site2.com",
902
+ "https://site3.com"
903
+ ],
904
+ "filterRegex": "\\\\.(space|website)\\\\b",
905
+ "window_cleanup": "all",
906
+ "resourceTypes": ["script", "fetch"]
907
+ }
908
+ .EE
909
+
910
+ .SS Conservative window cleanup:
911
+ .EX
912
+ {
913
+ "url": "https://popup-heavy-site.com",
914
+ "filterRegex": "\\\\.(top|buzz)\\\\b",
915
+ "window_cleanup": true,
916
+ "interact": true,
917
+ "reload": 2,
918
+ "comments": [
919
+ "Site opens many popup windows",
920
+ "Conservative cleanup preserves potentially active content"
921
+ ]
922
+ }
923
+ .EE
924
+
691
925
  .SH OUTPUT FORMATS
692
926
 
693
927
  The scanner supports multiple output formats for different blocking systems:
@@ -740,6 +974,10 @@ Default configuration file containing scan targets and rules.
740
974
  .B logs/
741
975
  Directory created for debug and matched URL logs when \fB\--debug\fR or \fB\--dumpurls\fR is used.
742
976
 
977
+ .TP
978
+ .B .cache/
979
+ Default cache directory for smart caching functionality.
980
+
743
981
  .TP
744
982
  .B user.action
745
983
  Common Privoxy action file when using \fB\--privoxy\fR output.
@@ -747,7 +985,7 @@ Common Privoxy action file when using \fB\--privoxy\fR output.
747
985
  .SH DETECTION METHODS
748
986
 
749
987
  .SS URL Pattern Matching
750
- Uses regex patterns to identify suspicious domains and request URLs.
988
+ Uses regex patterns to identify suspicious domains and request URLs with support for AND/OR logic.
751
989
 
752
990
  .SS Content Analysis
753
991
  Downloads page content with curl and searches for malicious strings using JavaScript or grep.
@@ -784,6 +1022,28 @@ Detects and handles FlowProxy protection systems.
784
1022
  .SS Intelligent Domain Filtering
785
1023
  Advanced similarity algorithms prevent duplicate detection across international domains and variations.
786
1024
 
1025
+ .SS Smart Caching
1026
+ Persistent caching system for improved performance across multiple scans.
1027
+
1028
+ .SS Window Management
1029
+ Intelligent browser window and tab cleanup to prevent memory accumulation:
1030
+ .RS
1031
+ .IP \(bu 4
1032
+ \fBConservative mode\fR (\fBwindow_cleanup: true\fR): Selectively closes pages that appear to be leftovers from previous scans based on URL patterns and page state analysis
1033
+ .IP \(bu 4
1034
+ \fBAggressive mode\fR (\fBwindow_cleanup: "all"\fR): Closes all content pages from previous operations for maximum memory recovery
1035
+ .IP \(bu 4
1036
+ \fBMain window preservation\fR: Both modes always preserve the main Puppeteer browser window (typically about:blank) to maintain browser instance stability
1037
+ .IP \(bu 4
1038
+ \fBPopup window handling\fR: Automatically detects and closes popup windows created by previous site scans
1039
+ .IP \(bu 4
1040
+ \fBTiming protection\fR: 16-second delay ensures no active operations are interrupted during cleanup
1041
+ .IP \(bu 4
1042
+ \fBMemory estimation\fR: Reports estimated memory freed from closed windows for performance monitoring
1043
+ .IP \(bu 4
1044
+ \fBActive page protection\fR: Never affects pages currently being processed by concurrent scanning operations
1045
+ .RE
1046
+
787
1047
  .SH EXIT STATUS
788
1048
  .TP
789
1049
  .B 0
@@ -821,4 +1081,4 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
821
1081
  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
822
1082
 
823
1083
  You should have received a copy of the GNU General Public License along with
824
- this program. If not, see <https://www.gnu.org/licenses/>.
1084
+ this program. If not, see <https://www.gnu.org/licenses/>.
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.97 ===
1
+ // === Network scanner script (nwss.js) v1.0.99 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -82,6 +82,8 @@ const CONCURRENCY_LIMITS = {
82
82
  HIGH_CONCURRENCY_THRESHOLD: 12 // Auto-enable aggressive caching above this
83
83
  };
84
84
 
85
+ const REALTIME_CLEANUP_THRESHOLD = 8; // Default pages to keep for realtime cleanup
86
+
85
87
  /**
86
88
  * Detects the installed Puppeteer version dynamically
87
89
  * @returns {Object} Version info and compatibility settings
@@ -122,10 +124,10 @@ function detectPuppeteerVersion() {
122
124
  // Enhanced redirect handling
123
125
  const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
124
126
  // Ensure web browser is working correctly
125
- const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup } = require('./lib/browserhealth');
127
+ const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage } = require('./lib/browserhealth');
126
128
 
127
129
  // --- Script Configuration & Constants ---
128
- const VERSION = '1.0.97'; // Script version
130
+ const VERSION = '1.0.99'; // Script version
129
131
 
130
132
  // get startTime
131
133
  const startTime = Date.now();
@@ -571,7 +573,11 @@ Advanced Options:
571
573
  dig_subdomain: true/false Use subdomain for dig lookup instead of root domain (default: false)
572
574
  digRecordType: "A" DNS record type for dig (default: A)
573
575
 
574
- window_cleanup: true/false Close extra browser windows/tabs after entire URL group completes with 5s delay (default: false)
576
+ window_cleanup: true/false/"realtime"/"all" Window cleanup mode:
577
+ true/false - Close extra windows after URL group completes (default: false)
578
+ "realtime" - Continuously cleanup oldest pages when threshold exceeded
579
+ "all" - Aggressive cleanup of all content pages after group
580
+ window_cleanup_threshold: <number> For realtime mode: max pages to keep open (default: 8)
575
581
 
576
582
  Referrer Header Options:
577
583
  referrer_headers: "https://google.com" Single referrer URL
@@ -1474,7 +1480,27 @@ function setupFrameHandling(page, forceDebug) {
1474
1480
  throw new Error('Failed to create valid page instance');
1475
1481
  }
1476
1482
 
1477
-
1483
+ // Track page for realtime cleanup
1484
+ trackPageForRealtime(page);
1485
+
1486
+ // Mark page as actively processing
1487
+ updatePageUsage(page, true);
1488
+
1489
+ // Perform realtime cleanup if enabled
1490
+ if (siteConfig.window_cleanup === "realtime") {
1491
+ const threshold = typeof siteConfig.window_cleanup_threshold === 'number'
1492
+ ? siteConfig.window_cleanup_threshold
1493
+ : REALTIME_CLEANUP_THRESHOLD;
1494
+
1495
+ // Get the site's delay value for cleanup timing
1496
+ const siteDelay = siteConfig.delay || 4000;
1497
+
1498
+ const realtimeResult = await performRealtimeWindowCleanup(browserInstance, threshold, forceDebug, siteDelay);
1499
+ if (realtimeResult.success && realtimeResult.closedCount > 0 && forceDebug) {
1500
+ console.log(formatLogMessage('debug', `[realtime_cleanup] Cleaned ${realtimeResult.closedCount} old pages, ${realtimeResult.remainingPages} remaining`));
1501
+ }
1502
+ }
1503
+
1478
1504
  // Set aggressive timeouts for problematic operations
1479
1505
  // Optimized timeouts for Puppeteer 23.x responsiveness
1480
1506
  page.setDefaultTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_PAGE_REDUCED));
@@ -1850,9 +1876,9 @@ function setupFrameHandling(page, forceDebug) {
1850
1876
  let curlUserAgent = '';
1851
1877
  if (useCurl && siteConfig.userAgent) {
1852
1878
  const userAgents = {
1853
- chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
1854
- firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
1855
- safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15"
1879
+ chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1880
+ firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
1881
+ safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"
1856
1882
  };
1857
1883
  curlUserAgent = userAgents[siteConfig.userAgent.toLowerCase()] || '';
1858
1884
  }
@@ -2612,6 +2638,9 @@ function setupFrameHandling(page, forceDebug) {
2612
2638
  request.continue();
2613
2639
  });
2614
2640
 
2641
+ // Mark page as actively processing network requests
2642
+ updatePageUsage(page, true);
2643
+
2615
2644
  // Add response handler ONLY if searchstring/searchstring_and is defined AND neither curl nor grep is enabled
2616
2645
  if ((hasSearchString || hasSearchStringAnd) && !useCurl && !useGrep) {
2617
2646
  const responseHandler = createResponseHandler({
@@ -2649,6 +2678,9 @@ function setupFrameHandling(page, forceDebug) {
2649
2678
  // Create optimized interaction configuration for this site
2650
2679
  const interactionConfig = createInteractionConfig(currentUrl, siteConfig);
2651
2680
 
2681
+ // Mark page as actively processing interactions
2682
+ updatePageUsage(page, true);
2683
+
2652
2684
  // --- Runtime CSS Element Blocking (Fallback) ---
2653
2685
  // Apply CSS blocking after page load as a fallback in case evaluateOnNewDocument didn't work
2654
2686
  if (cssBlockedSelectors && Array.isArray(cssBlockedSelectors) && cssBlockedSelectors.length > 0) {
@@ -2875,6 +2907,9 @@ function setupFrameHandling(page, forceDebug) {
2875
2907
  console.log(formatLogMessage('info', `${messageColors.loaded('Loaded:')} (${siteCounter}/${totalUrls}) ${currentUrl}`));
2876
2908
  await page.evaluate(() => { console.log('Safe to evaluate on loaded page.'); });
2877
2909
 
2910
+ // Mark page as processing frames
2911
+ updatePageUsage(page, true);
2912
+
2878
2913
  // Wait for iframes to load and log them
2879
2914
  if (forceDebug) {
2880
2915
  try {
@@ -2899,6 +2934,9 @@ function setupFrameHandling(page, forceDebug) {
2899
2934
  console.log(formatLogMessage('debug', `Frame debugging failed: ${frameDebugErr.message}`));
2900
2935
  }
2901
2936
  }
2937
+
2938
+ // Page finished initial loading - mark as idle
2939
+ updatePageUsage(page, false);
2902
2940
  } catch (err) {
2903
2941
  // Enhanced error handling for redirect timeouts using redirect module
2904
2942
  const timeoutResult = await handleRedirectTimeout(page, currentUrl, err, safeGetDomain, forceDebug, formatLogMessage);
@@ -2916,6 +2954,9 @@ function setupFrameHandling(page, forceDebug) {
2916
2954
 
2917
2955
  if (interactEnabled && !disableInteract) {
2918
2956
  if (forceDebug) console.log(formatLogMessage('debug', `interaction simulation enabled for ${currentUrl}`));
2957
+
2958
+ // Mark page as processing during interactions
2959
+ updatePageUsage(page, true);
2919
2960
  // Use enhanced interaction module
2920
2961
  await performPageInteraction(page, currentUrl, interactionConfig, forceDebug);
2921
2962
  }
@@ -2945,6 +2986,9 @@ function setupFrameHandling(page, forceDebug) {
2945
2986
  // Use fast timeout helper for consistent Puppeteer 23.x compatibility
2946
2987
 
2947
2988
  // Handle reloads - use force reload mechanism if forcereload is enabled
2989
+ // Mark page as processing during reloads
2990
+ updatePageUsage(page, true);
2991
+
2948
2992
  const totalReloads = (siteConfig.reload || 1) - 1; // Subtract 1 because initial load counts as first
2949
2993
  const useForceReload = siteConfig.forcereload === true;
2950
2994
 
@@ -3049,6 +3093,9 @@ function setupFrameHandling(page, forceDebug) {
3049
3093
  await fastTimeout(delayMs);
3050
3094
  }
3051
3095
  }
3096
+
3097
+ // Mark page as idle after all processing complete
3098
+ updatePageUsage(page, false);
3052
3099
 
3053
3100
  if (dryRunMode) {
3054
3101
  // Get page title for dry run output
@@ -3172,6 +3219,10 @@ function setupFrameHandling(page, forceDebug) {
3172
3219
  // Guaranteed resource cleanup - this runs regardless of success or failure
3173
3220
 
3174
3221
  if (cdpSessionManager) {
3222
+ // Mark page as idle when cleanup starts
3223
+ if (page && !page.isClosed()) {
3224
+ updatePageUsage(page, false);
3225
+ }
3175
3226
  await cdpSessionManager.cleanup();
3176
3227
  }
3177
3228
 
@@ -3405,14 +3456,19 @@ function setupFrameHandling(page, forceDebug) {
3405
3456
  for (const [siteKey, siteTasks] of tasksBySite) {
3406
3457
  const siteConfig = siteTasks[0].config; // All tasks in group have same config
3407
3458
 
3408
- if (siteConfig.window_cleanup === true) {
3459
+ if (siteConfig.window_cleanup === true || siteConfig.window_cleanup === "all" || siteConfig.window_cleanup === "realtime") {
3409
3460
  const urlCount = siteTasks.length;
3410
3461
  const groupDescription = `${urlCount} URLs from site group ${++siteGroupIndex}`;
3462
+ const cleanupMode = siteConfig.window_cleanup === "realtime" ? true : siteConfig.window_cleanup; // Pass true/"all" through unchanged; map "realtime" to true (conservative) so "realtime" is never passed to group cleanup
3411
3463
 
3412
3464
  try {
3413
- const groupCleanupResult = await performGroupWindowCleanup(browser, groupDescription, forceDebug);
3465
+ const groupCleanupResult = await performGroupWindowCleanup(browser, groupDescription, forceDebug, cleanupMode);
3414
3466
  if (!silentMode && groupCleanupResult.success && groupCleanupResult.closedCount > 0) {
3415
- console.log(`🗑️ Group cleanup: ${groupCleanupResult.closedCount} windows closed after completing ${groupDescription}`);
3467
+ const modeText = cleanupMode === "all" ? "(aggressive)" : "(conservative)";
3468
+ console.log(`🗑️ Group cleanup: ${groupCleanupResult.closedCount} old windows closed ${modeText} after completing ${groupDescription}`);
3469
+ if (groupCleanupResult.mainPagePreserved) {
3470
+ console.log(`✅ Main Puppeteer window preserved during cleanup`);
3471
+ }
3416
3472
  }
3417
3473
  } catch (groupCleanupErr) {
3418
3474
  if (forceDebug) console.log(formatLogMessage('debug', `Group window cleanup failed: ${groupCleanupErr.message}`));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.97",
3
+ "version": "1.0.99",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {