@fanboynz/network-scanner 2.0.6 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -134,7 +134,7 @@ Example:
134
134
  | Field | Values | Default | Description |
135
135
  |:---------------------|:-------|:-------:|:------------|
136
136
  | `url` | String or Array | - | Website URL(s) to scan |
137
- | `userAgent` | `chrome`, `firefox`, `safari` | - | User agent for page (latest versions: Chrome 131, Firefox 133, Safari 18.2) |
137
+ | `userAgent` | `chrome`, `chrome_mac`, `chrome_linux`, `firefox`, `firefox_mac`, `firefox_linux`, `safari` | - | User agent for page |
138
138
  | `filterRegex` | String or Array | `.*` | Regex or list of regexes to match requests |
139
139
  | `regex_and` | Boolean | `false` | Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL |
140
140
  | `comments` | String or Array | - | String of comments or references |
@@ -0,0 +1,350 @@
1
+ // === Clear Site Data Module ===
2
+ // Handles comprehensive site data clearing via CDP and page-level fallbacks
3
+ // Resolves SecurityError issues with localStorage/sessionStorage access
4
+
5
+ const { formatLogMessage } = require('./colorize');
6
+
7
/**
 * Clears site data through a dedicated CDP session, which is not subject to the
 * same-origin restrictions that page-level storage access runs into.
 *
 * Cookies and the HTTP cache are always cleared. Unless `quickMode` is set, storage
 * for the origin of `currentUrl` is cleared as well: first with a single
 * `storageTypes: 'all'` request, then type by type if that request fails.
 *
 * The function never throws; every failure is reported through the return value
 * (and through debug logging when `forceDebug` is set).
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed; only its origin is used
 * @param {boolean} forceDebug - Debug logging flag
 * @param {boolean} quickMode - If true, only clear cache/cookies (for reloads)
 * @returns {Promise<{success: boolean, operations: string[]}>} `success` is true when
 *   at least one operation completed; `operations` lists the completed ones in order
 */
async function clearSiteDataViaCDP(page, currentUrl, forceDebug, quickMode = false) {
  const SESSION_TIMEOUT_MS = 10000;
  const completedOperations = [];
  let clearDataSession = null;
  let timeoutHandle = null;

  try {
    let timedOut = false;
    const sessionPromise = Promise.resolve(page.target().createCDPSession());

    // If the timeout wins the race, a session that shows up afterwards would
    // otherwise stay attached forever, so detach it as soon as it arrives.
    sessionPromise.then(
      (lateSession) => {
        if (timedOut && lateSession) {
          Promise.resolve()
            .then(() => lateSession.detach())
            .catch(() => { /* best-effort cleanup */ });
        }
      },
      () => { /* the rejection is surfaced through the race below */ }
    );

    clearDataSession = await Promise.race([
      sessionPromise,
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => {
          timedOut = true;
          reject(new Error('CDP session timeout'));
        }, SESSION_TIMEOUT_MS);
      })
    ]);

    // Always clear cache and cookies (even in quick mode)
    const basicOperations = [
      { cmd: 'Network.clearBrowserCookies', name: 'cookies' },
      { cmd: 'Network.clearBrowserCache', name: 'cache' }
    ];

    for (const op of basicOperations) {
      try {
        await clearDataSession.send(op.cmd);
        completedOperations.push(op.name);
      } catch (opErr) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[clear_sitedata] ${op.name} clear failed: ${opErr.message}`));
        }
      }
    }

    // Full storage clearing (skip in quick mode for reloads)
    if (!quickMode) {
      // The origin is only needed here, so an unparsable URL must not prevent
      // the cookie/cache clearing above.
      let origin = null;
      try {
        origin = new URL(currentUrl).origin;
      } catch (urlErr) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[clear_sitedata] storage clear skipped, cannot derive origin: ${urlErr.message}`));
        }
      }

      if (origin !== null) {
        try {
          // Try comprehensive storage clearing first
          await clearDataSession.send('Storage.clearDataForOrigin', {
            origin,
            storageTypes: 'all'
          });
          completedOperations.push('all_storage');
        } catch (allStorageErr) {
          if (forceDebug) {
            console.log(formatLogMessage('debug', `[clear_sitedata] all_storage clear failed: ${allStorageErr.message}`));
          }

          // Fallback: try individual storage types
          const storageTypes = [
            { type: 'local_storage', name: 'localStorage' },
            { type: 'session_storage', name: 'sessionStorage' },
            { type: 'indexeddb', name: 'indexedDB' },
            { type: 'websql', name: 'webSQL' },
            { type: 'service_workers', name: 'serviceWorkers' }
          ];

          for (const storage of storageTypes) {
            try {
              await clearDataSession.send('Storage.clearDataForOrigin', {
                origin,
                storageTypes: storage.type
              });
              completedOperations.push(storage.name);
            } catch (individualErr) {
              if (forceDebug) {
                console.log(formatLogMessage('debug', `[clear_sitedata] ${storage.name} clear failed: ${individualErr.message}`));
              }
            }
          }
        }
      }
    }

    if (forceDebug && completedOperations.length > 0) {
      console.log(formatLogMessage('debug', `[clear_sitedata] CDP cleared: ${completedOperations.join(', ')}`));
    }

    return { success: completedOperations.length > 0, operations: completedOperations };
  } catch (cdpErr) {
    if (forceDebug) {
      console.log(formatLogMessage('debug', `[clear_sitedata] CDP session failed: ${cdpErr.message}`));
    }
    return { success: false, operations: completedOperations };
  } finally {
    // Without this the timer stays pending for the full timeout after every call.
    if (timeoutHandle !== null) {
      clearTimeout(timeoutHandle);
    }
    if (clearDataSession) {
      try {
        await clearDataSession.detach();
      } catch (detachErr) {
        // Ignore detach errors
      }
    }
  }
}
102
+
103
/**
 * Fallback clearing that runs inside the page itself.
 *
 * Clears localStorage and sessionStorage (each is probed with a write first so
 * that a SecurityError on restricted pages is caught instead of propagating) and
 * deletes every IndexedDB database the page can enumerate. An area is reported
 * only after its clearing has actually been carried out.
 *
 * The function never throws; a failed evaluation is reported as
 * `{ success: false, operations: [] }`.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {boolean} forceDebug - Debug logging flag
 * @returns {Promise<{success: boolean, operations: string[]}>} `operations` holds any of
 *   'localStorage', 'sessionStorage', 'indexedDB'
 */
async function clearSiteDataViaPage(page, forceDebug) {
  try {
    // The callback is async so the IndexedDB deletions are awaited before the
    // result is returned to the caller.
    const result = await page.evaluate(async () => {
      const cleared = [];

      // Test and clear localStorage / sessionStorage
      for (const storageName of ['localStorage', 'sessionStorage']) {
        try {
          const storage = window[storageName];
          if (storage && typeof storage.setItem === 'function') {
            const testKey = '__nwss_access_test__';
            storage.setItem(testKey, 'test');
            storage.removeItem(testKey);
            storage.clear();
            cleared.push(storageName);
          }
        } catch (e) {
          // Security error expected on some sites
        }
      }

      // Clear IndexedDB
      try {
        if (window.indexedDB && typeof window.indexedDB.databases === 'function') {
          const dbs = await window.indexedDB.databases();
          await Promise.all(dbs.map((db) => new Promise((resolve) => {
            try {
              const request = window.indexedDB.deleteDatabase(db.name);
              // 'blocked' also settles: waiting for other open connections to
              // close could stall the scan indefinitely.
              request.onsuccess = () => resolve();
              request.onerror = () => resolve();
              request.onblocked = () => resolve();
            } catch (dbErr) {
              // Individual DB deletion may fail
              resolve();
            }
          })));
          cleared.push('indexedDB');
        }
      } catch (e) {
        // IndexedDB may not be available, or database listing may fail
      }

      return cleared;
    });

    if (forceDebug && result.length > 0) {
      console.log(formatLogMessage('debug', `[clear_sitedata] Page-level cleared: ${result.join(', ')}`));
    }

    return { success: result.length > 0, operations: result };
  } catch (pageErr) {
    if (forceDebug) {
      console.log(formatLogMessage('debug', `[clear_sitedata] Page evaluation failed: ${pageErr.message}`));
    }
    return { success: false, operations: [] };
  }
}
175
+
176
/**
 * Main entry point for site data clearing.
 *
 * Attempts CDP clearing first. In full mode (`quickMode` false) the page-level
 * pass is used in two situations:
 *   - CDP clearing failed entirely: the page-level result is returned on its own;
 *   - CDP cleared only cookies/cache (every storage command failed): the
 *     page-level pass supplements it so storage is not silently left behind.
 *     The result is still reported with method 'CDP'.
 * In quick mode no page-level work is ever done.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed
 * @param {boolean} forceDebug - Debug logging flag
 * @param {boolean} quickMode - If true, only clear cache/cookies (for reloads)
 * @returns {Promise<{success: boolean, operations: string[], method: string}>}
 *   `method` is one of 'CDP', 'page-level' or 'failed'
 */
async function clearSiteData(page, currentUrl, forceDebug, quickMode = false) {
  // Try CDP clearing first (preferred method)
  const cdpResult = await clearSiteDataViaCDP(page, currentUrl, forceDebug, quickMode);

  if (cdpResult.success) {
    // Anything other than these two names means some storage area was cleared.
    const nonStorageOperations = ['cookies', 'cache'];
    const storageCleared = cdpResult.operations.some((op) => !nonStorageOperations.includes(op));

    if (quickMode || storageCleared) {
      return {
        success: true,
        operations: cdpResult.operations,
        method: 'CDP'
      };
    }

    // Full mode, but CDP could not clear any storage: supplement at page level.
    const supplemental = await clearSiteDataViaPage(page, forceDebug);

    return {
      success: true,
      operations: [...cdpResult.operations, ...supplemental.operations],
      method: 'CDP'
    };
  }

  // Fallback to page-level clearing if CDP failed and not in quick mode
  if (!quickMode) {
    if (forceDebug) {
      console.log(formatLogMessage('debug', `CDP clearing failed, attempting page-level fallback for ${currentUrl}`));
    }

    const pageResult = await clearSiteDataViaPage(page, forceDebug);

    return {
      success: pageResult.success,
      operations: pageResult.operations,
      method: pageResult.success ? 'page-level' : 'failed'
    };
  }

  return {
    success: false,
    operations: [],
    method: 'failed'
  };
}
218
+
219
/**
 * Enhanced site data clearing with additional browser-level operations.
 *
 * Over one CDP session it sends, in order: cookie clearing, cache clearing,
 * `Storage.clearDataForOrigin` for the origin of `currentUrl` (falling back to
 * individual storage types if the 'all' request fails),
 * `Runtime.discardConsoleEntries` and `Security.disable`. Afterwards it runs a
 * small in-page cleanup that clears performance marks/measures.
 *
 * If the CDP session cannot be obtained, the call falls back to
 * `clearSiteData(page, currentUrl, forceDebug, false)` and returns its result.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed; only its origin is used
 * @param {boolean} forceDebug - Debug logging flag
 * @returns {Promise<{success: boolean, operations: string[], method: string}>}
 *   `method` is 'enhanced_CDP' unless the fallback path was taken
 */
async function clearSiteDataEnhanced(page, currentUrl, forceDebug) {
  const SESSION_TIMEOUT_MS = 15000;
  const completedOperations = [];
  let clearDataSession = null;
  let timeoutHandle = null;

  try {
    let timedOut = false;
    const sessionPromise = Promise.resolve(page.target().createCDPSession());

    // If the timeout wins the race, a session that shows up afterwards would
    // otherwise stay attached forever, so detach it as soon as it arrives.
    sessionPromise.then(
      (lateSession) => {
        if (timedOut && lateSession) {
          Promise.resolve()
            .then(() => lateSession.detach())
            .catch(() => { /* best-effort cleanup */ });
        }
      },
      () => { /* the rejection is surfaced through the race below */ }
    );

    clearDataSession = await Promise.race([
      sessionPromise,
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => {
          timedOut = true;
          reject(new Error('Enhanced CDP session timeout'));
        }, SESSION_TIMEOUT_MS);
      })
    ]);

    // Only the storage step needs the origin; an unparsable URL must not
    // prevent the remaining operations from running.
    let origin = null;
    try {
      origin = new URL(currentUrl).origin;
    } catch (urlErr) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] storage clear skipped, cannot derive origin: ${urlErr.message}`));
      }
    }

    // Enhanced clearing operations
    const enhancedOperations = [
      // Network layer
      { cmd: 'Network.clearBrowserCookies', name: 'cookies' },
      { cmd: 'Network.clearBrowserCache', name: 'cache' }
    ];

    if (origin !== null) {
      // Storage layer - comprehensive
      enhancedOperations.push({
        cmd: 'Storage.clearDataForOrigin',
        params: { origin, storageTypes: 'all' },
        name: 'all_storage'
      });
    }

    enhancedOperations.push(
      // Runtime layer
      { cmd: 'Runtime.discardConsoleEntries', name: 'console' },
      // Security layer
      { cmd: 'Security.disable', name: 'security_reset' }
    );

    for (const op of enhancedOperations) {
      try {
        if (op.params) {
          await clearDataSession.send(op.cmd, op.params);
        } else {
          await clearDataSession.send(op.cmd);
        }
        completedOperations.push(op.name);
      } catch (opErr) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] ${op.name} failed: ${opErr.message}`));
        }

        // For storage operations, try individual fallbacks
        if (op.name === 'all_storage') {
          const individualTypes = ['local_storage', 'session_storage', 'indexeddb', 'websql', 'service_workers'];
          for (const type of individualTypes) {
            try {
              await clearDataSession.send('Storage.clearDataForOrigin', {
                origin,
                storageTypes: type
              });
              completedOperations.push(type);
            } catch (individualErr) {
              // Continue trying other types
              if (forceDebug) {
                console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] ${type} failed: ${individualErr.message}`));
              }
            }
          }
        }
      }
    }

    // Additional DOM cleanup via page evaluation
    try {
      await page.evaluate(() => {
        // NOTE(review): attaching and detaching a throwaway element is kept from
        // the original implementation; whether it frees anything is unverified.
        // document.body can be null (e.g. before parsing), so guard it.
        const doc = window.document;
        if (doc && doc.body && typeof doc.createElement === 'function') {
          const div = doc.createElement('div');
          doc.body.appendChild(div);
          doc.body.removeChild(div);
        }

        // Clear performance entries
        const perf = window.performance;
        if (perf && typeof perf.clearMarks === 'function') {
          perf.clearMarks();
        }
        if (perf && typeof perf.clearMeasures === 'function') {
          perf.clearMeasures();
        }
      });
      completedOperations.push('dom_cleanup');
    } catch (domErr) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] DOM cleanup failed: ${domErr.message}`));
      }
    }

    if (forceDebug && completedOperations.length > 0) {
      console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] Cleared: ${completedOperations.join(', ')}`));
    }

    return {
      success: completedOperations.length > 0,
      operations: completedOperations,
      method: 'enhanced_CDP'
    };
  } catch (enhancedErr) {
    if (forceDebug) {
      console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] Failed: ${enhancedErr.message}`));
    }

    // Fallback to regular clearing
    return await clearSiteData(page, currentUrl, forceDebug, false);
  } finally {
    // Without this the timer stays pending for the full timeout after every call.
    if (timeoutHandle !== null) {
      clearTimeout(timeoutHandle);
    }
    if (clearDataSession) {
      try {
        await clearDataSession.detach();
      } catch (detachErr) {
        // Ignore detach errors
      }
    }
  }
}
344
+
345
+ module.exports = {
346
+ clearSiteData,
347
+ clearSiteDataViaCDP,
348
+ clearSiteDataViaPage,
349
+ clearSiteDataEnhanced
350
+ };
package/lib/cloudflare.js CHANGED
@@ -19,7 +19,7 @@ const { formatLogMessage } = require('./colorize');
19
19
  /**
20
20
  * Module version information
21
21
  */
22
- const CLOUDFLARE_MODULE_VERSION = '2.6.1';
22
+ const CLOUDFLARE_MODULE_VERSION = '2.6.2';
23
23
 
24
24
  /**
25
25
  * Timeout constants for various operations (in milliseconds)
@@ -1880,4 +1880,4 @@ module.exports = {
1880
1880
  detectChallengeLoop,
1881
1881
  // Memory management
1882
1882
  cleanup
1883
- };
1883
+ };
package/nwss.1 CHANGED
@@ -280,7 +280,8 @@ Array of CSS selectors to hide elements on the page.
280
280
 
281
281
  .TP
282
282
  .B userAgent
283
- Spoof User-Agent: \fB"chrome"\fR, \fB"firefox"\fR, or \fB"safari"\fR.
283
+ Spoof User-Agent: \fB"chrome"\fR, \fB"chrome_mac"\fR, \fB"chrome_linux"\fR, \fB"firefox"\fR, \fB"firefox_mac"\fR, \fB"firefox_linux"\fR, or \fB"safari"\fR.
284
+
284
285
 
285
286
  .TP
286
287
  .B interact
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.6 ===
1
+ // === Network scanner script (nwss.js) v2.0.8 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -44,6 +44,8 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
44
44
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
45
45
  const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
46
46
  const { clearPersistentCache } = require('./lib/smart-cache');
47
+ // Enhanced site data clearing functionality
48
+ const { clearSiteData } = require('./lib/clear_sitedata');
47
49
 
48
50
  // Fast setTimeout helper for Puppeteer 22.x compatibility
49
51
  // Uses standard Promise constructor for better performance than node:timers/promises
@@ -127,7 +129,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
127
129
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage } = require('./lib/browserhealth');
128
130
 
129
131
  // --- Script Configuration & Constants ---
130
- const VERSION = '2.0.6'; // Script version
132
+ const VERSION = '2.0.8'; // Script version
131
133
 
132
134
  // get startTime
133
135
  const startTime = Date.now();
@@ -505,7 +507,7 @@ Redirect Handling Options:
505
507
  resourceTypes: ["script", "stylesheet"] Only process requests of these resource types (default: all types)
506
508
  interact: true/false Simulate mouse movements/clicks
507
509
  isBrave: true/false Spoof Brave browser detection
508
- userAgent: "chrome"|"firefox"|"safari" Custom desktop User-Agent
510
+ userAgent: "chrome"|"chrome_mac"|"chrome_linux"|"firefox"|"firefox_mac"|"firefox_linux"|"safari" Custom desktop User-Agent
509
511
  interact_intensity: "low"|"medium"|"high" Interaction simulation intensity (default: medium)
510
512
  delay: <milliseconds> Delay after load (default: 4000)
511
513
  reload: <number> Reload page n times after load (default: 1)
@@ -1926,24 +1928,10 @@ function setupFrameHandling(page, forceDebug) {
1926
1928
 
1927
1929
  if (siteConfig.clear_sitedata === true) {
1928
1930
  try {
1929
- let clearDataSession = null;
1930
- try {
1931
- clearDataSession = await page.target().createCDPSession();
1932
- await clearDataSession.send('Network.clearBrowserCookies');
1933
- await clearDataSession.send('Network.clearBrowserCache');
1934
- } finally {
1935
- if (clearDataSession) {
1936
- try { await clearDataSession.detach(); } catch (detachErr) { /* ignore */ }
1937
- }
1938
- }
1939
- await page.evaluate(() => {
1940
- localStorage.clear();
1941
- sessionStorage.clear();
1942
- indexedDB.databases().then(dbs => dbs.forEach(db => indexedDB.deleteDatabase(db.name)));
1943
- });
1931
+ const clearResult = await clearSiteData(page, currentUrl, forceDebug);
1944
1932
  if (forceDebug) console.log(formatLogMessage('debug', `Cleared site data for ${currentUrl}`));
1945
1933
  } catch (clearErr) {
1946
- console.warn(messageColors.warn(`[clear_sitedata failed] ${currentUrl}: ${clearErr.message}`));
1934
+ if (forceDebug) console.log(formatLogMessage('debug', `[clear_sitedata] Failed for ${currentUrl}: ${clearErr.message}`));
1947
1935
  }
1948
1936
  }
1949
1937
 
@@ -1969,7 +1957,11 @@ function setupFrameHandling(page, forceDebug) {
1969
1957
  if (useCurl && siteConfig.userAgent) {
1970
1958
  const userAgents = {
1971
1959
  chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1960
+ chrome_mac: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1961
+ chrome_linux: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1972
1962
  firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
1963
+ firefox_mac: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:142.0) Gecko/20100101 Firefox/142.0",
1964
+ firefox_linux: "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
1973
1965
  safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"
1974
1966
  };
1975
1967
  curlUserAgent = userAgents[siteConfig.userAgent.toLowerCase()] || '';
@@ -3116,24 +3108,10 @@ function setupFrameHandling(page, forceDebug) {
3116
3108
 
3117
3109
  if (siteConfig.clear_sitedata === true) {
3118
3110
  try {
3119
- let reloadClearSession = null;
3120
- try {
3121
- reloadClearSession = await page.target().createCDPSession();
3122
- await reloadClearSession.send('Network.clearBrowserCookies');
3123
- await reloadClearSession.send('Network.clearBrowserCache');
3124
- } finally {
3125
- if (reloadClearSession) {
3126
- try { await reloadClearSession.detach(); } catch (detachErr) { /* ignore */ }
3127
- }
3128
- }
3129
- await page.evaluate(() => {
3130
- localStorage.clear();
3131
- sessionStorage.clear();
3132
- indexedDB.databases().then(dbs => dbs.forEach(db => indexedDB.deleteDatabase(db.name)));
3133
- });
3111
+ const clearResult = await clearSiteData(page, currentUrl, forceDebug, true); // Quick mode for reloads
3134
3112
  if (forceDebug) console.log(formatLogMessage('debug', `Cleared site data before reload #${i} for ${currentUrl}`));
3135
3113
  } catch (reloadClearErr) {
3136
- console.warn(messageColors.warn(`[clear_sitedata before reload failed] ${currentUrl}: ${reloadClearErr.message}`));
3114
+ if (forceDebug) console.log(formatLogMessage('debug', `[clear_sitedata] Before reload failed for ${currentUrl}`));
3137
3115
  }
3138
3116
  }
3139
3117
 
@@ -3393,10 +3371,13 @@ function setupFrameHandling(page, forceDebug) {
3393
3371
  // Hang detection for debugging concurrency issues
3394
3372
  let currentBatchInfo = { batchStart: 0, batchSize: 0 };
3395
3373
  const hangDetectionInterval = setInterval(() => {
3374
+ // Only show hang detection messages in debug mode
3375
+ if (forceDebug) {
3396
3376
  const currentBatch = Math.floor(currentBatchInfo.batchStart / RESOURCE_CLEANUP_INTERVAL) + 1;
3397
3377
  const totalBatches = Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL);
3398
3378
  console.log(formatLogMessage('debug', `[HANG CHECK] Processed: ${processedUrlCount}/${totalUrls} URLs, Batch: ${currentBatch}/${totalBatches}, Current batch size: ${currentBatchInfo.batchSize}`));
3399
3379
  console.log(formatLogMessage('debug', `[HANG CHECK] URLs since cleanup: ${urlsSinceLastCleanup}, Recent failures: ${results.slice(-3).filter(r => !r.success).length}/3`));
3380
+ }
3400
3381
  }, 30000); // Check every 30 seconds
3401
3382
 
3402
3383
  // Process URLs in batches to maintain concurrency while allowing browser restarts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.6",
3
+ "version": "2.0.8",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {