@fanboynz/network-scanner 2.0.20 → 2.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/lib/dry-run.js +515 -0
  2. package/nwss.1 +32 -1
  3. package/nwss.js +93 -143
  4. package/package.json +1 -1
package/lib/dry-run.js ADDED
@@ -0,0 +1,515 @@
1
+ // === Dry Run Module (dry-run.js) ===
2
+ // Handles dry run mode functionality for network scanner
3
+
4
+ const fs = require('fs');
5
+ const { messageColors, formatLogMessage } = require('./colorize');
6
+
// Keys under which the dry-run collections are stored inside the shared
// matchedDomains Map. The object is exported, so it is frozen to stop any
// consumer from accidentally renaming a key and desynchronising the code
// that writes a collection from the code that reads it back.
const DRY_RUN_KEYS = Object.freeze({
  MATCHES: 'dryRunMatches',
  NET_TOOLS: 'dryRunNetTools',
  SEARCH_STRING: 'dryRunSearchString'
});
13
+
/**
 * Prepare a matched-domains map for dry-run mode.
 *
 * Stores a fresh empty array under the MATCHES and NET_TOOLS keys and a fresh
 * empty Map under the SEARCH_STRING key. Anything already stored under those
 * keys is overwritten.
 *
 * @param {Map} matchedDomains - Map that will hold the dry-run collections
 * @throws {Error} When matchedDomains is not a Map instance
 */
function initializeDryRunCollections(matchedDomains) {
  const isMap = matchedDomains instanceof Map;
  if (!isMap) {
    throw new Error('matchedDomains must be a Map instance for dry-run mode');
  }

  const freshCollections = [
    [DRY_RUN_KEYS.MATCHES, []],
    [DRY_RUN_KEYS.NET_TOOLS, []],
    [DRY_RUN_KEYS.SEARCH_STRING, new Map()]
  ];

  for (const [key, emptyValue] of freshCollections) {
    matchedDomains.set(key, emptyValue);
  }
}
28
+
/**
 * Check that a regex-match record has the expected shape.
 *
 * The record must be a non-null object on which the keys `regex`, `domain`,
 * `resourceType` and `fullUrl` are all present (presence is tested with the
 * `in` operator, so the values themselves are not inspected).
 *
 * @param {Object} matchData - Record to check
 * @throws {Error} When matchData is not an object, or names the first missing key
 */
function validateMatchData(matchData) {
  const isObject = Boolean(matchData) && typeof matchData === 'object';
  if (!isObject) {
    throw new Error('Match data must be an object');
  }

  const expectedKeys = ['regex', 'domain', 'resourceType', 'fullUrl'];
  const firstMissing = expectedKeys.find((key) => !(key in matchData));
  if (firstMissing !== undefined) {
    throw new Error(`Match data missing required field: ${firstMissing}`);
  }
}
46
+
/**
 * Check that a nettools result record has the expected shape.
 *
 * The record must be a non-null object on which the keys `domain`, `tool`,
 * `matchType` and `matchedTerm` are all present (presence is tested with the
 * `in` operator, so the values themselves are not inspected).
 *
 * @param {Object} netToolsData - Record to check
 * @throws {Error} When netToolsData is not an object, or names the first missing key
 */
function validateNetToolsData(netToolsData) {
  const isObject = Boolean(netToolsData) && typeof netToolsData === 'object';
  if (!isObject) {
    throw new Error('NetTools data must be an object');
  }

  const expectedKeys = ['domain', 'tool', 'matchType', 'matchedTerm'];
  const firstMissing = expectedKeys.find((key) => !(key in netToolsData));
  if (firstMissing !== undefined) {
    throw new Error(`NetTools data missing required field: ${firstMissing}`);
  }
}
64
+
/**
 * Record one regex match in the dry-run MATCHES collection.
 *
 * A shallow copy of matchData is stored together with an ISO-8601 `timestamp`
 * taken at the moment of the call; the caller's object is not modified.
 *
 * @param {Map} matchedDomains - Map holding the dry-run collections
 * @param {Object} matchData - Match record (validated by validateMatchData)
 * @throws {Error} When matchedDomains is not a Map, matchData is invalid,
 *                 or the collections have not been initialized
 */
function addDryRunMatch(matchedDomains, matchData) {
  const isMap = matchedDomains instanceof Map;
  if (!isMap) {
    throw new Error('matchedDomains must be a Map instance');
  }

  validateMatchData(matchData);

  const collectionKey = DRY_RUN_KEYS.MATCHES;
  const isInitialized = matchedDomains.has(collectionKey);
  if (!isInitialized) {
    throw new Error('Dry run collections not initialized. Call initializeDryRunCollections first.');
  }

  const collected = matchedDomains.get(collectionKey);
  const stamped = { ...matchData, timestamp: new Date().toISOString() };
  collected.push(stamped);
}
87
+
/**
 * Record one nettools result in the dry-run NET_TOOLS collection.
 *
 * A shallow copy of netToolsData is stored together with an ISO-8601
 * `timestamp` taken at the moment of the call; the caller's object is not
 * modified.
 *
 * @param {Map} matchedDomains - Map holding the dry-run collections
 * @param {Object} netToolsData - Result record (validated by validateNetToolsData)
 * @throws {Error} When matchedDomains is not a Map, netToolsData is invalid,
 *                 or the collections have not been initialized
 */
function addDryRunNetTools(matchedDomains, netToolsData) {
  const isMap = matchedDomains instanceof Map;
  if (!isMap) {
    throw new Error('matchedDomains must be a Map instance');
  }

  validateNetToolsData(netToolsData);

  const collectionKey = DRY_RUN_KEYS.NET_TOOLS;
  const isInitialized = matchedDomains.has(collectionKey);
  if (!isInitialized) {
    throw new Error('Dry run collections not initialized. Call initializeDryRunCollections first.');
  }

  const collected = matchedDomains.get(collectionKey);
  const stamped = { ...netToolsData, timestamp: new Date().toISOString() };
  collected.push(stamped);
}
110
+
/**
 * Record the search-string outcome for one URL in the dry-run SEARCH_STRING map.
 *
 * The result is stored keyed by `url` as a shallow copy with an added
 * ISO-8601 `timestamp`; a later call for the same URL replaces the earlier entry.
 *
 * @param {Map} matchedDomains - Map holding the dry-run collections
 * @param {string} url - URL whose content was searched
 * @param {Object} searchResult - Outcome of the search
 * @throws {Error} When any argument is invalid or the collections have not
 *                 been initialized
 */
function addDryRunSearchString(matchedDomains, url, searchResult) {
  const isMap = matchedDomains instanceof Map;
  if (!isMap) {
    throw new Error('matchedDomains must be a Map instance');
  }

  const urlIsUsable = Boolean(url) && typeof url === 'string';
  if (!urlIsUsable) {
    throw new Error('URL must be a non-empty string');
  }

  const resultIsObject = Boolean(searchResult) && typeof searchResult === 'object';
  if (!resultIsObject) {
    throw new Error('Search result must be an object');
  }

  const collectionKey = DRY_RUN_KEYS.SEARCH_STRING;
  if (!matchedDomains.has(collectionKey)) {
    throw new Error('Dry run collections not initialized. Call initializeDryRunCollections first.');
  }

  const resultsByUrl = matchedDomains.get(collectionKey);
  const stamped = { ...searchResult, timestamp: new Date().toISOString() };
  resultsByUrl.set(url, stamped);
}
140
+
/**
 * Shorten a URL for display so it never exceeds maxLength characters.
 *
 * Strings longer than maxLength are cut and end in "..." (the ellipsis counts
 * towards the limit). Empty values and non-string values are returned
 * unchanged instead of raising, because this helper is only used for display.
 *
 * @param {string} url - URL to truncate
 * @param {number} maxLength - Maximum number of characters to return (default 80)
 * @returns {string} The original value, or a truncated copy of it
 */
function truncateUrl(url, maxLength = 80) {
  if (!url || typeof url !== 'string' || url.length <= maxLength) {
    return url;
  }

  // With fewer than three characters available the ellipsis alone would
  // already break the limit, so fall back to a plain hard cut.
  if (maxLength < 3) {
    return url.substring(0, Math.max(0, maxLength));
  }

  return `${url.substring(0, maxLength - 3)}...`;
}
153
+
/**
 * Describe a search-string match as `<type> - "<term>"`.
 *
 * A missing or falsy `type` or `term` is shown as "unknown".
 *
 * @param {Object} searchStringMatch - Match data carrying `type` and `term`
 * @returns {string|null} The description, or null when no match data was given
 */
function formatSearchStringMatch(searchStringMatch) {
  if (searchStringMatch) {
    const { type, term } = searchStringMatch;
    const shownType = type ? type : 'unknown';
    const shownTerm = term ? term : 'unknown';
    return `${shownType} - "${shownTerm}"`;
  }

  return null;
}
166
+
/**
 * Build an adblock rule for a domain, optionally restricted to a resource type.
 *
 * Adblock syntax separates the address part from its options with `$`, so a
 * typed rule has the form `||domain^$type`. The type is emitted verbatim.
 * No type option is added when resourceType is missing or equals 'other'.
 *
 * @param {string} domain - Domain name
 * @param {string} resourceType - Resource type (optional)
 * @returns {string} The rule, or an empty string when no domain was given
 */
function generateAdblockRule(domain, resourceType = null) {
  if (!domain) {
    return '';
  }

  const baseRule = `||${domain}^`;
  const hasTypeOption = Boolean(resourceType) && resourceType !== 'other';

  // "$$" below is a literal "$" (the option separator) followed by the
  // start of the "${...}" placeholder.
  return hasTypeOption ? `${baseRule}$${resourceType}` : baseRule;
}
181
+
/**
 * Print the dry-run report for one scanned URL and optionally capture it.
 *
 * Every report line is written to the console (with colours) and collected as
 * plain text. When outputFile is set and dryRunOutput is an array, the plain
 * lines plus one blank separator line are appended to dryRunOutput so they can
 * later be written to a file. A failure while rendering a single item is
 * reported inline and does not stop the rest of the report; any other failure
 * is logged and never propagates to the caller.
 *
 * @param {string} url - The URL being processed
 * @param {Array} matchedItems - Regex matches (regex, domain, resourceType, fullUrl, ...)
 * @param {Array} netToolsResults - whois/dig results (domain, tool, matchType, matchedTerm, details)
 * @param {string} pageTitle - Title of the page (if available)
 * @param {string} outputFile - Output file path (optional)
 * @param {Array} dryRunOutput - Array to collect output lines for file writing
 */
function outputDryRunResults(url, matchedItems = [], netToolsResults = [], pageTitle = '', outputFile = null, dryRunOutput = []) {
  // Capture only when a file was requested AND there is an array to append to;
  // this also keeps the error handler below from throwing on a bad dryRunOutput.
  const shouldCapture = Boolean(outputFile) && Array.isArray(dryRunOutput);

  try {
    // Default parameters do not cover an explicit null, so normalise here.
    const regexItems = Array.isArray(matchedItems) ? matchedItems : [];
    const netItems = Array.isArray(netToolsResults) ? netToolsResults : [];

    const lines = [];

    // Record the plain text for the file and print the (optionally coloured) console form.
    const emit = (plain, colored = plain) => {
      lines.push(plain);
      console.log(colored);
    };

    // Hand the collected lines over for file writing, followed by a blank separator.
    const flush = () => {
      if (shouldCapture) {
        dryRunOutput.push(...lines, '');
      }
    };

    const truncatedUrl = truncateUrl(url);

    emit(
      `\n=== DRY RUN RESULTS === ${truncatedUrl}`,
      `\n${messageColors.scanning('=== DRY RUN RESULTS ===')} ${truncatedUrl}`
    );

    if (typeof pageTitle === 'string' && pageTitle.trim()) {
      const cleanTitle = pageTitle.trim().substring(0, 200); // Limit title length
      emit(`Title: ${cleanTitle}`, `${messageColors.info('Title:')} ${cleanTitle}`);
    }

    const totalMatches = regexItems.length + netItems.length;

    if (totalMatches === 0) {
      const noMatchMsg = `No matching rules found on ${truncatedUrl}`;
      emit(noMatchMsg, messageColors.warn(noMatchMsg));
      flush();
      return;
    }

    emit(`Matches found: ${totalMatches}`, `${messageColors.success('Matches found:')} ${totalMatches}`);

    // Regex matches
    regexItems.forEach((item, index) => {
      const position = index + 1;
      try {
        // The console header carries its own leading newline; the file gets a blank line.
        lines.push('');
        emit(
          `[${position}] Regex Match:`,
          `\n${messageColors.highlight(`[${position}]`)} ${messageColors.match('Regex Match:')}`
        );
        emit(` Pattern: ${item.regex || 'unknown'}`);
        emit(` Domain: ${item.domain || 'unknown'}`);
        emit(` Resource Type: ${item.resourceType || 'unknown'}`);
        emit(` Full URL: ${truncateUrl(item.fullUrl || '')}`);

        // Show blocked status if applicable
        if (item.wasBlocked) {
          emit(
            ' Status: BLOCKED (even_blocked enabled)',
            ` ${messageColors.warn('Status:')} BLOCKED (even_blocked enabled)`
          );
        }

        // Searchstring outcome: ✓ for a hit, ✗ when the content was checked without a hit
        if (item.searchStringMatch) {
          const matchDesc = formatSearchStringMatch(item.searchStringMatch);
          emit(
            ` ✓ Searchstring Match: ${matchDesc}`,
            ` ${messageColors.success('✓ Searchstring Match:')} ${matchDesc}`
          );
        } else if (item.searchStringChecked) {
          emit(
            ' ✗ Searchstring: No matches found in content',
            ` ${messageColors.warn('✗ Searchstring:')} No matches found in content`
          );
        }

        const adblockRule = generateAdblockRule(item.domain, item.resourceType);
        emit(` Adblock Rule: ${adblockRule}`, ` ${messageColors.info('Adblock Rule:')} ${adblockRule}`);
      } catch (itemErr) {
        const errorMsg = `Error processing match item ${position}: ${itemErr.message}`;
        emit(` Error: ${errorMsg}`, ` ${messageColors.warn('Error:')} ${errorMsg}`);
      }
    });

    // Nettools results, numbered after the regex matches
    netItems.forEach((result, index) => {
      try {
        const resultIndex = regexItems.length + index + 1;
        lines.push('');
        emit(
          `[${resultIndex}] NetTools Match:`,
          `\n${messageColors.highlight(`[${resultIndex}]`)} ${messageColors.match('NetTools Match:')}`
        );
        emit(` Domain: ${result.domain || 'unknown'}`);
        emit(` Tool: ${(result.tool || 'unknown').toUpperCase()}`);

        const matchDesc = `${result.matchType || 'unknown'} - "${result.matchedTerm || 'unknown'}"`;
        emit(` ✓ Match: ${matchDesc}`, ` ${messageColors.success('✓ Match:')} ${matchDesc}`);

        if (result.details) {
          emit(` Details: ${result.details}`);
        }

        // Nettools matches are not tied to a resource type, so the rule is domain-only
        const adblockRule = generateAdblockRule(result.domain);
        emit(` Adblock Rule: ${adblockRule}`, ` ${messageColors.info('Adblock Rule:')} ${adblockRule}`);
      } catch (resultErr) {
        const errorMsg = `Error processing nettools result ${index + 1}: ${resultErr.message}`;
        emit(` Error: ${errorMsg}`, ` ${messageColors.warn('Error:')} ${errorMsg}`);
      }
    });

    flush();
  } catch (outputErr) {
    const errorMsg = `Error in outputDryRunResults: ${outputErr.message}`;
    console.error(messageColors.error(errorMsg));
    if (shouldCapture) {
      dryRunOutput.push(`Error: ${errorMsg}`);
    }
  }
}
318
+
/**
 * Gather everything collected for one URL during a dry run and report it.
 *
 * Reads the page title (falling back to 'Title unavailable' when reading it
 * fails), attaches the stored search-string outcome to each regex match by
 * looking up the match's `fullUrl`, hands the result to outputDryRunResults,
 * and returns a summary. Errors are caught, logged and reported through the
 * returned object rather than thrown.
 *
 * @param {string} currentUrl - The URL being processed
 * @param {Map} matchedDomains - The matched domains map with dry run collections
 * @param {Object} page - Object exposing an async `title()` method (optional)
 * @param {string} outputFile - Output file path (optional)
 * @param {Array} dryRunOutput - Array to collect output lines for file writing
 * @param {boolean} forceDebug - Debug logging flag
 * @returns {Promise<Object>} Summary with `success`, `matchCount`,
 *          `enhancedMatches`, `netToolsResults`, `pageTitle`, `regexMatches`,
 *          `netToolsMatches`, plus `error` when `success` is false
 */
async function processDryRunResults(currentUrl, matchedDomains, page, outputFile = null, dryRunOutput = [], forceDebug = false) {
  // The message is built lazily so nothing is evaluated unless debugging is on.
  const debug = (buildMessage) => {
    if (forceDebug) {
      console.log(formatLogMessage('debug', buildMessage()));
    }
  };

  try {
    const urlIsUsable = Boolean(currentUrl) && typeof currentUrl === 'string';
    if (!urlIsUsable) {
      throw new Error('currentUrl must be a non-empty string');
    }

    const isMap = matchedDomains instanceof Map;
    if (!isMap) {
      throw new Error('matchedDomains must be a Map instance');
    }

    // Page title is optional: a missing page yields '', a failing page a placeholder.
    let pageTitle = '';
    try {
      const canReadTitle = page && typeof page.title === 'function';
      if (canReadTitle) {
        pageTitle = await page.title();
      }
    } catch (titleErr) {
      debug(() => `Failed to get page title for ${currentUrl}: ${titleErr.message}`);
      pageTitle = 'Title unavailable';
    }

    // Collections may be absent; fall back to empty containers.
    const regexHits = matchedDomains.get(DRY_RUN_KEYS.MATCHES) || [];
    const dryRunNetTools = matchedDomains.get(DRY_RUN_KEYS.NET_TOOLS) || [];
    const searchOutcomes = matchedDomains.get(DRY_RUN_KEYS.SEARCH_STRING) || new Map();

    // Attach the search-string outcome (if any) to a single regex match.
    const enhance = (match, index) => {
      try {
        const searchResult = searchOutcomes.get(match.fullUrl);
        const wasMatched = searchResult && searchResult.matched;
        return {
          ...match,
          searchStringMatch: wasMatched ? searchResult : null,
          searchStringChecked: Boolean(match.needsSearchStringCheck)
        };
      } catch (enhanceErr) {
        debug(() => `Error enhancing match ${index}: ${enhanceErr.message}`);
        return {
          ...match,
          searchStringMatch: null,
          searchStringChecked: false
        };
      }
    };
    const enhancedMatches = regexHits.map((match, index) => enhance(match, index));

    outputDryRunResults(currentUrl, enhancedMatches, dryRunNetTools, pageTitle, outputFile, dryRunOutput);

    const regexMatches = enhancedMatches.length;
    const netToolsMatches = dryRunNetTools.length;

    return {
      success: true,
      matchCount: regexMatches + netToolsMatches,
      enhancedMatches,
      netToolsResults: dryRunNetTools,
      pageTitle,
      regexMatches,
      netToolsMatches
    };
  } catch (processErr) {
    const errorMsg = `Error processing dry run results for ${currentUrl}: ${processErr.message}`;
    console.error(messageColors.error(errorMsg));
    debug(() => `Stack trace: ${processErr.stack}`);

    return {
      success: false,
      error: errorMsg,
      matchCount: 0,
      enhancedMatches: [],
      netToolsResults: [],
      pageTitle: '',
      regexMatches: 0,
      netToolsMatches: 0
    };
  }
}
413
+
/**
 * Write the collected dry-run report lines to a file.
 *
 * Lines are joined with '\n'. The directory part of outputFile is created
 * (recursively) when it is not '.' and does not exist yet. Failures are caught,
 * printed with console.error and reported through the returned object.
 *
 * @param {string} outputFile - Output file path
 * @param {Array} dryRunOutput - Array of output lines
 * @param {boolean} silentMode - When truthy, informational console messages are skipped
 * @returns {Object} `{ success, written, ... }`: on success with content also
 *          `file`, `lines` and `bytes`; with nothing to write `reason`; on
 *          failure `error`
 */
function writeDryRunOutput(outputFile, dryRunOutput, silentMode = false) {
  const verbose = !silentMode;

  try {
    const pathIsUsable = Boolean(outputFile) && typeof outputFile === 'string';
    if (!pathIsUsable) {
      return { success: false, error: 'Invalid output file path' };
    }

    const hasLines = Array.isArray(dryRunOutput) && dryRunOutput.length > 0;
    if (!hasLines) {
      if (verbose) {
        console.log(messageColors.info('No dry run output to write'));
      }
      return { success: true, written: false, reason: 'No output to write' };
    }

    const dryRunContent = dryRunOutput.join('\n');

    // Make sure the target directory is there before writing
    const { dirname } = require('path');
    const outputDir = dirname(outputFile);
    const dirIsMissing = outputDir !== '.' && !fs.existsSync(outputDir);
    if (dirIsMissing) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    fs.writeFileSync(outputFile, dryRunContent);

    if (verbose) {
      // NOTE(review): the leading "??" looks like a mis-encoded glyph — confirm the intended character
      console.log(`${messageColors.fileOp('?? Dry run results saved to:')} ${outputFile}`);
    }

    const byteCount = Buffer.byteLength(dryRunContent, 'utf8');
    return {
      success: true,
      written: true,
      file: outputFile,
      lines: dryRunOutput.length,
      bytes: byteCount
    };
  } catch (writeErr) {
    const errorMsg = `Failed to write dry run output to ${outputFile}: ${writeErr.message}`;
    // NOTE(review): the leading "?" looks like a mis-encoded glyph — confirm the intended character
    console.error(`? ${errorMsg}`);

    return {
      success: false,
      error: errorMsg,
      written: false
    };
  }
}
468
+
/**
 * Summarise what the dry-run collections currently hold.
 *
 * Missing collections count as empty. `domains` is the number of distinct
 * truthy `domain` values across regex matches and nettools results combined.
 *
 * @param {Map} matchedDomains - The matched domains map
 * @returns {Object} `{ totalMatches, regexMatches, netToolsMatches,
 *          searchStringResults, domains }`, or `{ error }` when the argument
 *          is not a Map
 */
function getDryRunStats(matchedDomains) {
  const isMap = matchedDomains instanceof Map;
  if (!isMap) {
    return { error: 'Invalid matchedDomains Map' };
  }

  const regexHits = matchedDomains.get(DRY_RUN_KEYS.MATCHES) || [];
  const netToolHits = matchedDomains.get(DRY_RUN_KEYS.NET_TOOLS) || [];
  const searchOutcomes = matchedDomains.get(DRY_RUN_KEYS.SEARCH_STRING) || new Map();

  // Pull the usable domain names out of one collection.
  const domainsOf = (entries) => entries.map((entry) => entry.domain).filter(Boolean);
  const distinctDomains = new Set(domainsOf(regexHits).concat(domainsOf(netToolHits)));

  const regexMatches = regexHits.length;
  const netToolsMatches = netToolHits.length;

  return {
    totalMatches: regexMatches + netToolsMatches,
    regexMatches,
    netToolsMatches,
    searchStringResults: searchOutcomes.size,
    domains: distinctDomains.size
  };
}
494
+
495
+ module.exports = {
496
+ // Constants
497
+ DRY_RUN_KEYS,
498
+
499
+ // Core functions
500
+ initializeDryRunCollections,
501
+ addDryRunMatch,
502
+ addDryRunNetTools,
503
+ addDryRunSearchString,
504
+ processDryRunResults,
505
+ writeDryRunOutput,
506
+
507
+ // Utility functions
508
+ getDryRunStats,
509
+ validateMatchData,
510
+ validateNetToolsData,
511
+ truncateUrl,
512
+ formatSearchStringMatch,
513
+ generateAdblockRule,
514
+ outputDryRunResults
515
+ };
package/nwss.1 CHANGED
@@ -303,7 +303,7 @@ Number of times to reload the page (default: 1).
303
303
 
304
304
  .TP
305
305
  .B forcereload
306
- Boolean. Force an additional reload after reloads.
306
+ Boolean or Array. Force cache-clearing reload for all URLs or specific domains. Can be \fBtrue\fR/\fBfalse\fR or array of domain names like \fB["domain1.com", "domain2.com"]\fR. When set to \fBtrue\fR, applies to all URLs in the site configuration. When set to an array, applies only to URLs whose domains match the specified domains. Supports exact hostname matching and subdomain matching (e.g., "example.com" matches both "example.com" and "subdomain.example.com"). Domain matching is case-insensitive and automatically handles protocols, ports, and paths.
307
307
 
308
308
  .TP
309
309
  .B timeout
@@ -784,6 +784,37 @@ With default settings (\fBignore_similar_threshold: 80\fR):
784
784
  }
785
785
  .EE
786
786
 
787
+ .SS Force reload examples:
788
+ .EX
789
+ # Force reload for all URLs in a site configuration
790
+ {
791
+ "url": ["https://site1.com", "https://site2.com"],
792
+ "filterRegex": "ads|tracking",
793
+ "forcereload": true
794
+ }
795
+
796
+ # Force reload only for specific domains
797
+ {
798
+ "url": [
799
+ "https://example.com/page1",
800
+ "https://test.org/page2",
801
+ "https://demo.net/page3"
802
+ ],
803
+ "filterRegex": "\\\\.(space|website)\\\\b",
804
+ "forcereload": ["example.com", "demo.net"],
805
+ "comments": [
806
+ "Only example.com and demo.net URLs get force reload",
807
+ "test.org URLs use standard reload"
808
+ ]
809
+ }
810
+
811
+ # Mixed configuration with domain-specific force reload
812
+ {
813
+ "url": "https://cdn.example.com/content/page.html",
814
+ "forcereload": ["example.com"]
815
+ }
816
+ .EE
817
+
787
818
  .SS Configuration with documentation comments:
788
819
  .EX
789
820
  {
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.20 ===
1
+ // === Network scanner script (nwss.js) v2.0.22 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -44,6 +44,8 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
44
44
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
45
45
  const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
46
46
  const { clearPersistentCache } = require('./lib/smart-cache');
47
+ // Dry run functionality
48
+ const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
47
49
  // Enhanced site data clearing functionality
48
50
  const { clearSiteData } = require('./lib/clear_sitedata');
49
51
 
@@ -130,7 +132,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
130
132
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
131
133
 
132
134
  // --- Script Configuration & Constants ---
133
- const VERSION = '2.0.20'; // Script version
135
+ const VERSION = '2.0.22'; // Script version
134
136
 
135
137
  // get startTime
136
138
  const startTime = Date.now();
@@ -516,7 +518,7 @@ Redirect Handling Options:
516
518
  interact_intensity: "low"|"medium"|"high" Interaction simulation intensity (default: medium)
517
519
  delay: <milliseconds> Delay after load (default: 4000)
518
520
  reload: <number> Reload page n times after load (default: 1)
519
- forcereload: true/false Force an additional reload after reloads
521
+ forcereload: true/false or ["domain1.com", "domain2.com"] Force cache-clearing reload for all URLs or specific domains
520
522
  clear_sitedata: true/false Clear all cookies, cache, storage before each load (default: false)
521
523
  subDomains: 1/0 Output full subdomains (default: 0)
522
524
  localhost: true/false Force localhost output (127.0.0.1)
@@ -953,103 +955,6 @@ function shouldEnableCDPForUrl(url, cdpSpecificList) {
953
955
  }
954
956
  }
955
957
 
956
- /**
957
- * Outputs dry run results to console with formatted display
958
- * If outputFile is specified, also captures output for file writing
959
- * @param {string} url - The URL being processed
960
- * @param {Array} matchedItems - Array of matched items with regex, domain, and resource type
961
- * @param {Array} netToolsResults - Array of whois/dig results
962
- * @param {string} pageTitle - Title of the page (if available)
963
- */
964
- function outputDryRunResults(url, matchedItems, netToolsResults, pageTitle) {
965
- const lines = [];
966
-
967
- lines.push(`\n=== DRY RUN RESULTS === ${url}`);
968
-
969
- console.log(`\n${messageColors.scanning('=== DRY RUN RESULTS ===')} ${url}`);
970
-
971
- if (pageTitle && pageTitle.trim()) {
972
- lines.push(`Title: ${pageTitle.trim()}`);
973
- console.log(`${messageColors.info('Title:')} ${pageTitle.trim()}`);
974
- }
975
-
976
- if (matchedItems.length === 0 && netToolsResults.length === 0) {
977
- lines.push(`No matching rules found on ${url}`);
978
-
979
- // Store output for file writing if outputFile is specified
980
- if (outputFile) {
981
- dryRunOutput.push(...lines);
982
- dryRunOutput.push(''); // Add empty line
983
- }
984
- console.log(messageColors.warn(`No matching rules found on ${url}`));
985
- return;
986
- }
987
-
988
- const totalMatches = matchedItems.length + netToolsResults.length;
989
- lines.push(`Matches found: ${totalMatches}`);
990
- console.log(`${messageColors.success('Matches found:')} ${totalMatches}`);
991
-
992
- matchedItems.forEach((item, index) => {
993
- lines.push('');
994
- lines.push(`[${index + 1}] Regex Match:`);
995
- lines.push(` Pattern: ${item.regex}`);
996
- lines.push(` Domain: ${item.domain}`);
997
- lines.push(` Resource Type: ${item.resourceType}`);
998
- lines.push(` Full URL: ${item.fullUrl}`);
999
-
1000
- console.log(`\n${messageColors.highlight(`[${index + 1}]`)} ${messageColors.match('Regex Match:')}`);
1001
- console.log(` Pattern: ${item.regex}`);
1002
- console.log(` Domain: ${item.domain}`);
1003
- console.log(` Resource Type: ${item.resourceType}`);
1004
- console.log(` Full URL: ${item.fullUrl}`);
1005
-
1006
- // Show searchstring results if available
1007
- if (item.searchStringMatch) {
1008
- lines.push(` ✓ Searchstring Match: ${item.searchStringMatch.type} - "${item.searchStringMatch.term}"`);
1009
- console.log(` ${messageColors.success('✓ Searchstring Match:')} ${item.searchStringMatch.type} - "${item.searchStringMatch.term}"`);
1010
- } else if (item.searchStringChecked) {
1011
- lines.push(` ✗ Searchstring: No matches found in content`);
1012
- console.log(` ${messageColors.warn('✗ Searchstring:')} No matches found in content`);
1013
- }
1014
-
1015
- // Generate adblock rule
1016
- const adblockRule = `||${item.domain}^$${item.resourceType}`;
1017
- lines.push(` Adblock Rule: ${adblockRule}`);
1018
- console.log(` ${messageColors.info('Adblock Rule:')} ${adblockRule}`);
1019
- });
1020
-
1021
- // Display nettools results
1022
- netToolsResults.forEach((result, index) => {
1023
- const resultIndex = matchedItems.length + index + 1;
1024
- lines.push('');
1025
- lines.push(`[${resultIndex}] NetTools Match:`);
1026
- lines.push(` Domain: ${result.domain}`);
1027
- lines.push(` Tool: ${result.tool.toUpperCase()}`);
1028
- lines.push(` ✓ Match: ${result.matchType} - "${result.matchedTerm}"`);
1029
- if (result.details) {
1030
- lines.push(` Details: ${result.details}`);
1031
- }
1032
- console.log(`\n${messageColors.highlight(`[${resultIndex}]`)} ${messageColors.match('NetTools Match:')}`);
1033
- console.log(` Domain: ${result.domain}`);
1034
- console.log(` Tool: ${result.tool.toUpperCase()}`);
1035
- console.log(` ${messageColors.success('✓ Match:')} ${result.matchType} - "${result.matchedTerm}"`);
1036
- if (result.details) {
1037
- console.log(` Details: ${result.details}`);
1038
- }
1039
-
1040
- // Generate adblock rule for nettools matches
1041
- const adblockRule = `||${result.domain}^`;
1042
- lines.push(` Adblock Rule: ${adblockRule}`);
1043
- console.log(` ${messageColors.info('Adblock Rule:')} ${adblockRule}`);
1044
- });
1045
-
1046
- // Store output for file writing if outputFile is specified
1047
- if (outputFile) {
1048
- dryRunOutput.push(...lines);
1049
- dryRunOutput.push(''); // Add empty line between sites
1050
- }
1051
- }
1052
-
1053
958
  /**
1054
959
  * Helper function to check if a URL should be processed (valid HTTP/HTTPS)
1055
960
  * @param {string} url - URL to validate
@@ -1543,9 +1448,7 @@ function setupFrameHandling(page, forceDebug) {
1543
1448
 
1544
1449
  // Initialize dry run matches collection
1545
1450
  if (dryRunMode) {
1546
- matchedDomains.set('dryRunMatches', []);
1547
- matchedDomains.set('dryRunNetTools', []);
1548
- matchedDomains.set('dryRunSearchString', new Map()); // Map URL to search results
1451
+ initializeDryRunCollections(matchedDomains);
1549
1452
  }
1550
1453
  const timeout = siteConfig.timeout || TIMEOUTS.DEFAULT_PAGE;
1551
1454
 
@@ -2434,7 +2337,7 @@ function setupFrameHandling(page, forceDebug) {
2434
2337
  const allowedResourceTypes = siteConfig.resourceTypes;
2435
2338
  if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
2436
2339
  if (dryRunMode) {
2437
- matchedDomains.get('dryRunMatches').push({
2340
+ addDryRunMatch(matchedDomains, {
2438
2341
  regex: matchedRegexPattern,
2439
2342
  domain: reqDomain,
2440
2343
  resourceType: resourceType,
@@ -2598,7 +2501,7 @@ function setupFrameHandling(page, forceDebug) {
2598
2501
  // If NO searchstring AND NO nettools are defined, match immediately (existing behavior)
2599
2502
  if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
2600
2503
  if (dryRunMode) {
2601
- matchedDomains.get('dryRunMatches').push({
2504
+ addDryRunMatch(matchedDomains, {
2602
2505
  regex: matchedRegexPattern,
2603
2506
  domain: reqDomain,
2604
2507
  resourceType: resourceType,
@@ -2641,8 +2544,7 @@ function setupFrameHandling(page, forceDebug) {
2641
2544
  }
2642
2545
 
2643
2546
  if (dryRunMode) {
2644
- // For dry run, we'll collect the domain for nettools checking
2645
- matchedDomains.get('dryRunMatches').push({
2547
+ addDryRunMatch(matchedDomains, {
2646
2548
  regex: matchedRegexPattern,
2647
2549
  domain: reqDomain,
2648
2550
  resourceType: resourceType,
@@ -2722,7 +2624,7 @@ function setupFrameHandling(page, forceDebug) {
2722
2624
  console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for ${searchType} content search`));
2723
2625
  }
2724
2626
  if (dryRunMode) {
2725
- matchedDomains.get('dryRunMatches').push({
2627
+ addDryRunMatch(matchedDomains, {
2726
2628
  regex: matchedRegexPattern,
2727
2629
  domain: reqDomain,
2728
2630
  resourceType: resourceType,
@@ -3202,7 +3104,80 @@ function setupFrameHandling(page, forceDebug) {
3202
3104
  updatePageUsage(page, true);
3203
3105
 
3204
3106
  const totalReloads = (siteConfig.reload || 1) - 1; // Subtract 1 because initial load counts as first
3205
- const useForceReload = siteConfig.forcereload === true;
3107
+
3108
+ // Enhanced forcereload logic: support boolean or domain array
3109
+ let useForceReload = false;
3110
+ if (siteConfig.forcereload === true) {
3111
+ // Original behavior: force reload for all URLs
3112
+ useForceReload = true;
3113
+ } else if (Array.isArray(siteConfig.forcereload)) {
3114
+ // Input validation: filter out invalid entries
3115
+ const validDomains = siteConfig.forcereload.filter(domain => {
3116
+ if (typeof domain !== 'string') {
3117
+ if (forceDebug) {
3118
+ console.log(formatLogMessage('debug', `Invalid forcereload entry (not string): ${typeof domain} - ${JSON.stringify(domain)}`));
3119
+ }
3120
+ return false;
3121
+ }
3122
+
3123
+ if (domain.trim() === '') {
3124
+ if (forceDebug) {
3125
+ console.log(formatLogMessage('debug', `Invalid forcereload entry (empty string)`));
3126
+ }
3127
+ return false;
3128
+ }
3129
+
3130
+ return true;
3131
+ });
3132
+
3133
+ if (validDomains.length === 0) {
3134
+ if (forceDebug) {
3135
+ console.log(formatLogMessage('debug', `No valid domains in forcereload array for ${currentUrl}`));
3136
+ }
3137
+ useForceReload = false;
3138
+ } else {
3139
+ // New behavior: force reload only for matching domains
3140
+ const currentDomain = safeGetDomain(currentUrl, true); // Get full hostname
3141
+ const currentRootDomain = safeGetDomain(currentUrl, false); // Get root domain
3142
+
3143
+ useForceReload = validDomains.some(domain => {
3144
+ // Enhanced domain cleaning: handle protocols, ports, paths, and normalize case
3145
+ let cleanDomain = domain.trim();
3146
+ cleanDomain = cleanDomain.replace(/^https?:\/\//, ''); // Remove protocol
3147
+ cleanDomain = cleanDomain.replace(/:\d+$/, ''); // Remove port (e.g., :8080)
3148
+ cleanDomain = cleanDomain.replace(/\/.*$/, ''); // Remove path
3149
+ cleanDomain = cleanDomain.toLowerCase(); // Normalize case
3150
+
3151
+ // Additional validation: basic domain format check
3152
+ if (!/^[a-z0-9.-]+$/.test(cleanDomain)) {
3153
+ if (forceDebug) {
3154
+ console.log(formatLogMessage('debug', `Skipping invalid domain format in forcereload: ${domain} -> ${cleanDomain}`));
3155
+ }
3156
+ return false;
3157
+ }
3158
+
3159
+ // Check if current URL matches this domain
3160
+ // Support both exact hostname match and subdomain match
3161
+ if (currentDomain.toLowerCase() === cleanDomain || currentRootDomain.toLowerCase() === cleanDomain) {
3162
+ return true;
3163
+ }
3164
+
3165
+ // Check if current hostname ends with the domain (subdomain match)
3166
+ if (currentDomain.toLowerCase().endsWith('.' + cleanDomain)) {
3167
+ return true;
3168
+ }
3169
+
3170
+ return false;
3171
+ });
3172
+ }
3173
+
3174
+ if (forceDebug && useForceReload) {
3175
+ console.log(formatLogMessage('debug', `Force reload enabled for ${currentUrl} - matches domain in forcereload list`));
3176
+ } else if (forceDebug && validDomains.length > 0) {
3177
+ console.log(formatLogMessage('debug', `Force reload not applied for ${currentUrl} - no domain match in [${validDomains.join(', ')}]`));
3178
+ }
3179
+ }
3180
+ // If forcereload is not specified, false, or any other value, useForceReload remains false
3206
3181
 
3207
3182
  if (useForceReload && forceDebug) {
3208
3183
  console.log(formatLogMessage('debug', `Using force reload mechanism for all ${totalReloads + 1} reload(s) on ${currentUrl}`));
@@ -3360,38 +3335,18 @@ function setupFrameHandling(page, forceDebug) {
3360
3335
  updatePageUsage(page, false);
3361
3336
 
3362
3337
  if (dryRunMode) {
3363
- // Get page title for dry run output
3364
- let pageTitle = '';
3365
- try {
3366
- pageTitle = await page.title();
3367
- } catch (titleErr) {
3368
- if (forceDebug) {
3369
- console.log(formatLogMessage('debug', `Failed to get page title for ${currentUrl}: ${titleErr.message}`));
3370
- }
3338
+ // Process dry run results using the module
3339
+ const dryRunResult = await processDryRunResults(currentUrl, matchedDomains, page, outputFile, dryRunOutput, forceDebug);
3340
+
3341
+ if (!dryRunResult.success) {
3342
+ console.warn(messageColors.warn(`Dry run processing failed for ${currentUrl}: ${dryRunResult.error}`));
3371
3343
  }
3372
3344
 
3373
- // Get collected matches and enhance with searchstring results
3374
- const dryRunMatches = matchedDomains.get('dryRunMatches') || [];
3375
- const dryRunNetTools = matchedDomains.get('dryRunNetTools') || [];
3376
- const dryRunSearchString = matchedDomains.get('dryRunSearchString') || new Map();
3377
-
3378
- // Enhance matches with searchstring results
3379
- const enhancedMatches = dryRunMatches.map(match => {
3380
- const searchResult = dryRunSearchString.get(match.fullUrl);
3381
- return {
3382
- ...match,
3383
- searchStringMatch: searchResult && searchResult.matched ? searchResult : null,
3384
- searchStringChecked: match.needsSearchStringCheck
3385
- };
3386
- });
3387
-
3388
3345
  // Wait a moment for async nettools/searchstring operations to complete
3389
3346
  // Use fast timeout helper for Puppeteer 22.x compatibility
3390
3347
  await fastTimeout(TIMEOUTS.CURL_HANDLER_DELAY); // Wait for async operations
3391
3348
 
3392
- outputDryRunResults(currentUrl, enhancedMatches, dryRunNetTools, pageTitle);
3393
-
3394
- return { url: currentUrl, rules: [], success: true, dryRun: true, matchCount: dryRunMatches.length + dryRunNetTools.length };
3349
+ return { url: currentUrl, rules: [], success: true, dryRun: true, matchCount: dryRunResult.matchCount };
3395
3350
  } else {
3396
3351
  // Format rules using the output module
3397
3352
  const globalOptions = {
@@ -3811,14 +3766,9 @@ function setupFrameHandling(page, forceDebug) {
3811
3766
 
3812
3767
  // Handle dry run output file writing
3813
3768
  if (dryRunMode && outputFile && dryRunOutput.length > 0) {
3814
- try {
3815
- const dryRunContent = dryRunOutput.join('\n');
3816
- fs.writeFileSync(outputFile, dryRunContent);
3817
- if (!silentMode) {
3818
- console.log(`${messageColors.fileOp('📄 Dry run results saved to:')} ${outputFile}`);
3819
- }
3820
- } catch (writeErr) {
3821
- console.error(`❌ Failed to write dry run output to ${outputFile}: ${writeErr.message}`);
3769
+ const writeResult = writeDryRunOutput(outputFile, dryRunOutput, silentMode);
3770
+ if (!writeResult.success && forceDebug) {
3771
+ console.log(formatLogMessage('debug', `Dry run file write failed: ${writeResult.error}`));
3822
3772
  }
3823
3773
  }
3824
3774
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.20",
3
+ "version": "2.0.22",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {