@fanboynz/network-scanner 2.0.57 → 2.0.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md ADDED
@@ -0,0 +1,65 @@
1
+ # Network Scanner (NWSS)
2
+
3
+ Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, VPN/proxy routing, and multiple output formats.
4
+
5
+ ## Project Structure
6
+
7
+ - `nwss.js` — Main entry point (~4,600 lines). CLI args, URL processing, orchestration.
8
+ - `config.json` — Default scan configuration (sites, filters, options).
9
+ - `lib/` — 28 focused, single-purpose modules:
10
+ - `fingerprint.js` — Bot detection evasion (device/GPU/timezone spoofing)
11
+ - `cloudflare.js` — Cloudflare challenge detection and solving
12
+ - `browserhealth.js` — Memory management and browser lifecycle
13
+ - `interaction.js` — Human-like mouse/scroll/typing simulation
14
+ - `smart-cache.js` — Multi-layer caching with persistence
15
+ - `nettools.js` — WHOIS/dig integration
16
+ - `output.js` — Multi-format rule output (adblock, dnsmasq, unbound, pihole, etc.)
17
+ - `proxy.js` — SOCKS5/HTTP proxy support
18
+ - `wireguard_vpn.js` / `openvpn_vpn.js` — VPN routing
19
+ - `adblock.js` — Adblock filter parsing and validation
20
+ - `validate_rules.js` — Domain and rule format validation
21
+ - `colorize.js` — Console output formatting and colors
22
+ - `domain-cache.js` — Domain detection cache for performance
23
+ - `post-processing.js` — Result cleanup and deduplication
24
+ - `redirect.js`, `referrer.js`, `cdp.js`, `curl.js`, `grep.js`, `compare.js`, `compress.js`, `dry-run.js`, `browserexit.js`, `clear_sitedata.js`, `flowproxy.js`, `ignore_similar.js`, `searchstring.js`
25
+ - `.github/workflows/npm-publish.yml` — Automated npm publishing
26
+ - `nwss.1` — Man page
27
+
28
+ ## Tech Stack
29
+
30
+ - **Node.js** >=20.0.0
31
+ - **puppeteer** >=20.0.0 — Headless browser automation
32
+ - **psl** — Public Suffix List for domain parsing
33
+ - **lru-cache** — LRU cache implementation
34
+ - **p-limit** — Concurrency limiting (dynamically imported)
35
+ - **eslint** — Linting (`npm run lint`)
36
+
37
+ ## Conventions
38
+
39
+ - Store modular functionality in `./lib/` with focused, single-purpose modules
40
+ - Use `messageColors` and `formatLogMessage` from `./lib/colorize` for consistent console output
41
+ - Implement timeout protection for all Puppeteer operations using `Promise.race` patterns
42
+ - Handle browser lifecycle with comprehensive cleanup in try-finally blocks
43
+ - Validate all external tool availability before use (grep, curl, whois, dig)
44
+ - Use `forceDebug` flag for detailed logging, `silentMode` for minimal output
45
+ - Use `Object.freeze` for constant configuration objects (TIMEOUTS, CACHE_LIMITS, CONCURRENCY_LIMITS)
46
+ - Use `fastTimeout(ms)` helper instead of `node:timers/promises` for Puppeteer 22.x compatibility
47
+
48
+ ## Running
49
+
50
+ ```bash
51
+ node nwss.js # Run with default config.json
52
+ node nwss.js --custom-json config-custom.json # Run with a custom config file instead of config.json
53
+ node nwss.js --validate-config # Validate configuration
54
+ node nwss.js --dry-run # Console output only: preview matches and generated rules
55
+ node nwss.js --headful # Launch with browser GUI
56
+ ```
57
+
58
+ ## Files to Ignore
59
+
60
+ - `node_modules/**`
61
+ - `logs/**`
62
+ - `sources/**`
63
+ - `.cache/**`
64
+ - `*.log`
65
+ - `*.gz`
package/README.md CHANGED
@@ -59,8 +59,11 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
59
59
  | `--compress-logs` | Compress log files with gzip (requires `--dumpurls`) |
60
60
  | `--sub-domains` | Output full subdomains instead of collapsing to root |
61
61
  | `--no-interact` | Disable page interactions globally |
62
+ | `--ghost-cursor` | Use ghost-cursor Bezier mouse movements globally (requires `npm i ghost-cursor`) |
62
63
  | `--custom-json <file>` | Use a custom config JSON file instead of config.json |
63
64
  | `--headful` | Launch browser with GUI (not headless) |
65
+ | `--keep-open` | Keep browser and tabs open after scan completes (use with `--headful` for debugging) |
66
+ | `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
64
67
  | `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
65
68
  | `--remove-dupes` | Remove duplicate domains from output (only with `-o`) |
66
69
  | `--dry-run` | Console output only: show matching regex, titles, whois/dig/searchstring results, and adblock rules |
@@ -267,6 +270,11 @@ When a page redirects to a new domain, first-party/third-party detection is base
267
270
  | `interact_clicks` | Boolean | `false` | Enable element clicking simulation |
268
271
  | `interact_typing` | Boolean | `false` | Enable typing simulation |
269
272
  | `interact_intensity` | String | `"medium"` | Interaction simulation intensity: "low", "medium", "high" |
273
+ | `cursor_mode` | String | - | Set to `"ghost"` to use ghost-cursor Bezier mouse movements (requires `npm i ghost-cursor`) |
274
+ | `ghost_cursor_speed` | Number | auto | Ghost-cursor movement speed multiplier |
275
+ | `ghost_cursor_hesitate` | Milliseconds | `50` | Delay before ghost-cursor clicks |
276
+ | `ghost_cursor_overshoot` | Pixels | auto | Max ghost-cursor overshoot distance before correcting |
277
+ | `ghost_cursor_duration` | Milliseconds | `interact_duration` or `2000` | How long ghost-cursor movements run |
270
278
  | `dnsmasq` | Boolean | `false` | Force dnsmasq output for this site |
271
279
  | `dnsmasq_old` | Boolean | `false` | Force dnsmasq old format output for this site |
272
280
  | `unbound` | Boolean | `false` | Force unbound output for this site |
@@ -523,6 +531,27 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
523
531
  }
524
532
  ```
525
533
 
534
+ #### Ghost Cursor (Advanced Bezier Mouse)
535
+ ```json
536
+ {
537
+ "url": "https://anti-bot-site.com",
538
+ "interact": true,
539
+ "cursor_mode": "ghost",
540
+ "ghost_cursor_duration": 3000,
541
+ "ghost_cursor_speed": 1.2,
542
+ "fingerprint_protection": "random",
543
+ "filterRegex": "tracking|analytics",
544
+ "comments": "ghost-cursor uses Bezier curves with overshoot for realistic mouse paths"
545
+ }
546
+ ```
547
+
548
+ Or enable globally via CLI:
549
+ ```bash
550
+ node nwss.js --ghost-cursor --debug -o rules.txt
551
+ ```
552
+
553
+ > **Note:** ghost-cursor is an optional dependency. Install with `npm install ghost-cursor`. If not installed, the scanner falls back to the built-in mouse simulation automatically.
554
+
526
555
  #### E-commerce Site Scanning
527
556
  ```json
528
557
  {
@@ -698,5 +727,7 @@ your_username ALL=(root) NOPASSWD: /usr/bin/wg-quick, /usr/bin/wg
698
727
  - If an `.ovpn` file contains embedded credentials, no additional auth config is needed in the JSON
699
728
  - VPN affects system-level routing — all concurrent scans will route through the active tunnel
700
729
  - Both `vpn` (WireGuard) and `openvpn` can be set, but `vpn` takes precedence
730
+ - Ghost-cursor (`cursor_mode: "ghost"`) is optional — install it with `npm i ghost-cursor`. If it is not installed, the scanner falls back to the built-in mouse simulation
731
+ - Ghost-cursor duration defaults to `interact_duration` (or 2000ms), capped by the 15s hard timeout
701
732
 
702
733
  ---
package/lib/adblock.js CHANGED
@@ -51,11 +51,12 @@ function parseAdblockRules(filePath, options = {}) {
51
51
  caseSensitive = false
52
52
  } = options;
53
53
 
54
- if (!fs.existsSync(filePath)) {
54
+ let fileContent;
55
+ try {
56
+ fileContent = fs.readFileSync(filePath, 'utf-8');
57
+ } catch (err) {
55
58
  throw new Error(`Adblock rules file not found: ${filePath}`);
56
59
  }
57
-
58
- const fileContent = fs.readFileSync(filePath, 'utf-8');
59
60
  const lines = fileContent.split('\n');
60
61
 
61
62
  const rules = {
@@ -5,6 +5,7 @@
5
5
 
6
6
 
7
7
  const fs = require('fs');
8
+ const path = require('path');
8
9
  const { execSync } = require('child_process');
9
10
 
10
11
  // Constants for temp file cleanup
@@ -15,20 +16,55 @@ const CHROME_TEMP_PATHS = [
15
16
  ];
16
17
 
17
18
  const CHROME_TEMP_PATTERNS = [
18
- 'com.google.Chrome.*', // Google Chrome temp files (no leading dot)
19
- '.org.chromium.Chromium.*',
20
- 'puppeteer-*'
19
+ /^\.?com\.google\.Chrome\./,
20
+ /^\.?org\.chromium\.Chromium\./,
21
+ /^puppeteer-/
21
22
  ];
22
23
 
24
+ /**
25
+ * Count and remove matching Chrome/Puppeteer temp entries from a directory using fs
26
+ * @param {string} basePath - Directory to scan
27
+ * @param {boolean} forceDebug - Whether to output debug logs
28
+ * @returns {number} Number of items cleaned
29
+ */
30
+ function cleanTempDir(basePath, forceDebug) {
31
+ let entries;
32
+ try {
33
+ entries = fs.readdirSync(basePath);
34
+ } catch {
35
+ if (forceDebug) console.log(`[debug] [temp-cleanup] Cannot read ${basePath}`);
36
+ return 0;
37
+ }
38
+
39
+ let cleaned = 0;
40
+ for (const entry of entries) {
41
+ let matched = false;
42
+ for (const re of CHROME_TEMP_PATTERNS) {
43
+ if (re.test(entry)) { matched = true; break; }
44
+ }
45
+ if (!matched) continue;
46
+
47
+ try {
48
+ fs.rmSync(path.join(basePath, entry), { recursive: true, force: true });
49
+ cleaned++;
50
+ if (forceDebug) console.log(`[debug] [temp-cleanup] Removed ${basePath}/${entry}`);
51
+ } catch (rmErr) {
52
+ if (forceDebug) console.log(`[debug] [temp-cleanup] Failed to remove ${basePath}/${entry}: ${rmErr.message}`);
53
+ }
54
+ }
55
+
56
+ return cleaned;
57
+ }
58
+
23
59
  /**
24
60
  * Clean Chrome temporary files and directories
25
61
  * @param {Object} options - Cleanup options
26
62
  * @param {boolean} options.includeSnapTemp - Whether to clean snap temp directories
27
63
  * @param {boolean} options.forceDebug - Whether to output debug logs
28
64
  * @param {boolean} options.comprehensive - Whether to perform comprehensive cleanup of all temp locations
29
- * @returns {Promise<Object>} Cleanup results
65
+ * @returns {Object} Cleanup results
30
66
  */
31
- async function cleanupChromeTempFiles(options = {}) {
67
+ function cleanupChromeTempFiles(options = {}) {
32
68
  const {
33
69
  includeSnapTemp = false,
34
70
  forceDebug = false,
@@ -36,57 +72,20 @@ async function cleanupChromeTempFiles(options = {}) {
36
72
  } = options;
37
73
 
38
74
  try {
39
-
40
- // Base cleanup commands for standard temp directories
41
- const cleanupCommands = [
42
- 'rm -rf /tmp/com.google.Chrome.* 2>/dev/null || true',
43
- 'rm -rf /tmp/.com.google.Chrome.* 2>/dev/null || true',
44
- 'rm -rf /tmp/.org.chromium.Chromium.* 2>/dev/null || true',
45
- 'rm -rf /tmp/puppeteer-* 2>/dev/null || true',
46
- 'rm -rf /dev/shm/.com.google.Chrome.* 2>/dev/null || true',
47
- 'rm -rf /dev/shm/.org.chromium.Chromium.* 2>/dev/null || true'
48
- ];
49
-
50
- // Add snap-specific cleanup if requested
51
- if (includeSnapTemp || comprehensive) {
52
- cleanupCommands.push('rm -rf /dev/shm/com.google.Chrome.* 2>/dev/null || true');
53
- cleanupCommands.push(
54
- 'rm -rf /tmp/snap-private-tmp/snap.chromium/tmp/.org.chromium.Chromium.* 2>/dev/null || true',
55
- 'rm -rf /tmp/snap-private-tmp/snap.chromium/tmp/puppeteer-* 2>/dev/null || true'
56
- );
57
- }
75
+ const paths = comprehensive || includeSnapTemp
76
+ ? CHROME_TEMP_PATHS
77
+ : CHROME_TEMP_PATHS.slice(0, 2); // /tmp and /dev/shm only
58
78
 
59
79
  let totalCleaned = 0;
60
-
61
- for (const command of cleanupCommands) {
62
- try {
63
- // Extract glob pattern and count matches before deletion
64
- const globPattern = command.match(/rm -rf ([^ ]+)/)?.[1];
65
- if (!globPattern) continue;
66
- const fileCount = parseInt(execSync(`ls -1d ${globPattern} 2>/dev/null | wc -l || echo 0`, { stdio: 'pipe' }).toString().trim()) || 0;
67
-
68
- if (fileCount > 0) {
69
- execSync(command, { stdio: 'ignore' });
70
- totalCleaned += fileCount;
71
-
72
- if (forceDebug) {
73
- console.log(`[debug] [temp-cleanup] Cleaned ${fileCount} items from ${globPattern}`);
74
- }
75
- }
76
- } catch (cmdErr) {
77
- // Ignore individual command errors but log in debug mode
78
- if (forceDebug) {
79
- console.log(`[debug] [temp-cleanup] Cleanup command failed: ${command} (${cmdErr.message})`);
80
- }
81
- }
80
+ for (const basePath of paths) {
81
+ totalCleaned += cleanTempDir(basePath, forceDebug);
82
82
  }
83
83
 
84
84
  if (forceDebug) {
85
- console.log(`[debug] [temp-cleanup] Standard cleanup completed (${totalCleaned} items)`);
85
+ console.log(`[debug] [temp-cleanup] Cleanup completed (${totalCleaned} items)`);
86
86
  }
87
-
87
+
88
88
  return { success: true, itemsCleaned: totalCleaned };
89
-
90
89
  } catch (cleanupErr) {
91
90
  if (forceDebug) {
92
91
  console.log(`[debug] [temp-cleanup] Chrome cleanup error: ${cleanupErr.message}`);
@@ -96,72 +95,38 @@ async function cleanupChromeTempFiles(options = {}) {
96
95
  }
97
96
 
98
97
  /**
99
- * Comprehensive temp file cleanup that systematically checks all known Chrome temp locations
98
+ * Comprehensive temp file cleanup that checks all known Chrome temp locations
100
99
  * @param {Object} options - Cleanup options
101
100
  * @param {boolean} options.forceDebug - Whether to output debug logs
102
101
  * @param {boolean} options.verbose - Whether to show verbose output
103
- * @returns {Promise<Object>} Cleanup results
102
+ * @returns {Object} Cleanup results
104
103
  */
105
- async function comprehensiveChromeTempCleanup(options = {}) {
104
+ function comprehensiveChromeTempCleanup(options = {}) {
106
105
  const { forceDebug = false, verbose = false } = options;
107
-
106
+
108
107
  try {
109
- let totalCleaned = 0;
110
-
111
108
  if (verbose && !forceDebug) {
112
109
  console.log(`[temp-cleanup] Scanning Chrome/Puppeteer temporary files...`);
113
110
  }
114
-
111
+
112
+ let totalCleaned = 0;
115
113
  for (const basePath of CHROME_TEMP_PATHS) {
116
- // Check if the base path exists before trying to clean it
117
- try {
118
- const pathExists = fs.existsSync(basePath);
119
-
120
- if (!pathExists) {
121
- if (forceDebug) {
122
- console.log(`[debug] [temp-cleanup] Skipping non-existent path: ${basePath}`);
123
- }
124
- continue;
125
- }
126
-
127
- for (const pattern of CHROME_TEMP_PATTERNS) {
128
- const fullPattern = `${basePath}/${pattern}`;
129
-
130
- // Count items before deletion
131
- const countCommand = `ls -1d ${fullPattern} 2>/dev/null | wc -l || echo 0`;
132
- const itemCount = parseInt(execSync(countCommand, { stdio: 'pipe' }).toString().trim()) || 0;
133
-
134
- if (itemCount > 0) {
135
- const deleteCommand = `rm -rf ${fullPattern} 2>/dev/null || true`;
136
- execSync(deleteCommand, { stdio: 'ignore' });
137
- totalCleaned += itemCount;
138
-
139
- if (forceDebug) {
140
- console.log(`[debug] [temp-cleanup] Removed ${itemCount} items matching ${fullPattern}`);
141
- }
142
- }
143
- }
144
- } catch (pathErr) {
145
- if (forceDebug) {
146
- console.log(`[debug] [temp-cleanup] Error checking path ${basePath}: ${pathErr.message}`);
147
- }
148
- }
114
+ totalCleaned += cleanTempDir(basePath, forceDebug);
149
115
  }
150
-
116
+
151
117
  if (verbose && totalCleaned > 0) {
152
- console.log(`[temp-cleanup] ? Removed ${totalCleaned} temporary file(s)/folder(s)`);
118
+ console.log(`[temp-cleanup] Removed ${totalCleaned} temporary file(s)/folder(s)`);
153
119
  } else if (verbose && totalCleaned === 0) {
154
- console.log(`[temp-cleanup] ? Clean - no remaining temporary files`);
120
+ console.log(`[temp-cleanup] Clean - no remaining temporary files`);
155
121
  } else if (forceDebug) {
156
122
  console.log(`[debug] [temp-cleanup] Comprehensive cleanup completed (${totalCleaned} items)`);
157
123
  }
158
-
124
+
159
125
  return { success: true, itemsCleaned: totalCleaned };
160
-
161
126
  } catch (err) {
162
127
  const errorMsg = `Comprehensive temp file cleanup failed: ${err.message}`;
163
128
  if (verbose) {
164
- console.warn(`[temp-cleanup] ? ${errorMsg}`);
129
+ console.warn(`[temp-cleanup] ${errorMsg}`);
165
130
  } else if (forceDebug) {
166
131
  console.log(`[debug] [temp-cleanup] ${errorMsg}`);
167
132
  }
@@ -317,7 +282,7 @@ async function forceBrowserKill(browser, forceDebug = false) {
317
282
  }
318
283
 
319
284
  // Wait for graceful termination
320
- await new Promise(resolve => setTimeout(resolve, 3000));
285
+ await new Promise(resolve => setTimeout(resolve, 1000));
321
286
 
322
287
  // Force kill any remaining processes with SIGKILL
323
288
  for (const pid of pidsToKill) {
@@ -438,11 +438,12 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
438
438
  let closedCount = 0;
439
439
  for (const page of safePagesToClose) {
440
440
  try {
441
- // Cache both page state and URL for this iteration
442
441
  const isPageClosed = page.isClosed();
443
- const pageUrl = page.url();
444
-
445
- if (!isPageClosed) {
442
+
443
+ // Re-check processing state — may have changed since safety check
444
+ const usage = pageUsageTracker.get(page);
445
+ if (!isPageClosed && !(usage && usage.isProcessing)) {
446
+ const pageUrl = page.url();
446
447
  await page.close();
447
448
  pageCreationTracker.delete(page); // Remove from tracker
448
449
  pageUsageTracker.delete(page);
@@ -569,6 +570,16 @@ function trackPageForRealtime(page) {
569
570
  updatePageUsage(page, false); // Initialize usage tracking
570
571
  }
571
572
 
573
+ /**
574
+ * Removes a page from all tracking Maps immediately.
575
+ * Call this before page.close() to prevent stale entries during concurrent execution.
576
+ * @param {import('puppeteer').Page} page - Page to untrack
577
+ */
578
+ function untrackPage(page) {
579
+ pageCreationTracker.delete(page);
580
+ pageUsageTracker.delete(page);
581
+ }
582
+
572
583
  /**
573
584
  * Purges stale entries from tracking Maps (pages that were closed without cleanup)
574
585
  * Should be called periodically to prevent memory leaks
@@ -1225,6 +1236,7 @@ module.exports = {
1225
1236
  isBrowserHealthy,
1226
1237
  isCriticalProtocolError,
1227
1238
  updatePageUsage,
1239
+ untrackPage,
1228
1240
  cleanupPageBeforeReload,
1229
1241
  purgeStaleTrackers
1230
1242
  };
package/lib/cdp.js CHANGED
@@ -28,15 +28,19 @@
28
28
  const { formatLogMessage } = require('./colorize');
29
29
 
30
30
  /**
31
- * Creates a reusable timeout promise to reduce function allocation overhead
31
+ * Race a promise against a timeout, clearing the timer when the promise settles.
32
+ * Prevents leaked setTimeout handles that hold closure references until they fire.
33
+ * @param {Promise} promise - The operation to race
32
34
  * @param {number} ms - Timeout in milliseconds
33
35
  * @param {string} message - Error message for timeout
34
- * @returns {Promise} Promise that rejects after timeout
36
+ * @returns {Promise} Resolves/rejects with the operation result, or rejects on timeout
35
37
  */
36
- function createTimeoutPromise(ms, message) {
37
- return new Promise((_, reject) =>
38
- setTimeout(() => reject(new Error(message)), ms)
39
- );
38
+ function raceWithTimeout(promise, ms, message) {
39
+ let timeoutId;
40
+ const timeoutPromise = new Promise((_, reject) => {
41
+ timeoutId = setTimeout(() => reject(new Error(message)), ms);
42
+ });
43
+ return Promise.race([promise, timeoutPromise]).finally(() => clearTimeout(timeoutId));
40
44
  }
41
45
 
42
46
  /**
@@ -59,10 +63,7 @@ const createSessionResult = (session = null, cleanup = async () => {}, isEnhance
59
63
  * @returns {Promise<import('puppeteer').Page>} Page instance
60
64
  */
61
65
  async function createPageWithTimeout(browser, timeout = 30000) {
62
- return Promise.race([
63
- browser.newPage(),
64
- createTimeoutPromise(timeout, 'Page creation timeout - browser may be unresponsive')
65
- ]);
66
+ return raceWithTimeout(browser.newPage(), timeout, 'Page creation timeout - browser may be unresponsive');
66
67
  }
67
68
 
68
69
  /**
@@ -73,24 +74,18 @@ async function createPageWithTimeout(browser, timeout = 30000) {
73
74
  */
74
75
  async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
75
76
  try {
76
- await Promise.race([
77
- page.setRequestInterception(true),
78
- createTimeoutPromise(timeout, 'Request interception timeout - first attempt')
79
- ]);
77
+ await raceWithTimeout(page.setRequestInterception(true), timeout, 'Request interception timeout - first attempt');
80
78
  } catch (firstError) {
81
79
  // Check for immediate critical failures
82
- if (firstError.message.includes('Target closed') ||
80
+ if (firstError.message.includes('Target closed') ||
83
81
  firstError.message.includes('Session closed') ||
84
82
  firstError.message.includes('Browser has been closed')) {
85
83
  throw new Error('CRITICAL_BROWSER_ERROR: ' + firstError.message);
86
84
  }
87
-
85
+
88
86
  // Retry with extended timeout
89
87
  try {
90
- await Promise.race([
91
- page.setRequestInterception(true),
92
- createTimeoutPromise(timeout * 2, 'Request interception timeout - retry failed')
93
- ]);
88
+ await raceWithTimeout(page.setRequestInterception(true), timeout * 2, 'Request interception timeout - retry failed');
94
89
  } catch (retryError) {
95
90
  if (retryError.message.includes('Network.enable timed out') ||
96
91
  retryError.message.includes('ProtocolError')) {
@@ -168,10 +163,7 @@ async function createCDPSession(page, currentUrl, options = {}) {
168
163
  try {
169
164
  // Create CDP session using modern Puppeteer 20+ API
170
165
  // Add timeout protection for CDP session creation
171
- cdpSession = await Promise.race([
172
- page.createCDPSession(),
173
- createTimeoutPromise(20000, 'CDP session creation timeout')
174
- ]);
166
+ cdpSession = await raceWithTimeout(page.createCDPSession(), 20000, 'CDP session creation timeout');
175
167
 
176
168
  // Enable network domain - required for network event monitoring
177
169
  await cdpSession.send('Network.enable');
@@ -255,150 +247,6 @@ async function createCDPSession(page, currentUrl, options = {}) {
255
247
  }
256
248
  }
257
249
 
258
- /**
259
- * Validates CDP availability and configuration
260
- *
261
- * USAGE IN YOUR APPLICATION:
262
- * const validation = validateCDPConfig(siteConfig, globalCDPFlag);
263
- * if (!validation.isValid) {
264
- * console.warn('CDP configuration issues detected');
265
- * }
266
- * validation.recommendations.forEach(rec => console.log('Recommendation:', rec));
267
- *
268
- * @param {object} siteConfig - Site configuration object
269
- * @param {boolean} globalCDP - Global CDP flag
270
- * @param {Array} cdpSpecificDomains - Array of domains for cdp_specific feature
271
- * @returns {object} Validation result with recommendations
272
- */
273
- function validateCDPConfig(siteConfig, globalCDP, cdpSpecificDomains = []) {
274
- const warnings = [];
275
- const recommendations = [];
276
-
277
- // Check for conflicting configurations
278
- if (globalCDP && siteConfig.cdp === false) {
279
- warnings.push('Site-specific CDP disabled but global CDP is enabled - global setting will override');
280
- }
281
-
282
- // Validate cdp_specific configuration
283
- if (siteConfig.cdp_specific) {
284
- if (!Array.isArray(siteConfig.cdp_specific)) {
285
- warnings.push('cdp_specific must be an array of domain strings');
286
- } else if (siteConfig.cdp_specific.length === 0) {
287
- warnings.push('cdp_specific is empty - no domains will have CDP enabled');
288
- } else {
289
- // Validate domain format
290
- const hasInvalidDomains = siteConfig.cdp_specific.some(domain =>
291
- typeof domain !== 'string' || domain.trim() === ''
292
- );
293
-
294
- if (hasInvalidDomains) {
295
- // Only filter invalid domains if we need to show them
296
- const invalidDomains = siteConfig.cdp_specific.filter(domain =>
297
- typeof domain !== 'string' || domain.trim() === ''
298
- );
299
- warnings.push(`cdp_specific contains invalid domains: ${invalidDomains.join(', ')}`);
300
- }
301
- }
302
- }
303
-
304
- // Performance recommendations
305
- const cdpEnabled = globalCDP || siteConfig.cdp === true ||
306
- (Array.isArray(siteConfig.cdp_specific) && siteConfig.cdp_specific.length > 0);
307
-
308
- if (cdpEnabled) {
309
- recommendations.push('CDP logging enabled - this may impact performance for high-traffic sites');
310
-
311
- if (siteConfig.timeout && siteConfig.timeout < 30000) {
312
- recommendations.push('Consider increasing timeout when using CDP logging to avoid protocol timeouts');
313
- }
314
- }
315
-
316
- return {
317
- isValid: true,
318
- warnings,
319
- recommendations
320
- };
321
- }
322
-
323
- /**
324
- * Enhanced CDP session with additional network monitoring features
325
- *
326
- * ADVANCED FEATURES:
327
- * - JavaScript exception monitoring
328
- * - Security state change detection
329
- * - Failed network request tracking
330
- * - Enhanced error reporting
331
- *
332
- * USE CASES:
333
- * - Security analysis requiring comprehensive monitoring
334
- * - Debugging complex single-page applications
335
- * - Performance analysis of web applications
336
- * - Research requiring detailed browser insights
337
- *
338
- * PERFORMANCE IMPACT:
339
- * - Adds additional CDP domain subscriptions
340
- * - Higher memory usage due to more event listeners
341
- * - Recommended only for detailed analysis scenarios
342
- *
343
- * @param {import('puppeteer').Page} page - The Puppeteer page instance
344
- * @param {string} currentUrl - The URL being processed
345
- * @param {object} options - Configuration options (same as createCDPSession)
346
- * @returns {Promise<object>} Enhanced CDP session object with isEnhanced flag
347
- */
348
- async function createEnhancedCDPSession(page, currentUrl, options = {}) {
349
- const basicSession = await createCDPSession(page, currentUrl, options);
350
-
351
- if (!basicSession.session) {
352
- // Ensure enhanced flag is set even for null sessions
353
- return { ...basicSession, isEnhanced: false };
354
- }
355
-
356
- const { session } = basicSession;
357
- const { forceDebug } = options;
358
-
359
- try {
360
- // Enable additional CDP domains for enhanced monitoring
361
- await session.send('Runtime.enable'); // For JavaScript exceptions
362
- await session.send('Security.enable'); // For security state changes
363
-
364
- // Monitor JavaScript exceptions - useful for debugging problematic sites
365
- session.on('Runtime.exceptionThrown', (params) => {
366
- if (forceDebug) {
367
- console.log(formatLogMessage('debug', `[cdp][exception] ${params.exceptionDetails.text}`));
368
- }
369
- });
370
-
371
- // Monitor security state changes - detect mixed content, certificate issues, etc.
372
- session.on('Security.securityStateChanged', (params) => {
373
- if (forceDebug && params.securityState !== 'secure') {
374
- console.log(formatLogMessage('debug', `[cdp][security] Security state: ${params.securityState}`));
375
- }
376
- });
377
-
378
- // Monitor failed network requests - useful for understanding site issues
379
- session.on('Network.loadingFailed', (params) => {
380
- if (forceDebug) {
381
- console.log(formatLogMessage('debug', `[cdp][failed] ${params.errorText}: ${params.requestId}`));
382
- }
383
- });
384
-
385
- return {
386
- session,
387
- cleanup: basicSession.cleanup,
388
- isEnhanced: true // Flag to indicate enhanced features are active
389
- };
390
-
391
- } catch (enhancedErr) {
392
- if (forceDebug) {
393
- console.log(formatLogMessage('debug', `Enhanced CDP features failed, falling back to basic session: ${enhancedErr.message}`));
394
- }
395
-
396
- // Graceful degradation: return basic session if enhanced features fail
397
- // This ensures your application continues working even if advanced features break
398
- return { ...basicSession, isEnhanced: false };
399
- }
400
- }
401
-
402
250
  // EXPORT INTERFACE FOR OTHER APPLICATIONS:
403
251
  // This module provides a clean, reusable interface for CDP integration.
404
252
  // Simply require this module and use the exported functions.
@@ -406,7 +254,7 @@ async function createEnhancedCDPSession(page, currentUrl, options = {}) {
406
254
  // CUSTOMIZATION TIPS:
407
255
  // 1. Replace './colorize' import with your own logging system
408
256
  // 2. Modify the request logging format in the Network.requestWillBeSent handler
409
- // 3. Add additional CDP domain subscriptions in createEnhancedCDPSession
257
+ // 3. Add additional CDP domain subscriptions in createCDPSession
410
258
  // 4. Customize error categorization in the catch blocks
411
259
  //
412
260
  // TROUBLESHOOTING:
package/lib/compare.js CHANGED
@@ -9,10 +9,6 @@ const path = require('path');
9
9
  */
10
10
  function loadComparisonRules(compareFilePath, forceDebug = false) {
11
11
  try {
12
- if (!fs.existsSync(compareFilePath)) {
13
- throw new Error(`Comparison file not found: ${compareFilePath}`);
14
- }
15
-
16
12
  const content = fs.readFileSync(compareFilePath, 'utf8');
17
13
  const lines = content.split('\n')
18
14
  .map(line => line.trim())