@fanboynz/network-scanner 2.0.57 → 2.0.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +3 -3
- package/CHANGELOG.md +940 -0
- package/CLAUDE.md +65 -0
- package/README.md +31 -0
- package/lib/adblock.js +4 -3
- package/lib/browserexit.js +61 -96
- package/lib/browserhealth.js +16 -4
- package/lib/cdp.js +17 -169
- package/lib/compare.js +0 -4
- package/lib/compress.js +6 -15
- package/lib/dry-run.js +1 -1
- package/lib/fingerprint.js +47 -37
- package/lib/flowproxy.js +8 -8
- package/lib/ghost-cursor.js +258 -0
- package/lib/grep.js +1 -1
- package/lib/interaction.js +23 -45
- package/lib/openvpn_vpn.js +16 -21
- package/lib/output.js +12 -6
- package/lib/validate_rules.js +12 -27
- package/nwss.js +147 -52
- package/package.json +5 -1
- package/.clauderc +0 -30
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Network Scanner (NWSS)
|
|
2
|
+
|
|
3
|
+
Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, VPN/proxy routing, and multiple output formats.
|
|
4
|
+
|
|
5
|
+
## Project Structure
|
|
6
|
+
|
|
7
|
+
- `nwss.js` — Main entry point (~4,600 lines). CLI args, URL processing, orchestration.
|
|
8
|
+
- `config.json` — Default scan configuration (sites, filters, options).
|
|
9
|
+
- `lib/` — 28 focused, single-purpose modules:
|
|
10
|
+
- `fingerprint.js` — Bot detection evasion (device/GPU/timezone spoofing)
|
|
11
|
+
- `cloudflare.js` — Cloudflare challenge detection and solving
|
|
12
|
+
- `browserhealth.js` — Memory management and browser lifecycle
|
|
13
|
+
- `interaction.js` — Human-like mouse/scroll/typing simulation
|
|
14
|
+
- `smart-cache.js` — Multi-layer caching with persistence
|
|
15
|
+
- `nettools.js` — WHOIS/dig integration
|
|
16
|
+
- `output.js` — Multi-format rule output (adblock, dnsmasq, unbound, pihole, etc.)
|
|
17
|
+
- `proxy.js` — SOCKS5/HTTP proxy support
|
|
18
|
+
- `wireguard_vpn.js` / `openvpn_vpn.js` — VPN routing
|
|
19
|
+
- `adblock.js` — Adblock filter parsing and validation
|
|
20
|
+
- `validate_rules.js` — Domain and rule format validation
|
|
21
|
+
- `colorize.js` — Console output formatting and colors
|
|
22
|
+
- `domain-cache.js` — Domain detection cache for performance
|
|
23
|
+
- `post-processing.js` — Result cleanup and deduplication
|
|
24
|
+
- `redirect.js`, `referrer.js`, `cdp.js`, `curl.js`, `grep.js`, `compare.js`, `compress.js`, `dry-run.js`, `browserexit.js`, `clear_sitedata.js`, `flowproxy.js`, `ignore_similar.js`, `searchstring.js`
|
|
25
|
+
- `.github/workflows/npm-publish.yml` — Automated npm publishing
|
|
26
|
+
- `nwss.1` — Man page
|
|
27
|
+
|
|
28
|
+
## Tech Stack
|
|
29
|
+
|
|
30
|
+
- **Node.js** >=20.0.0
|
|
31
|
+
- **puppeteer** >=20.0.0 — Headless browser automation
|
|
32
|
+
- **psl** — Public Suffix List for domain parsing
|
|
33
|
+
- **lru-cache** — LRU cache implementation
|
|
34
|
+
- **p-limit** — Concurrency limiting (dynamically imported)
|
|
35
|
+
- **eslint** — Linting (`npm run lint`)
|
|
36
|
+
|
|
37
|
+
## Conventions
|
|
38
|
+
|
|
39
|
+
- Store modular functionality in `./lib/` with focused, single-purpose modules
|
|
40
|
+
- Use `messageColors` and `formatLogMessage` from `./lib/colorize` for consistent console output
|
|
41
|
+
- Implement timeout protection for all Puppeteer operations using `Promise.race` patterns
|
|
42
|
+
- Handle browser lifecycle with comprehensive cleanup in try-finally blocks
|
|
43
|
+
- Validate all external tool availability before use (grep, curl, whois, dig)
|
|
44
|
+
- Use `forceDebug` flag for detailed logging, `silentMode` for minimal output
|
|
45
|
+
- Use `Object.freeze` for constant configuration objects (TIMEOUTS, CACHE_LIMITS, CONCURRENCY_LIMITS)
|
|
46
|
+
- Use `fastTimeout(ms)` helper instead of `node:timers/promises` for Puppeteer 22.x compatibility
|
|
47
|
+
|
|
48
|
+
## Running
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
node nwss.js # Run with default config.json
|
|
52
|
+
node nwss.js config-custom.json # Run with custom config
|
|
53
|
+
node nwss.js --validate-config # Validate configuration
|
|
54
|
+
node nwss.js --dry-run # Preview without network calls
|
|
55
|
+
node nwss.js --headful # Launch with browser GUI
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Files to Ignore
|
|
59
|
+
|
|
60
|
+
- `node_modules/**`
|
|
61
|
+
- `logs/**`
|
|
62
|
+
- `sources/**`
|
|
63
|
+
- `.cache/**`
|
|
64
|
+
- `*.log`
|
|
65
|
+
- `*.gz`
|
package/README.md
CHANGED
|
@@ -59,8 +59,11 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
59
59
|
| `--compress-logs` | Compress log files with gzip (requires `--dumpurls`) |
|
|
60
60
|
| `--sub-domains` | Output full subdomains instead of collapsing to root |
|
|
61
61
|
| `--no-interact` | Disable page interactions globally |
|
|
62
|
+
| `--ghost-cursor` | Use ghost-cursor Bezier mouse movements globally (requires `npm i ghost-cursor`) |
|
|
62
63
|
| `--custom-json <file>` | Use a custom config JSON file instead of config.json |
|
|
63
64
|
| `--headful` | Launch browser with GUI (not headless) |
|
|
65
|
+
| `--keep-open` | Keep browser and tabs open after scan completes (use with `--headful` for debugging) |
|
|
66
|
+
| `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
|
|
64
67
|
| `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
|
|
65
68
|
| `--remove-dupes` | Remove duplicate domains from output (only with `-o`) |
|
|
66
69
|
| `--dry-run` | Console output only: show matching regex, titles, whois/dig/searchstring results, and adblock rules |
|
|
@@ -267,6 +270,11 @@ When a page redirects to a new domain, first-party/third-party detection is base
|
|
|
267
270
|
| `interact_clicks` | Boolean | `false` | Enable element clicking simulation |
|
|
268
271
|
| `interact_typing` | Boolean | `false` | Enable typing simulation |
|
|
269
272
|
| `interact_intensity` | String | `"medium"` | Interaction simulation intensity: "low", "medium", "high" |
|
|
273
|
+
| `cursor_mode` | String | - | Set to `"ghost"` to use ghost-cursor Bezier mouse movements (requires `npm i ghost-cursor`) |
|
|
274
|
+
| `ghost_cursor_speed` | Number | auto | Ghost-cursor movement speed multiplier |
|
|
275
|
+
| `ghost_cursor_hesitate` | Milliseconds | `50` | Delay before ghost-cursor clicks |
|
|
276
|
+
| `ghost_cursor_overshoot` | Pixels | auto | Max ghost-cursor overshoot distance before correcting |
|
|
277
|
+
| `ghost_cursor_duration` | Milliseconds | `interact_duration` or `2000` | How long ghost-cursor movements run (capped by the 15s hard timeout) |
|
|
270
278
|
| `dnsmasq` | Boolean | `false` | Force dnsmasq output for this site |
|
|
271
279
|
| `dnsmasq_old` | Boolean | `false` | Force dnsmasq old format output for this site |
|
|
272
280
|
| `unbound` | Boolean | `false` | Force unbound output for this site |
|
|
@@ -523,6 +531,27 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
|
|
|
523
531
|
}
|
|
524
532
|
```
|
|
525
533
|
|
|
534
|
+
#### Ghost Cursor (Advanced Bezier Mouse)
|
|
535
|
+
```json
|
|
536
|
+
{
|
|
537
|
+
"url": "https://anti-bot-site.com",
|
|
538
|
+
"interact": true,
|
|
539
|
+
"cursor_mode": "ghost",
|
|
540
|
+
"ghost_cursor_duration": 3000,
|
|
541
|
+
"ghost_cursor_speed": 1.2,
|
|
542
|
+
"fingerprint_protection": "random",
|
|
543
|
+
"filterRegex": "tracking|analytics",
|
|
544
|
+
"comments": "ghost-cursor uses Bezier curves with overshoot for realistic mouse paths"
|
|
545
|
+
}
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
Or enable globally via CLI:
|
|
549
|
+
```bash
|
|
550
|
+
node nwss.js --ghost-cursor --debug -o rules.txt
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
> **Note:** ghost-cursor is an optional dependency. Install with `npm install ghost-cursor`. If not installed, the scanner falls back to the built-in mouse simulation automatically.
|
|
554
|
+
|
|
526
555
|
#### E-commerce Site Scanning
|
|
527
556
|
```json
|
|
528
557
|
{
|
|
@@ -698,5 +727,7 @@ your_username ALL=(root) NOPASSWD: /usr/bin/wg-quick, /usr/bin/wg
|
|
|
698
727
|
- If an `.ovpn` file contains embedded credentials, no additional auth config is needed in the JSON
|
|
699
728
|
- VPN affects system-level routing — all concurrent scans will route through the active tunnel
|
|
700
729
|
- Both `vpn` (WireGuard) and `openvpn` can be set, but `vpn` takes precedence
|
|
730
|
+
- Ghost-cursor (`cursor_mode: "ghost"`) is optional — install with `npm i ghost-cursor`. If it is not installed, the scanner falls back to the built-in mouse simulation
|
|
731
|
+
- Ghost-cursor duration defaults to `interact_duration` (or 2000ms), capped by the 15s hard timeout
|
|
701
732
|
|
|
702
733
|
---
|
package/lib/adblock.js
CHANGED
|
@@ -51,11 +51,12 @@ function parseAdblockRules(filePath, options = {}) {
|
|
|
51
51
|
caseSensitive = false
|
|
52
52
|
} = options;
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
let fileContent;
|
|
55
|
+
try {
|
|
56
|
+
fileContent = fs.readFileSync(filePath, 'utf-8');
|
|
57
|
+
} catch (err) {
|
|
55
58
|
throw new Error(`Adblock rules file not found: ${filePath}`);
|
|
56
59
|
}
|
|
57
|
-
|
|
58
|
-
const fileContent = fs.readFileSync(filePath, 'utf-8');
|
|
59
60
|
const lines = fileContent.split('\n');
|
|
60
61
|
|
|
61
62
|
const rules = {
|
package/lib/browserexit.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
const fs = require('fs');
|
|
8
|
+
const path = require('path');
|
|
8
9
|
const { execSync } = require('child_process');
|
|
9
10
|
|
|
10
11
|
// Constants for temp file cleanup
|
|
@@ -15,20 +16,55 @@ const CHROME_TEMP_PATHS = [
|
|
|
15
16
|
];
|
|
16
17
|
|
|
17
18
|
const CHROME_TEMP_PATTERNS = [
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
/^\.?com\.google\.Chrome\./,
|
|
20
|
+
/^\.?org\.chromium\.Chromium\./,
|
|
21
|
+
/^puppeteer-/
|
|
21
22
|
];
|
|
22
23
|
|
|
24
|
+
/**
|
|
25
|
+
* Count and remove matching Chrome/Puppeteer temp entries from a directory using fs
|
|
26
|
+
* @param {string} basePath - Directory to scan
|
|
27
|
+
* @param {boolean} forceDebug - Whether to output debug logs
|
|
28
|
+
* @returns {number} Number of items cleaned
|
|
29
|
+
*/
|
|
30
|
+
function cleanTempDir(basePath, forceDebug) {
|
|
31
|
+
let entries;
|
|
32
|
+
try {
|
|
33
|
+
entries = fs.readdirSync(basePath);
|
|
34
|
+
} catch {
|
|
35
|
+
if (forceDebug) console.log(`[debug] [temp-cleanup] Cannot read ${basePath}`);
|
|
36
|
+
return 0;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
let cleaned = 0;
|
|
40
|
+
for (const entry of entries) {
|
|
41
|
+
let matched = false;
|
|
42
|
+
for (const re of CHROME_TEMP_PATTERNS) {
|
|
43
|
+
if (re.test(entry)) { matched = true; break; }
|
|
44
|
+
}
|
|
45
|
+
if (!matched) continue;
|
|
46
|
+
|
|
47
|
+
try {
|
|
48
|
+
fs.rmSync(path.join(basePath, entry), { recursive: true, force: true });
|
|
49
|
+
cleaned++;
|
|
50
|
+
if (forceDebug) console.log(`[debug] [temp-cleanup] Removed ${basePath}/${entry}`);
|
|
51
|
+
} catch (rmErr) {
|
|
52
|
+
if (forceDebug) console.log(`[debug] [temp-cleanup] Failed to remove ${basePath}/${entry}: ${rmErr.message}`);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return cleaned;
|
|
57
|
+
}
|
|
58
|
+
|
|
23
59
|
/**
|
|
24
60
|
* Clean Chrome temporary files and directories
|
|
25
61
|
* @param {Object} options - Cleanup options
|
|
26
62
|
* @param {boolean} options.includeSnapTemp - Whether to clean snap temp directories
|
|
27
63
|
* @param {boolean} options.forceDebug - Whether to output debug logs
|
|
28
64
|
* @param {boolean} options.comprehensive - Whether to perform comprehensive cleanup of all temp locations
|
|
29
|
-
* @returns {
|
|
65
|
+
* @returns {Object} Cleanup results
|
|
30
66
|
*/
|
|
31
|
-
|
|
67
|
+
function cleanupChromeTempFiles(options = {}) {
|
|
32
68
|
const {
|
|
33
69
|
includeSnapTemp = false,
|
|
34
70
|
forceDebug = false,
|
|
@@ -36,57 +72,20 @@ async function cleanupChromeTempFiles(options = {}) {
|
|
|
36
72
|
} = options;
|
|
37
73
|
|
|
38
74
|
try {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
'rm -rf /tmp/com.google.Chrome.* 2>/dev/null || true',
|
|
43
|
-
'rm -rf /tmp/.com.google.Chrome.* 2>/dev/null || true',
|
|
44
|
-
'rm -rf /tmp/.org.chromium.Chromium.* 2>/dev/null || true',
|
|
45
|
-
'rm -rf /tmp/puppeteer-* 2>/dev/null || true',
|
|
46
|
-
'rm -rf /dev/shm/.com.google.Chrome.* 2>/dev/null || true',
|
|
47
|
-
'rm -rf /dev/shm/.org.chromium.Chromium.* 2>/dev/null || true'
|
|
48
|
-
];
|
|
49
|
-
|
|
50
|
-
// Add snap-specific cleanup if requested
|
|
51
|
-
if (includeSnapTemp || comprehensive) {
|
|
52
|
-
cleanupCommands.push('rm -rf /dev/shm/com.google.Chrome.* 2>/dev/null || true');
|
|
53
|
-
cleanupCommands.push(
|
|
54
|
-
'rm -rf /tmp/snap-private-tmp/snap.chromium/tmp/.org.chromium.Chromium.* 2>/dev/null || true',
|
|
55
|
-
'rm -rf /tmp/snap-private-tmp/snap.chromium/tmp/puppeteer-* 2>/dev/null || true'
|
|
56
|
-
);
|
|
57
|
-
}
|
|
75
|
+
const paths = comprehensive || includeSnapTemp
|
|
76
|
+
? CHROME_TEMP_PATHS
|
|
77
|
+
: CHROME_TEMP_PATHS.slice(0, 2); // /tmp and /dev/shm only
|
|
58
78
|
|
|
59
79
|
let totalCleaned = 0;
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
try {
|
|
63
|
-
// Extract glob pattern and count matches before deletion
|
|
64
|
-
const globPattern = command.match(/rm -rf ([^ ]+)/)?.[1];
|
|
65
|
-
if (!globPattern) continue;
|
|
66
|
-
const fileCount = parseInt(execSync(`ls -1d ${globPattern} 2>/dev/null | wc -l || echo 0`, { stdio: 'pipe' }).toString().trim()) || 0;
|
|
67
|
-
|
|
68
|
-
if (fileCount > 0) {
|
|
69
|
-
execSync(command, { stdio: 'ignore' });
|
|
70
|
-
totalCleaned += fileCount;
|
|
71
|
-
|
|
72
|
-
if (forceDebug) {
|
|
73
|
-
console.log(`[debug] [temp-cleanup] Cleaned ${fileCount} items from ${globPattern}`);
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
} catch (cmdErr) {
|
|
77
|
-
// Ignore individual command errors but log in debug mode
|
|
78
|
-
if (forceDebug) {
|
|
79
|
-
console.log(`[debug] [temp-cleanup] Cleanup command failed: ${command} (${cmdErr.message})`);
|
|
80
|
-
}
|
|
81
|
-
}
|
|
80
|
+
for (const basePath of paths) {
|
|
81
|
+
totalCleaned += cleanTempDir(basePath, forceDebug);
|
|
82
82
|
}
|
|
83
83
|
|
|
84
84
|
if (forceDebug) {
|
|
85
|
-
console.log(`[debug] [temp-cleanup]
|
|
85
|
+
console.log(`[debug] [temp-cleanup] Cleanup completed (${totalCleaned} items)`);
|
|
86
86
|
}
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
return { success: true, itemsCleaned: totalCleaned };
|
|
89
|
-
|
|
90
89
|
} catch (cleanupErr) {
|
|
91
90
|
if (forceDebug) {
|
|
92
91
|
console.log(`[debug] [temp-cleanup] Chrome cleanup error: ${cleanupErr.message}`);
|
|
@@ -96,72 +95,38 @@ async function cleanupChromeTempFiles(options = {}) {
|
|
|
96
95
|
}
|
|
97
96
|
|
|
98
97
|
/**
|
|
99
|
-
* Comprehensive temp file cleanup that
|
|
98
|
+
* Comprehensive temp file cleanup that checks all known Chrome temp locations
|
|
100
99
|
* @param {Object} options - Cleanup options
|
|
101
100
|
* @param {boolean} options.forceDebug - Whether to output debug logs
|
|
102
101
|
* @param {boolean} options.verbose - Whether to show verbose output
|
|
103
|
-
* @returns {
|
|
102
|
+
* @returns {Object} Cleanup results
|
|
104
103
|
*/
|
|
105
|
-
|
|
104
|
+
function comprehensiveChromeTempCleanup(options = {}) {
|
|
106
105
|
const { forceDebug = false, verbose = false } = options;
|
|
107
|
-
|
|
106
|
+
|
|
108
107
|
try {
|
|
109
|
-
let totalCleaned = 0;
|
|
110
|
-
|
|
111
108
|
if (verbose && !forceDebug) {
|
|
112
109
|
console.log(`[temp-cleanup] Scanning Chrome/Puppeteer temporary files...`);
|
|
113
110
|
}
|
|
114
|
-
|
|
111
|
+
|
|
112
|
+
let totalCleaned = 0;
|
|
115
113
|
for (const basePath of CHROME_TEMP_PATHS) {
|
|
116
|
-
|
|
117
|
-
try {
|
|
118
|
-
const pathExists = fs.existsSync(basePath);
|
|
119
|
-
|
|
120
|
-
if (!pathExists) {
|
|
121
|
-
if (forceDebug) {
|
|
122
|
-
console.log(`[debug] [temp-cleanup] Skipping non-existent path: ${basePath}`);
|
|
123
|
-
}
|
|
124
|
-
continue;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
for (const pattern of CHROME_TEMP_PATTERNS) {
|
|
128
|
-
const fullPattern = `${basePath}/${pattern}`;
|
|
129
|
-
|
|
130
|
-
// Count items before deletion
|
|
131
|
-
const countCommand = `ls -1d ${fullPattern} 2>/dev/null | wc -l || echo 0`;
|
|
132
|
-
const itemCount = parseInt(execSync(countCommand, { stdio: 'pipe' }).toString().trim()) || 0;
|
|
133
|
-
|
|
134
|
-
if (itemCount > 0) {
|
|
135
|
-
const deleteCommand = `rm -rf ${fullPattern} 2>/dev/null || true`;
|
|
136
|
-
execSync(deleteCommand, { stdio: 'ignore' });
|
|
137
|
-
totalCleaned += itemCount;
|
|
138
|
-
|
|
139
|
-
if (forceDebug) {
|
|
140
|
-
console.log(`[debug] [temp-cleanup] Removed ${itemCount} items matching ${fullPattern}`);
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
} catch (pathErr) {
|
|
145
|
-
if (forceDebug) {
|
|
146
|
-
console.log(`[debug] [temp-cleanup] Error checking path ${basePath}: ${pathErr.message}`);
|
|
147
|
-
}
|
|
148
|
-
}
|
|
114
|
+
totalCleaned += cleanTempDir(basePath, forceDebug);
|
|
149
115
|
}
|
|
150
|
-
|
|
116
|
+
|
|
151
117
|
if (verbose && totalCleaned > 0) {
|
|
152
|
-
console.log(`[temp-cleanup]
|
|
118
|
+
console.log(`[temp-cleanup] Removed ${totalCleaned} temporary file(s)/folder(s)`);
|
|
153
119
|
} else if (verbose && totalCleaned === 0) {
|
|
154
|
-
console.log(`[temp-cleanup]
|
|
120
|
+
console.log(`[temp-cleanup] Clean - no remaining temporary files`);
|
|
155
121
|
} else if (forceDebug) {
|
|
156
122
|
console.log(`[debug] [temp-cleanup] Comprehensive cleanup completed (${totalCleaned} items)`);
|
|
157
123
|
}
|
|
158
|
-
|
|
124
|
+
|
|
159
125
|
return { success: true, itemsCleaned: totalCleaned };
|
|
160
|
-
|
|
161
126
|
} catch (err) {
|
|
162
127
|
const errorMsg = `Comprehensive temp file cleanup failed: ${err.message}`;
|
|
163
128
|
if (verbose) {
|
|
164
|
-
console.warn(`[temp-cleanup]
|
|
129
|
+
console.warn(`[temp-cleanup] ${errorMsg}`);
|
|
165
130
|
} else if (forceDebug) {
|
|
166
131
|
console.log(`[debug] [temp-cleanup] ${errorMsg}`);
|
|
167
132
|
}
|
|
@@ -317,7 +282,7 @@ async function forceBrowserKill(browser, forceDebug = false) {
|
|
|
317
282
|
}
|
|
318
283
|
|
|
319
284
|
// Wait for graceful termination
|
|
320
|
-
await new Promise(resolve => setTimeout(resolve,
|
|
285
|
+
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
321
286
|
|
|
322
287
|
// Force kill any remaining processes with SIGKILL
|
|
323
288
|
for (const pid of pidsToKill) {
|
package/lib/browserhealth.js
CHANGED
|
@@ -438,11 +438,12 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
|
|
|
438
438
|
let closedCount = 0;
|
|
439
439
|
for (const page of safePagesToClose) {
|
|
440
440
|
try {
|
|
441
|
-
// Cache both page state and URL for this iteration
|
|
442
441
|
const isPageClosed = page.isClosed();
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
442
|
+
|
|
443
|
+
// Re-check processing state — may have changed since safety check
|
|
444
|
+
const usage = pageUsageTracker.get(page);
|
|
445
|
+
if (!isPageClosed && !(usage && usage.isProcessing)) {
|
|
446
|
+
const pageUrl = page.url();
|
|
446
447
|
await page.close();
|
|
447
448
|
pageCreationTracker.delete(page); // Remove from tracker
|
|
448
449
|
pageUsageTracker.delete(page);
|
|
@@ -569,6 +570,16 @@ function trackPageForRealtime(page) {
|
|
|
569
570
|
updatePageUsage(page, false); // Initialize usage tracking
|
|
570
571
|
}
|
|
571
572
|
|
|
573
|
+
/**
|
|
574
|
+
* Removes a page from all tracking Maps immediately.
|
|
575
|
+
* Call this before page.close() to prevent stale entries during concurrent execution.
|
|
576
|
+
* @param {import('puppeteer').Page} page - Page to untrack
|
|
577
|
+
*/
|
|
578
|
+
function untrackPage(page) {
|
|
579
|
+
pageCreationTracker.delete(page);
|
|
580
|
+
pageUsageTracker.delete(page);
|
|
581
|
+
}
|
|
582
|
+
|
|
572
583
|
/**
|
|
573
584
|
* Purges stale entries from tracking Maps (pages that were closed without cleanup)
|
|
574
585
|
* Should be called periodically to prevent memory leaks
|
|
@@ -1225,6 +1236,7 @@ module.exports = {
|
|
|
1225
1236
|
isBrowserHealthy,
|
|
1226
1237
|
isCriticalProtocolError,
|
|
1227
1238
|
updatePageUsage,
|
|
1239
|
+
untrackPage,
|
|
1228
1240
|
cleanupPageBeforeReload,
|
|
1229
1241
|
purgeStaleTrackers
|
|
1230
1242
|
};
|
package/lib/cdp.js
CHANGED
|
@@ -28,15 +28,19 @@
|
|
|
28
28
|
const { formatLogMessage } = require('./colorize');
|
|
29
29
|
|
|
30
30
|
/**
|
|
31
|
-
*
|
|
31
|
+
* Race a promise against a timeout, clearing the timer when the promise settles.
|
|
32
|
+
* Prevents leaked setTimeout handles that hold closure references until they fire.
|
|
33
|
+
* @param {Promise} promise - The operation to race
|
|
32
34
|
* @param {number} ms - Timeout in milliseconds
|
|
33
35
|
* @param {string} message - Error message for timeout
|
|
34
|
-
* @returns {Promise}
|
|
36
|
+
* @returns {Promise} Resolves/rejects with the operation result, or rejects on timeout
|
|
35
37
|
*/
|
|
36
|
-
function
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
function raceWithTimeout(promise, ms, message) {
|
|
39
|
+
let timeoutId;
|
|
40
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
41
|
+
timeoutId = setTimeout(() => reject(new Error(message)), ms);
|
|
42
|
+
});
|
|
43
|
+
return Promise.race([promise, timeoutPromise]).finally(() => clearTimeout(timeoutId));
|
|
40
44
|
}
|
|
41
45
|
|
|
42
46
|
/**
|
|
@@ -59,10 +63,7 @@ const createSessionResult = (session = null, cleanup = async () => {}, isEnhance
|
|
|
59
63
|
* @returns {Promise<import('puppeteer').Page>} Page instance
|
|
60
64
|
*/
|
|
61
65
|
async function createPageWithTimeout(browser, timeout = 30000) {
|
|
62
|
-
return
|
|
63
|
-
browser.newPage(),
|
|
64
|
-
createTimeoutPromise(timeout, 'Page creation timeout - browser may be unresponsive')
|
|
65
|
-
]);
|
|
66
|
+
return raceWithTimeout(browser.newPage(), timeout, 'Page creation timeout - browser may be unresponsive');
|
|
66
67
|
}
|
|
67
68
|
|
|
68
69
|
/**
|
|
@@ -73,24 +74,18 @@ async function createPageWithTimeout(browser, timeout = 30000) {
|
|
|
73
74
|
*/
|
|
74
75
|
async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
|
|
75
76
|
try {
|
|
76
|
-
await
|
|
77
|
-
page.setRequestInterception(true),
|
|
78
|
-
createTimeoutPromise(timeout, 'Request interception timeout - first attempt')
|
|
79
|
-
]);
|
|
77
|
+
await raceWithTimeout(page.setRequestInterception(true), timeout, 'Request interception timeout - first attempt');
|
|
80
78
|
} catch (firstError) {
|
|
81
79
|
// Check for immediate critical failures
|
|
82
|
-
if (firstError.message.includes('Target closed') ||
|
|
80
|
+
if (firstError.message.includes('Target closed') ||
|
|
83
81
|
firstError.message.includes('Session closed') ||
|
|
84
82
|
firstError.message.includes('Browser has been closed')) {
|
|
85
83
|
throw new Error('CRITICAL_BROWSER_ERROR: ' + firstError.message);
|
|
86
84
|
}
|
|
87
|
-
|
|
85
|
+
|
|
88
86
|
// Retry with extended timeout
|
|
89
87
|
try {
|
|
90
|
-
await
|
|
91
|
-
page.setRequestInterception(true),
|
|
92
|
-
createTimeoutPromise(timeout * 2, 'Request interception timeout - retry failed')
|
|
93
|
-
]);
|
|
88
|
+
await raceWithTimeout(page.setRequestInterception(true), timeout * 2, 'Request interception timeout - retry failed');
|
|
94
89
|
} catch (retryError) {
|
|
95
90
|
if (retryError.message.includes('Network.enable timed out') ||
|
|
96
91
|
retryError.message.includes('ProtocolError')) {
|
|
@@ -168,10 +163,7 @@ async function createCDPSession(page, currentUrl, options = {}) {
|
|
|
168
163
|
try {
|
|
169
164
|
// Create CDP session using modern Puppeteer 20+ API
|
|
170
165
|
// Add timeout protection for CDP session creation
|
|
171
|
-
cdpSession = await
|
|
172
|
-
page.createCDPSession(),
|
|
173
|
-
createTimeoutPromise(20000, 'CDP session creation timeout')
|
|
174
|
-
]);
|
|
166
|
+
cdpSession = await raceWithTimeout(page.createCDPSession(), 20000, 'CDP session creation timeout');
|
|
175
167
|
|
|
176
168
|
// Enable network domain - required for network event monitoring
|
|
177
169
|
await cdpSession.send('Network.enable');
|
|
@@ -255,150 +247,6 @@ async function createCDPSession(page, currentUrl, options = {}) {
|
|
|
255
247
|
}
|
|
256
248
|
}
|
|
257
249
|
|
|
258
|
-
/**
|
|
259
|
-
* Validates CDP availability and configuration
|
|
260
|
-
*
|
|
261
|
-
* USAGE IN YOUR APPLICATION:
|
|
262
|
-
* const validation = validateCDPConfig(siteConfig, globalCDPFlag);
|
|
263
|
-
* if (!validation.isValid) {
|
|
264
|
-
* console.warn('CDP configuration issues detected');
|
|
265
|
-
* }
|
|
266
|
-
* validation.recommendations.forEach(rec => console.log('Recommendation:', rec));
|
|
267
|
-
*
|
|
268
|
-
* @param {object} siteConfig - Site configuration object
|
|
269
|
-
* @param {boolean} globalCDP - Global CDP flag
|
|
270
|
-
* @param {Array} cdpSpecificDomains - Array of domains for cdp_specific feature
|
|
271
|
-
* @returns {object} Validation result with recommendations
|
|
272
|
-
*/
|
|
273
|
-
function validateCDPConfig(siteConfig, globalCDP, cdpSpecificDomains = []) {
|
|
274
|
-
const warnings = [];
|
|
275
|
-
const recommendations = [];
|
|
276
|
-
|
|
277
|
-
// Check for conflicting configurations
|
|
278
|
-
if (globalCDP && siteConfig.cdp === false) {
|
|
279
|
-
warnings.push('Site-specific CDP disabled but global CDP is enabled - global setting will override');
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
// Validate cdp_specific configuration
|
|
283
|
-
if (siteConfig.cdp_specific) {
|
|
284
|
-
if (!Array.isArray(siteConfig.cdp_specific)) {
|
|
285
|
-
warnings.push('cdp_specific must be an array of domain strings');
|
|
286
|
-
} else if (siteConfig.cdp_specific.length === 0) {
|
|
287
|
-
warnings.push('cdp_specific is empty - no domains will have CDP enabled');
|
|
288
|
-
} else {
|
|
289
|
-
// Validate domain format
|
|
290
|
-
const hasInvalidDomains = siteConfig.cdp_specific.some(domain =>
|
|
291
|
-
typeof domain !== 'string' || domain.trim() === ''
|
|
292
|
-
);
|
|
293
|
-
|
|
294
|
-
if (hasInvalidDomains) {
|
|
295
|
-
// Only filter invalid domains if we need to show them
|
|
296
|
-
const invalidDomains = siteConfig.cdp_specific.filter(domain =>
|
|
297
|
-
typeof domain !== 'string' || domain.trim() === ''
|
|
298
|
-
);
|
|
299
|
-
warnings.push(`cdp_specific contains invalid domains: ${invalidDomains.join(', ')}`);
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
// Performance recommendations
|
|
305
|
-
const cdpEnabled = globalCDP || siteConfig.cdp === true ||
|
|
306
|
-
(Array.isArray(siteConfig.cdp_specific) && siteConfig.cdp_specific.length > 0);
|
|
307
|
-
|
|
308
|
-
if (cdpEnabled) {
|
|
309
|
-
recommendations.push('CDP logging enabled - this may impact performance for high-traffic sites');
|
|
310
|
-
|
|
311
|
-
if (siteConfig.timeout && siteConfig.timeout < 30000) {
|
|
312
|
-
recommendations.push('Consider increasing timeout when using CDP logging to avoid protocol timeouts');
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
return {
|
|
317
|
-
isValid: true,
|
|
318
|
-
warnings,
|
|
319
|
-
recommendations
|
|
320
|
-
};
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
/**
|
|
324
|
-
* Enhanced CDP session with additional network monitoring features
|
|
325
|
-
*
|
|
326
|
-
* ADVANCED FEATURES:
|
|
327
|
-
* - JavaScript exception monitoring
|
|
328
|
-
* - Security state change detection
|
|
329
|
-
* - Failed network request tracking
|
|
330
|
-
* - Enhanced error reporting
|
|
331
|
-
*
|
|
332
|
-
* USE CASES:
|
|
333
|
-
* - Security analysis requiring comprehensive monitoring
|
|
334
|
-
* - Debugging complex single-page applications
|
|
335
|
-
* - Performance analysis of web applications
|
|
336
|
-
* - Research requiring detailed browser insights
|
|
337
|
-
*
|
|
338
|
-
* PERFORMANCE IMPACT:
|
|
339
|
-
* - Adds additional CDP domain subscriptions
|
|
340
|
-
* - Higher memory usage due to more event listeners
|
|
341
|
-
* - Recommended only for detailed analysis scenarios
|
|
342
|
-
*
|
|
343
|
-
* @param {import('puppeteer').Page} page - The Puppeteer page instance
|
|
344
|
-
* @param {string} currentUrl - The URL being processed
|
|
345
|
-
* @param {object} options - Configuration options (same as createCDPSession)
|
|
346
|
-
* @returns {Promise<object>} Enhanced CDP session object with isEnhanced flag
|
|
347
|
-
*/
|
|
348
|
-
async function createEnhancedCDPSession(page, currentUrl, options = {}) {
|
|
349
|
-
const basicSession = await createCDPSession(page, currentUrl, options);
|
|
350
|
-
|
|
351
|
-
if (!basicSession.session) {
|
|
352
|
-
// Ensure enhanced flag is set even for null sessions
|
|
353
|
-
return { ...basicSession, isEnhanced: false };
|
|
354
|
-
}
|
|
355
|
-
|
|
356
|
-
const { session } = basicSession;
|
|
357
|
-
const { forceDebug } = options;
|
|
358
|
-
|
|
359
|
-
try {
|
|
360
|
-
// Enable additional CDP domains for enhanced monitoring
|
|
361
|
-
await session.send('Runtime.enable'); // For JavaScript exceptions
|
|
362
|
-
await session.send('Security.enable'); // For security state changes
|
|
363
|
-
|
|
364
|
-
// Monitor JavaScript exceptions - useful for debugging problematic sites
|
|
365
|
-
session.on('Runtime.exceptionThrown', (params) => {
|
|
366
|
-
if (forceDebug) {
|
|
367
|
-
console.log(formatLogMessage('debug', `[cdp][exception] ${params.exceptionDetails.text}`));
|
|
368
|
-
}
|
|
369
|
-
});
|
|
370
|
-
|
|
371
|
-
// Monitor security state changes - detect mixed content, certificate issues, etc.
|
|
372
|
-
session.on('Security.securityStateChanged', (params) => {
|
|
373
|
-
if (forceDebug && params.securityState !== 'secure') {
|
|
374
|
-
console.log(formatLogMessage('debug', `[cdp][security] Security state: ${params.securityState}`));
|
|
375
|
-
}
|
|
376
|
-
});
|
|
377
|
-
|
|
378
|
-
// Monitor failed network requests - useful for understanding site issues
|
|
379
|
-
session.on('Network.loadingFailed', (params) => {
|
|
380
|
-
if (forceDebug) {
|
|
381
|
-
console.log(formatLogMessage('debug', `[cdp][failed] ${params.errorText}: ${params.requestId}`));
|
|
382
|
-
}
|
|
383
|
-
});
|
|
384
|
-
|
|
385
|
-
return {
|
|
386
|
-
session,
|
|
387
|
-
cleanup: basicSession.cleanup,
|
|
388
|
-
isEnhanced: true // Flag to indicate enhanced features are active
|
|
389
|
-
};
|
|
390
|
-
|
|
391
|
-
} catch (enhancedErr) {
|
|
392
|
-
if (forceDebug) {
|
|
393
|
-
console.log(formatLogMessage('debug', `Enhanced CDP features failed, falling back to basic session: ${enhancedErr.message}`));
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
// Graceful degradation: return basic session if enhanced features fail
|
|
397
|
-
// This ensures your application continues working even if advanced features break
|
|
398
|
-
return { ...basicSession, isEnhanced: false };
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
|
|
402
250
|
// EXPORT INTERFACE FOR OTHER APPLICATIONS:
|
|
403
251
|
// This module provides a clean, reusable interface for CDP integration.
|
|
404
252
|
// Simply require this module and use the exported functions.
|
|
@@ -406,7 +254,7 @@ async function createEnhancedCDPSession(page, currentUrl, options = {}) {
|
|
|
406
254
|
// CUSTOMIZATION TIPS:
|
|
407
255
|
// 1. Replace './colorize' import with your own logging system
|
|
408
256
|
// 2. Modify the request logging format in the Network.requestWillBeSent handler
|
|
409
|
-
// 3. Add additional CDP domain subscriptions in
|
|
257
|
+
// 3. Add additional CDP domain subscriptions in createCDPSession
|
|
410
258
|
// 4. Customize error categorization in the catch blocks
|
|
411
259
|
//
|
|
412
260
|
// TROUBLESHOOTING:
|
package/lib/compare.js
CHANGED
|
@@ -9,10 +9,6 @@ const path = require('path');
|
|
|
9
9
|
*/
|
|
10
10
|
function loadComparisonRules(compareFilePath, forceDebug = false) {
|
|
11
11
|
try {
|
|
12
|
-
if (!fs.existsSync(compareFilePath)) {
|
|
13
|
-
throw new Error(`Comparison file not found: ${compareFilePath}`);
|
|
14
|
-
}
|
|
15
|
-
|
|
16
12
|
const content = fs.readFileSync(compareFilePath, 'utf8');
|
|
17
13
|
const lines = content.split('\n')
|
|
18
14
|
.map(line => line.trim())
|