@fanboynz/network-scanner 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +76 -0
- package/CLAUDE.md +2 -1
- package/README.md +33 -5
- package/eslint.config.mjs +13 -1
- package/lib/browserhealth.js +28 -94
- package/lib/dns.js +238 -0
- package/lib/domain-cache.js +14 -127
- package/lib/fingerprint.js +220 -97
- package/lib/fingerprint.md +94 -0
- package/lib/ghost-cursor.js +29 -11
- package/lib/interaction.js +4 -0
- package/lib/nettools.js +154 -51
- package/lib/output.js +24 -13
- package/lib/proxy.js +6 -2
- package/lib/redirect.js +4 -1
- package/lib/smart-cache.js +9 -1
- package/lib/socks-relay.js +14 -9
- package/lib/validate_rules.js +16 -1
- package/nwss.1 +76 -15
- package/nwss.js +389 -113
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,82 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the Network Scanner (nwss.js) project.
|
|
4
4
|
|
|
5
|
+
## [3.2.0] - 2026-06-04
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **`output_regex`** site option — a per-site regex whose capture group 1 (or whole match) becomes the rule body, so output can be a path-prefix rule like `||host/script/` instead of `||host^`. Collapses randomized filenames under a stable path into one rule and lets you block a folder on a host that also serves legit content; falls back to `||host^` when the regex doesn't match. Adblock-only — domain-based formats (dnsmasq/unbound/pi-hole/hosts/plain) emit the bare host. Compiled once per pattern (memoized) and validated at config load.
|
|
9
|
+
- **dig resolver failover** — `digLookup` now fails over through the `--dns` resolvers on timeout / no-reply / `REFUSED` / `SERVFAIL` (up to 3 attempts, `+time=2 +tries=1` each), matching the resilience the whois retry and DNS pre-check rotation already had. With no `--dns`, the system-resolver path keeps dig's native `resolv.conf` rotation unchanged.
|
|
10
|
+
|
|
11
|
+
### Changed
|
|
12
|
+
- **Ghost-cursor coordinate clicks now use the same realistic press as the built-in content clicks** (`humanClick`): hover dwell + mousedown/hold/mouseup, plus hand-tremor during the hold and a mouseup drift (so mousedown ≠ mouseup coordinates) when `realistic_click` is set — replacing a 0ms `page.mouse.click`.
|
|
13
|
+
- **Ghost-cursor clicks honor `interact_click_count`** (default 3, cap 20) instead of firing a single click — ad SDKs often swallow the 1st/2nd click as warmup. The bezier movement loop reserves part of `ghost_cursor_duration` for the clicks (raise the duration to fit more; the default 2000ms fits ~1 realistic click).
|
|
14
|
+
- **`dig` success is judged by RCODE, not stderr** — a dig that prints a transient `communications error` warning but still returns a valid `ANSWER SECTION` is no longer discarded.
|
|
15
|
+
- **dig-only configs skip the whois root-domain parse** per request (small per-request saving when no `whois`/`whois-or` is configured).
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- **`max_redirects: 0`** now means "follow none" instead of silently becoming 10 (the `|| 10` falsy-zero bug in `nwss.js` and `lib/redirect.js`).
|
|
19
|
+
- **A `REFUSED`/`SERVFAIL` dig that exhausts all resolvers returns failure** so it isn't cached — a transient resolver-side error no longer poisons a domain for the cache TTL.
|
|
20
|
+
- **Ghost-cursor coordinate click no longer reports false success** — it returned `true` (and logged "Clicked") even when the click was silently skipped for lack of a page; it now returns `false` and logs the skip.
|
|
21
|
+
|
|
22
|
+
### Removed
|
|
23
|
+
- **`follow_redirects`** site option — documented in `--help`, the man page, the README, and example configs but never wired to any runtime behavior; removed from the docs. Use `max_redirects` instead (`0` = follow none).
|
|
24
|
+
|
|
25
|
+
### Security
|
|
26
|
+
- **dig argv-injection guard** — `digLookup` rejects non-hostname-shaped input before shelling out. `dig` has no `--` end-of-options marker (unlike whois) and parses `@`/`-`/`+`-leading argv tokens as options, so a crafted "domain" like `@evil-resolver` (redirects the query to an arbitrary server) or `-f /path` (reads a file as a query batch) is now rejected — out-of-charset or dash-leading values fall back to no-match.
|
|
27
|
+
|
|
28
|
+
## [3.1.2] - 2026-05-30
|
|
29
|
+
|
|
30
|
+
### Changed
|
|
31
|
+
- **Fingerprint identity pinned to Stable Chrome 148**, not whatever Chrome-for-Testing puppeteer bundles (currently 149, ahead of Stable). The spoof must blend with the real-world population; claiming an unreleased build is itself a tell. The Chrome major + build (`CHROME_BUILD`) + GREASE brand (`CHROME_GREASE_BRAND`) are now single constants — see `lib/fingerprint.md`.
|
|
32
|
+
- **UA Client Hints made fully consistent and matched to real Chrome 148** (verified field-for-field against a live desktop): brand-list order + GREASE string (`Not/A)Brand`), and the full-version build (`148.0.7778.217`) sourced from one place so JS `getHighEntropyValues` and the HTTP `Sec-CH-UA-Full-Version*` headers can't drift. Added `wow64`, `model`, `formFactors`, `uaFullVersion`, and `Sec-CH-UA-WoW64`/`-Model`/`-Form-Factors` headers; Windows `platformVersion` → `19.0.0`.
|
|
33
|
+
- **`navigator.deviceMemory` and `Sec-CH-Device-Memory` both pinned to `8`** (consistent JS↔HTTP), hiding the host's real RAM; `hardwareConcurrency` reports 4–8 (hides datacenter core count).
|
|
34
|
+
- **Dependencies**: puppeteer / puppeteer-core 25.1.0, lru-cache 11.5.1.
|
|
35
|
+
|
|
36
|
+
### Fixed
|
|
37
|
+
- **Timezone is now spoofed via CDP `emulateTimezone`** instead of JS overrides, so `Date`, `Intl`, and `getTimezoneOffset` are all consistent and DST-correct. The old JS patching left the real `Date` in the host zone — an 8-hour `Date`-vs-`Intl` contradiction and a leaked host timezone.
|
|
38
|
+
- **Closed several headless tells**: Battery now reports the plugged-in default (`charging:true, level:1`); `navigator.bluetooth`, `navigator.share`/`canShare` stubs added (present in real Chrome, absent in headless); `speechSynthesis.getVoices()` returns the claimed-OS voice set (`instanceof`-correct).
|
|
39
|
+
- **proxy**: a string `proxy_bypass`/`socks5_bypass` (instead of an array) no longer throws `bypass.join is not a function` in the browser-launch path.
|
|
40
|
+
- **socks-relay**: a client that disconnects during the upstream-connect await is now handled, so a tunnel isn't opened for a gone client and the watchdog clears immediately.
|
|
41
|
+
- **smart-cache**: the memory-check and auto-save `setInterval`s are now `unref`'d, so an error path that skips `destroy()` can no longer hang the process.
|
|
42
|
+
|
|
43
|
+
### Removed
|
|
44
|
+
- Dead code: `browserhealth` `testNetworkCapability` + `purgeStaleTrackers` (zero callers), and a redundant 2-voice `speechSynthesis` block superseded by the full voice set.
|
|
45
|
+
|
|
46
|
+
### Added
|
|
47
|
+
- **`lib/fingerprint.md`** — fingerprint spoofing coverage tables (surfaces, mitigations, gating flags) and known limitations.
|
|
48
|
+
|
|
49
|
+
## [3.1.0] - 2026-05-29
|
|
50
|
+
|
|
51
|
+
### Added
|
|
52
|
+
- **`realistic_click`** site flag — denser mouse approach, hold tremor, and mouseup drift for sites that score click realism.
|
|
53
|
+
- **`interact_click_count`** site override for popunder-discovery click volume (default content-click count also raised 2 → 3).
|
|
54
|
+
- **`clear_sitedata_full_on_reload`** site flag — full storage clear between reloads; quick mode now also clears localStorage/sessionStorage.
|
|
55
|
+
- **regex-tool rewritten** as a real `filterRegex` builder/tester: literal↔standard↔JSON conversion, multi-pattern + `regex_and`, and testing against real request URLs (matching mirrors the scanner exactly).
|
|
56
|
+
- **Fingerprint coverage**: per-domain-seeded Battery / `navigator.connection` values, `AudioBuffer` fingerprint defeat, `PerformanceNavigationTiming` jitter, `userActivation`; UA strings bumped to Chrome 148 / Firefox 151 / Safari 19.5.
|
|
57
|
+
|
|
58
|
+
### Changed
|
|
59
|
+
- **`userAgent` now defaults to `"chrome"`** when a site doesn't set one — previously sites without it leaked the bundled `HeadlessChrome` UA.
|
|
60
|
+
- **`Sec-CH-UA` headers and the curl content-fetch UA derive from the single UA source**, so Client Hints can't drift from `navigator.userAgent`.
|
|
61
|
+
- **VPN configs force scan concurrency to 1** — the shared system routing table isn't concurrency-safe.
|
|
62
|
+
- **Interaction time ceiling scales with the work envelope** (click count / `realistic_click`) instead of a flat 15s.
|
|
63
|
+
|
|
64
|
+
### Fixed
|
|
65
|
+
- **Per-URL timeout scales** with site timeout/delay/reload (+8s recovery grace) instead of a flat 75s that discarded partial-match recovery on multi-URL scans.
|
|
66
|
+
- **Interaction hard cap is now actually enforced** (was cooperative, overshooting to 20s+ under concurrency).
|
|
67
|
+
- **WireGuard** inline temp-config leaked the private key on failed connect and broke retries; temp dir is now per-PID so concurrent processes can't wipe each other's config.
|
|
68
|
+
- **nettools**: fixed a dig dedup race (concurrent same-domain double lookups); whois no longer discards valid records over non-fatal stderr.
|
|
69
|
+
- **Orphan resource leaks** on `Promise.race` timeout (cdp.js, clear_sitedata.js, browserhealth.js) and several un-`unref`'d `setTimeout` handles.
|
|
70
|
+
- **Config keys validated at startup** with boolean-like coercion, preventing silent misconfiguration.
|
|
71
|
+
|
|
72
|
+
### Security
|
|
73
|
+
- **OpenVPN** `pkill`/`ping`/`curl` calls moved from shell-interpolated `execSync` to `spawnSync` arg arrays, closing a command-injection vector.
|
|
74
|
+
- **WireGuard/OpenVPN interface & connection names validated** against a strict charset before use in paths/commands.
|
|
75
|
+
|
|
76
|
+
### Performance
|
|
77
|
+
- **adblock**: O(1) exact-domain lookup for `$third-party` / `$first-party` rules.
|
|
78
|
+
- Parallelized site-data clearing and window-cleanup checks.
|
|
79
|
+
- Removed dead code across cdp, domain-cache, searchstring, compress, adblock-rust, and nettools.
|
|
80
|
+
|
|
5
81
|
## [3.0.3] - 2026-05-26
|
|
6
82
|
|
|
7
83
|
### Improved
|
package/CLAUDE.md
CHANGED
|
@@ -6,7 +6,7 @@ Puppeteer-based network scanner for analyzing web traffic, generating adblock fi
|
|
|
6
6
|
|
|
7
7
|
- `nwss.js` — Main entry point (~5,800 lines). CLI args, URL processing, orchestration.
|
|
8
8
|
- `config.json` — Default scan configuration (sites, filters, options).
|
|
9
|
-
- `lib/` —
|
|
9
|
+
- `lib/` — 33 focused, single-purpose modules:
|
|
10
10
|
- `fingerprint.js` — Bot detection evasion (device/GPU/timezone spoofing)
|
|
11
11
|
- `cloudflare.js` — Cloudflare challenge detection and solving
|
|
12
12
|
- `browserhealth.js` — Memory management and browser lifecycle
|
|
@@ -14,6 +14,7 @@ Puppeteer-based network scanner for analyzing web traffic, generating adblock fi
|
|
|
14
14
|
- `ghost-cursor.js` — Bezier-curve cursor pathing for human-like mouse movement
|
|
15
15
|
- `smart-cache.js` — Multi-layer caching with persistence
|
|
16
16
|
- `nettools.js` — WHOIS/dig integration
|
|
17
|
+
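- `dns.js` — DNS pre-check resolver: multi-nameserver rotation + `--dns` override (this module serves the pre-check only; `--dns` is also honored by nettools' `dig`, while Chrome navigation and `whois` are unaffected)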
|
|
17
18
|
- `output.js` — Multi-format rule output (adblock, dnsmasq, unbound, pihole, etc.)
|
|
18
19
|
- `proxy.js` — SOCKS5/HTTP proxy support
|
|
19
20
|
- `socks-relay.js` — Local SOCKS proxy relay/chain helper
|
package/README.md
CHANGED
|
@@ -17,6 +17,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
17
17
|
- Subdomain handling (collapse to root or full subdomain)
|
|
18
18
|
- Optionally match only first-party, third-party, or both
|
|
19
19
|
- Enhanced redirect handling with JavaScript and meta refresh detection
|
|
20
|
+
- Capture and drive popup/popunder chains (`capture_popups` + `interact_popups`) so domains reachable only via a clicked popup still match
|
|
20
21
|
- Per-site proxy routing (SOCKS5, SOCKS4, HTTP, HTTPS) with pre-flight health checks
|
|
21
22
|
|
|
22
23
|
---
|
|
@@ -50,7 +51,6 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
50
51
|
|
|
51
52
|
| Argument | Description |
|
|
52
53
|
|:---------------------------|:------------|
|
|
53
|
-
| `--verbose` | Force verbose mode globally |
|
|
54
54
|
| `--debug` | Force debug mode globally |
|
|
55
55
|
| `--silent` | Suppress normal console logs |
|
|
56
56
|
| `--titles` | Add `! <url>` title before each site's group |
|
|
@@ -66,7 +66,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
66
66
|
| `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
|
|
67
67
|
| `--use-obscura` | Connect to running Obscura CDP server (`ws://127.0.0.1:9222` or `OBSCURA_WS` env). Skips fingerprint injection — Obscura provides built-in stealth |
|
|
68
68
|
| `--load-extension <path>` | Load unpacked Chrome extension from directory (can be used multiple times) |
|
|
69
|
-
| `--dns-cache` | Persist dig/whois results to disk between runs (20hr TTL, 2000-entry cap each, `.digcache`/`.whoiscache`). Disk writes are atomic (tmp + rename); corrupt cache files are detected on load with a `[dns-cache]` warn line and reset cleanly. |
|
|
69
|
+
| `--dns-cache` | Persist dig/whois results to disk between runs (20hr TTL, 2000-entry cap each, `.digcache`/`.whoiscache`), **plus** the DNS pre-check negative cache (NXDOMAIN/ENODATA only — never resolver errors — 12h TTL, `.dnsnegcache`) so known-dead hosts aren't re-resolved next run. Disk writes are atomic (tmp + rename); corrupt cache files are detected on load with a `[dns-cache]` warn line and reset cleanly. |
|
|
70
70
|
| `--no-dns-precheck` | Disable per-URL DNS resolution check before page navigation. By default, hosts that dig/whois have already proven live (within the 20hr cache TTL) skip their c-ares pre-check via a positive-resolution index. |
|
|
71
71
|
| `--block-ads=<files>` | Block ads using EasyList format rules (comma-separated: `easylist.txt,easyprivacy.txt`) |
|
|
72
72
|
| `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
|
|
@@ -76,6 +76,8 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
76
76
|
| `--help`, `-h` | Show this help menu |
|
|
77
77
|
| `--version` | Show script version |
|
|
78
78
|
| `--max-concurrent <number>` | Maximum concurrent site processing (1-50, overrides config/default) |
|
|
79
|
+
| `--dns <ip[,ip,...]>` | Resolver(s) for the DNS pre-check **and** nettools' `dig` (one pins, several rotate per query; overrides `/etc/resolv.conf`). Does not affect Chrome navigation or `whois`. Useful when the system resolver is flaky and `dig`-gated domains time out |
|
|
80
|
+
| `--show-dead-domains` | At end of scan, list hostnames that did not resolve / were unreachable (`NXDOMAIN`/`ENODATA` + `ERR_NAME_NOT_RESOLVED`/`ERR_ADDRESS_UNREACHABLE`). Excludes blocks/timeouts (those mean the domain is alive). For pruning dead URLs. |
|
|
79
81
|
| `--cleanup-interval <number>` | Browser restart interval in URLs processed (1-1000, overrides config/default) |
|
|
80
82
|
|
|
81
83
|
### Validation Options
|
|
@@ -152,6 +154,7 @@ Example:
|
|
|
152
154
|
| `userAgent` | `chrome`, `chrome_mac`, `chrome_linux`, `firefox`, `firefox_mac`, `firefox_linux`, `safari` | - | User agent for page |
|
|
153
155
|
| `filterRegex` | String or Array | `.*` | Regex or list of regexes to match requests |
|
|
154
156
|
| `regex_and` | Boolean | `false` | Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL |
|
|
157
|
+
| `output_regex` | String | — | Regex applied to each matched URL to build the rule body: capture group 1 (or whole match) becomes `\|\|<capture>` instead of `\|\|host^`. E.g. `^https?:\/\/([^\/]+\/[^\/]+\/)` turns `https://host.com/script/abc.js` into `\|\|host.com/script/`. The capture must include the host. No match → falls back to `\|\|host^`. Adblock-only; domain formats (dnsmasq/unbound/pihole/hosts/plain) emit the bare host |
|
|
155
158
|
| `comments` | String or Array | - | String of comments or references |
|
|
156
159
|
| `resourceTypes` | Array | `["script", "xhr", "image", "stylesheet"]` | What resource types to monitor |
|
|
157
160
|
| `reload` | Integer | `1` | Number of times to reload page |
|
|
@@ -176,8 +179,7 @@ Example:
|
|
|
176
179
|
|
|
177
180
|
| Field | Values | Default | Description |
|
|
178
181
|
|:---------------------|:-------|:-------:|:------------|
|
|
179
|
-
| `
|
|
180
|
-
| `max_redirects` | Integer | `10` | Maximum number of redirects to follow |
|
|
182
|
+
| `max_redirects` | Integer | `10` | Maximum number of redirects to follow (`0` = follow none) |
|
|
181
183
|
| `js_redirect_timeout` | Milliseconds | `5000` | Time to wait for JavaScript redirects |
|
|
182
184
|
| `detect_js_patterns` | Boolean | `true` | Analyze page source for redirect patterns |
|
|
183
185
|
| `redirect_timeout_multiplier` | Number | `1.5` | Increase timeout for redirected URLs |
|
|
@@ -279,6 +281,8 @@ When a page redirects to a new domain, first-party/third-party detection is base
|
|
|
279
281
|
| `interact_duration` | Milliseconds | `2000` | Duration of interaction simulation |
|
|
280
282
|
| `interact_scrolling` | Boolean | `true` | Enable scrolling simulation |
|
|
281
283
|
| `interact_clicks` | Boolean | `false` | Enable element clicking simulation |
|
|
284
|
+
| `interact_click_count` | Integer | `3` | Number of random content-zone clicks per load (capped at 20). Default 3 = primary + 2 backups, since ad SDKs sometimes suppress the 1st/2nd click as warmup |
|
|
285
|
+
| `realistic_click` | Boolean | `false` | Higher click fidelity: denser mouse approach (15 steps), ±1px hand-tremor micro-moves during the press, and ±1.5px mouseup drift (so mousedown≠mouseup coords) — for sites that score click realism. Costs ~80–120ms/click |
|
|
282
286
|
| `interact_typing` | Boolean | `false` | Enable typing simulation |
|
|
283
287
|
| `interact_intensity` | String | `"medium"` | Interaction simulation intensity: "low", "medium", "high" |
|
|
284
288
|
| `cursor_mode` | `"ghost"` | - | Use ghost-cursor Bezier mouse movements (requires `npm i ghost-cursor`) |
|
|
@@ -295,6 +299,21 @@ When a page redirects to a new domain, first-party/third-party detection is base
|
|
|
295
299
|
| `ignore_similar_threshold` | Integer | - | Override global similarity threshold for this site |
|
|
296
300
|
| `ignore_similar_ignored_domains` | Boolean | - | Override global `ignore_similar_ignored_domains` for this site |
|
|
297
301
|
|
|
302
|
+
### Popup Capture Options
|
|
303
|
+
|
|
304
|
+
Capture (and optionally drive) the popup/popunder windows that ad and redirect
|
|
305
|
+
scripts open, so domains reachable only via a popup chain still match `filterRegex`.
|
|
306
|
+
The same `filterRegex` applies to the whole chain — it must contain every pattern
|
|
307
|
+
you expect along it. Popup capture only fires when the main page is actually
|
|
308
|
+
clicking, so set `interact: true` **and** `interact_clicks: true` as well.
|
|
309
|
+
|
|
310
|
+
| Field | Values | Default | Description |
|
|
311
|
+
|:---------------------|:-------|:-------:|:------------|
|
|
312
|
+
| `capture_popups` | Boolean | `false` | Capture popup windows opened during the scan and evaluate their landing URL + in-popup requests against `filterRegex`/`dig`/`whois` (requires `interact` + `interact_clicks` to fire user-gesture clicks) |
|
|
313
|
+
| `interact_popups` | Boolean | `false` | Mouse-click inside captured popups (3 content-zone clicks) so the chain cascades to its next redirect/ad. Requires `capture_popups`. Clicks popups up to `capture_popups_max_depth − 1` (the deepest captured popup is observed, not clicked) |
|
|
314
|
+
| `capture_popups_max_depth` | Integer | `4` | Max popup-chain depth to capture (`site → p1 → p2 → p3 → destination`). Each extra level multiplies popups + time |
|
|
315
|
+
| `capture_popups_window_ms` | Integer | `5000` | Per-popup capture window (ms) before the popup is auto-closed |
|
|
316
|
+
|
|
298
317
|
### VPN Options
|
|
299
318
|
|
|
300
319
|
Route traffic through a VPN for specific sites. Requires `sudo` privileges. The VPN connection is established before scanning and torn down after the site completes.
|
|
@@ -596,8 +615,11 @@ node nwss.js --max-concurrent 12 --cleanup-interval 300 -o rules.txt
|
|
|
596
615
|
{
|
|
597
616
|
"url": "https://anti-bot-site.com",
|
|
598
617
|
"interact": true,
|
|
618
|
+
"interact_clicks": true,
|
|
599
619
|
"cursor_mode": "ghost",
|
|
600
|
-
"
|
|
620
|
+
"realistic_click": true,
|
|
621
|
+
"interact_click_count": 3,
|
|
622
|
+
"ghost_cursor_duration": 5000,
|
|
601
623
|
"ghost_cursor_speed": 1.2,
|
|
602
624
|
"fingerprint_protection": "random",
|
|
603
625
|
"filterRegex": "tracking|analytics",
|
|
@@ -610,6 +632,12 @@ Or enable globally via CLI:
|
|
|
610
632
|
node nwss.js --ghost-cursor --debug -o rules.txt
|
|
611
633
|
```
|
|
612
634
|
|
|
635
|
+
**Ghost-cursor clicks.** The cursor moves with `cursor_mode: "ghost"`, but it only *clicks* when both `interact: true` **and** `interact_clicks: true` are set (same rule as the built-in path). Click behavior:
|
|
636
|
+
|
|
637
|
+
- `realistic_click: true` — each press adds hand-tremor during the hold and a mouseup drift, so `mousedown` ≠ `mouseup` coordinates (the press is routed through the same `humanClick` the built-in content clicks use).
|
|
638
|
+
- `interact_click_count` — number of clicks per load (default `3`, capped at `20`). The default of 3 matters because some ad SDKs swallow the 1st/2nd click as warmup.
|
|
639
|
+
- **Duration vs. clicks:** realistic clicks take ~600–700ms each, and the bezier movement loop reserves up to **half** of `ghost_cursor_duration` for them. So the default `ghost_cursor_duration: 2000` only fits **~1 click** — raise it to roughly `interact_click_count × 700 + movement` (e.g. `5000`–`8000`) to fit all of them.
|
|
640
|
+
|
|
613
641
|
> **Note:** ghost-cursor is an optional dependency. Install with `npm install ghost-cursor`. If not installed, the scanner falls back to the built-in mouse simulation automatically.
|
|
614
642
|
|
|
615
643
|
#### E-commerce Site Scanning
|
package/eslint.config.mjs
CHANGED
|
@@ -2,5 +2,17 @@ import globals from "globals";
|
|
|
2
2
|
import { defineConfig } from "eslint/config";
|
|
3
3
|
|
|
4
4
|
export default defineConfig([
|
|
5
|
-
{
|
|
5
|
+
{
|
|
6
|
+
files: ["**/*.{js,mjs,cjs}"],
|
|
7
|
+
// Node globals (require/module/process/Buffer/...) plus browser globals
|
|
8
|
+
// (document/window/navigator) — the latter are referenced inside
|
|
9
|
+
// page.evaluate() callbacks that eslint parses as part of the file.
|
|
10
|
+
languageOptions: { globals: { ...globals.node, ...globals.browser } },
|
|
11
|
+
// Catch undefined-variable references statically. node --check only
|
|
12
|
+
// validates syntax, so an orphaned identifier (e.g. a const that was
|
|
13
|
+
// removed while a usage remained) passes parsing but throws
|
|
14
|
+
// ReferenceError at runtime only when that branch executes. no-undef
|
|
15
|
+
// turns that whole class into a build-time failure.
|
|
16
|
+
rules: { "no-undef": "error" },
|
|
17
|
+
},
|
|
6
18
|
]);
|
package/lib/browserhealth.js
CHANGED
|
@@ -7,12 +7,11 @@ const { formatLogMessage, messageColors } = require('./colorize');
|
|
|
7
7
|
const IS_PAGE_FROM_PREVIOUS_SCAN_TAG = messageColors.processing('[isPageFromPreviousScan]');
|
|
8
8
|
const REALTIME_CLEANUP_TAG = messageColors.processing('[realtime_cleanup]');
|
|
9
9
|
const GROUP_WINDOW_CLEANUP_TAG = messageColors.processing('[group_window_cleanup]');
|
|
10
|
-
const {
|
|
10
|
+
const { execFile } = require('child_process');
|
|
11
11
|
|
|
12
12
|
// Window cleanup delay constant
|
|
13
13
|
const WINDOW_CLEANUP_DELAY_MS = 15000;
|
|
14
14
|
// window_clean REALTIME
|
|
15
|
-
const REALTIME_CLEANUP_BUFFER_MS = 25000; // Additional buffer time after site delay (increased for Cloudflare)
|
|
16
15
|
const REALTIME_CLEANUP_THRESHOLD = 12; // Default number of pages to keep
|
|
17
16
|
const REALTIME_CLEANUP_MIN_PAGES = 6; // Minimum pages before cleanup kicks in
|
|
18
17
|
|
|
@@ -380,7 +379,30 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
|
|
|
380
379
|
|
|
381
380
|
// Use the provided total delay (already includes appropriate buffer)
|
|
382
381
|
const cleanupDelay = totalDelay;
|
|
383
|
-
|
|
382
|
+
|
|
383
|
+
// Pre-wait short-circuit. The only pages this pass can ever close are popups
|
|
384
|
+
// (untracked) and idle pages — active main pages are protected by
|
|
385
|
+
// isPageSafeToClose. When concurrency exceeds the threshold the page count is
|
|
386
|
+
// dominated by active main pages, so without this we'd wait the full
|
|
387
|
+
// cleanupDelay and then close nothing (e.g. max_concurrent 30 vs threshold 8
|
|
388
|
+
// = a ~36s no-op on every task). If nothing is even a candidate, skip the
|
|
389
|
+
// wait. A main task that finishes during the skipped wait closes its OWN page,
|
|
390
|
+
// so realtime cleanup never needed to wait for it.
|
|
391
|
+
const hasCloseCandidate = quickPages.some(p => {
|
|
392
|
+
if (p.isClosed()) return false;
|
|
393
|
+
const usage = pageUsageTracker.get(p);
|
|
394
|
+
return !usage || !usage.isProcessing; // untracked popup, or a tracked-idle page
|
|
395
|
+
});
|
|
396
|
+
if (!hasCloseCandidate) {
|
|
397
|
+
if (forceDebug) {
|
|
398
|
+
console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} ${quickPages.length} pages but all actively processing — skipping ${cleanupDelay}ms wait (nothing closeable)`));
|
|
399
|
+
}
|
|
400
|
+
result.success = true;
|
|
401
|
+
result.totalPages = quickPages.length;
|
|
402
|
+
result.reason = 'all_active';
|
|
403
|
+
return result;
|
|
404
|
+
}
|
|
405
|
+
|
|
384
406
|
if (forceDebug) {
|
|
385
407
|
console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Waiting ${cleanupDelay}ms before cleanup (threshold: ${threshold})`));
|
|
386
408
|
}
|
|
@@ -607,16 +629,6 @@ function untrackPage(page) {
|
|
|
607
629
|
pageUsageTracker.delete(page);
|
|
608
630
|
}
|
|
609
631
|
|
|
610
|
-
/**
|
|
611
|
-
* No-op since the trackers were migrated to WeakMap — GC reclaims dead-page
|
|
612
|
-
* entries automatically when Puppeteer drops its internal references. Kept
|
|
613
|
-
* exported so the ~7 callers in nwss.js continue to compile; safe to delete
|
|
614
|
-
* entirely once those callsites are scrubbed.
|
|
615
|
-
*/
|
|
616
|
-
function purgeStaleTrackers() {
|
|
617
|
-
// intentionally empty
|
|
618
|
-
}
|
|
619
|
-
|
|
620
632
|
/**
|
|
621
633
|
* Quick browser responsiveness test for use during page setup
|
|
622
634
|
* Designed to catch browser degradation between operations
|
|
@@ -637,82 +649,6 @@ async function isQuicklyResponsive(browserInstance, timeout = 3000) {
|
|
|
637
649
|
}
|
|
638
650
|
}
|
|
639
651
|
|
|
640
|
-
/**
|
|
641
|
-
* Tests if browser can handle network operations (like Network.enable)
|
|
642
|
-
* Creates a test page and attempts basic network setup
|
|
643
|
-
* @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance
|
|
644
|
-
* @param {number} timeout - Timeout in milliseconds (default: 10000)
|
|
645
|
-
* @returns {Promise<object>} Network capability test result
|
|
646
|
-
*/
|
|
647
|
-
async function testNetworkCapability(browserInstance, timeout = 10000) {
|
|
648
|
-
const result = {
|
|
649
|
-
capable: false,
|
|
650
|
-
error: null,
|
|
651
|
-
responseTime: 0
|
|
652
|
-
};
|
|
653
|
-
|
|
654
|
-
const startTime = Date.now();
|
|
655
|
-
let testPage = null;
|
|
656
|
-
// Hoisted so the catch can attach an orphan-close chain. Promise.race
|
|
657
|
-
// cannot cancel browser.newPage() — if the race times out, the underlying
|
|
658
|
-
// call may still resolve to a real Page tab nothing references. Same
|
|
659
|
-
// pattern as cdp.js (commit 0772ccd) and clear_sitedata.js (commit 780b443).
|
|
660
|
-
let testPagePromise = null;
|
|
661
|
-
|
|
662
|
-
try {
|
|
663
|
-
// Create test page
|
|
664
|
-
testPagePromise = browserInstance.newPage();
|
|
665
|
-
testPage = await raceWithTimeout(
|
|
666
|
-
testPagePromise,
|
|
667
|
-
timeout,
|
|
668
|
-
'Test page creation timeout'
|
|
669
|
-
);
|
|
670
|
-
|
|
671
|
-
// Test network operations (the critical operation that's failing)
|
|
672
|
-
await raceWithTimeout(
|
|
673
|
-
testPage.setRequestInterception(true),
|
|
674
|
-
timeout,
|
|
675
|
-
'Network.enable test timeout'
|
|
676
|
-
);
|
|
677
|
-
|
|
678
|
-
// Turn off interception. Symmetric to the enable above — Network.disable
|
|
679
|
-
// can hang for the same CDP reasons, so it needs the same watchdog.
|
|
680
|
-
await raceWithTimeout(
|
|
681
|
-
testPage.setRequestInterception(false),
|
|
682
|
-
timeout,
|
|
683
|
-
'Network.disable test timeout'
|
|
684
|
-
);
|
|
685
|
-
result.capable = true;
|
|
686
|
-
result.responseTime = Date.now() - startTime;
|
|
687
|
-
|
|
688
|
-
} catch (error) {
|
|
689
|
-
// Orphan cleanup: if testPage is null but newPage() was started, the
|
|
690
|
-
// race timed out before assignment. Close the orphan when it arrives.
|
|
691
|
-
if (!testPage && testPagePromise) {
|
|
692
|
-
testPagePromise.then(p => p.close().catch(() => {})).catch(() => {});
|
|
693
|
-
}
|
|
694
|
-
result.error = error.message;
|
|
695
|
-
result.responseTime = Date.now() - startTime;
|
|
696
|
-
|
|
697
|
-
// Classify the error type
|
|
698
|
-
if (error.message.includes('Network.enable') ||
|
|
699
|
-
error.message.includes('timed out') ||
|
|
700
|
-
error.message.includes('Protocol error')) {
|
|
701
|
-
result.error = `Network capability test failed: ${error.message}`;
|
|
702
|
-
}
|
|
703
|
-
} finally {
|
|
704
|
-
if (testPage && !testPage.isClosed()) {
|
|
705
|
-
try {
|
|
706
|
-
await testPage.close();
|
|
707
|
-
} catch (closeErr) {
|
|
708
|
-
/* ignore cleanup errors */
|
|
709
|
-
}
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
return result;
|
|
714
|
-
}
|
|
715
|
-
|
|
716
652
|
/**
|
|
717
653
|
* Checks if browser instance is still responsive
|
|
718
654
|
* @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance
|
|
@@ -758,8 +694,8 @@ async function checkBrowserHealth(browserInstance, timeout = 8000) {
|
|
|
758
694
|
|
|
759
695
|
// Test 4: Create a single test page to verify both browser functionality AND network capability
|
|
760
696
|
let testPage = null;
|
|
761
|
-
// Same orphan-cleanup pattern as
|
|
762
|
-
//
|
|
697
|
+
// Same orphan-cleanup pattern as cdp.js + clear_sitedata.js.
|
|
698
|
+
// Promise.race can't cancel newPage() — if the race
|
|
763
699
|
// times out the underlying call may still produce a Page tab nothing
|
|
764
700
|
// references → leaked tab.
|
|
765
701
|
let testPagePromise = null;
|
|
@@ -1282,7 +1218,6 @@ module.exports = {
|
|
|
1282
1218
|
performGroupWindowCleanup,
|
|
1283
1219
|
performRealtimeWindowCleanup,
|
|
1284
1220
|
trackPageForRealtime,
|
|
1285
|
-
testNetworkCapability,
|
|
1286
1221
|
isQuicklyResponsive,
|
|
1287
1222
|
performHealthAssessment,
|
|
1288
1223
|
monitorBrowserHealth,
|
|
@@ -1290,6 +1225,5 @@ module.exports = {
|
|
|
1290
1225
|
isCriticalProtocolError,
|
|
1291
1226
|
updatePageUsage,
|
|
1292
1227
|
untrackPage,
|
|
1293
|
-
cleanupPageBeforeReload
|
|
1294
|
-
purgeStaleTrackers
|
|
1228
|
+
cleanupPageBeforeReload
|
|
1295
1229
|
};
|
package/lib/dns.js
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DNS pre-check resolver with multi-nameserver rotation.
|
|
3
|
+
*
|
|
4
|
+
* Owns nameserver selection and robust resolution for the scan's DNS
|
|
5
|
+
* pre-check. The default global resolver leads EVERY query with the FIRST
|
|
6
|
+
* nameserver in /etc/resolv.conf, so under scan concurrency one server
|
|
7
|
+
* (typically the ISP resolver) takes the whole c-ares burst and starts
|
|
8
|
+
* answering REFUSED while the other configured servers (e.g. 8.8.8.8/8.8.4.4)
|
|
9
|
+
* sit idle. This module builds one Resolver per nameserver — each leading with
|
|
10
|
+
* a different server, the rest kept as failover order — and round-robins them
|
|
11
|
+
* per resolve attempt so the lead spreads across all servers (and across the
|
|
12
|
+
* retry). A `--dns` override pins/rotates an explicit list instead of
|
|
13
|
+
* resolv.conf.
|
|
14
|
+
*
|
|
15
|
+
* Scope: this affects the pre-check resolver only. Chrome's navigation DNS
|
|
16
|
+
* (OS resolver) and nettools' dig/whois are separate paths and unaffected.
|
|
17
|
+
*/
|
|
18
|
+
const net = require('node:net');
|
|
19
|
+
const dnsPromises = require('node:dns/promises');
|
|
20
|
+
const { getServers: getSystemDnsServers } = require('node:dns');
|
|
21
|
+
const { Resolver: DnsPromiseResolver } = require('node:dns/promises');
|
|
22
|
+
const { formatLogMessage } = require('./colorize');
|
|
23
|
+
|
|
24
|
+
// Error codes that point at the resolver itself (worth a retry, and worth
// failing open on) rather than at a host that does not exist.
const DNS_TRANSIENT_ERRORS = new Set([
  'ETIMEOUT',
  'ESERVFAIL',
  'EREFUSED',
  'ECONNREFUSED',
]);
|
|
27
|
+
|
|
28
|
+
/**
 * Classify a resolver error code as a definitive negative answer.
 *
 * Only 'ENOTFOUND' and 'ENODATA' qualify: the host does not exist or has no
 * address, which is the one situation where the pre-check may skip a URL.
 * Any other value (EREFUSED, ESERVFAIL, ETIMEOUT, ECONNREFUSED, a timeout
 * with no code, ...) indicates a resolver problem and the caller should fail
 * open.
 *
 * @param {string} code - error code taken from the rejected lookup
 * @returns {boolean} true for a definitive non-existence answer
 */
function isNonExistenceError(code) {
  switch (code) {
    case 'ENOTFOUND':
    case 'ENODATA':
      return true;
    default:
      return false;
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Check whether a string is a nameserver spec in one of the forms
 * Resolver.setServers() understands: a bare IPv4/IPv6 address, `ipv4:port`,
 * `[ipv6]` or `[ipv6]:port`.
 *
 * When a port is present it must lie in 1-65535. Matching "up to five digits"
 * alone would let values such as 99999 or 0 through, and those can never name
 * a real server port.
 *
 * @param {string} s - candidate server spec (already trimmed)
 * @returns {boolean} true when the spec is well-formed
 */
function isResolverSpec(s) {
  // Anything that is not a string cannot be a spec (and has no .match()).
  if (typeof s !== 'string') return false;
  if (net.isIP(s)) return true;

  // An absent port is fine (the default applies); a present one is range-checked.
  const isUsablePort = (digits) => {
    if (digits === undefined) return true;
    const port = Number(digits);
    return port >= 1 && port <= 65535;
  };

  const bracketed = s.match(/^\[([0-9a-fA-F:]+)\](?::(\d{1,5}))?$/);
  if (bracketed) return net.isIP(bracketed[1]) === 6 && isUsablePort(bracketed[2]);

  const v4port = s.match(/^(\d{1,3}(?:\.\d{1,3}){3}):(\d{1,5})$/);
  if (v4port) return net.isIP(v4port[1]) === 4 && isUsablePort(v4port[2]);

  return false;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Turn a `--dns` / config value into a validated, de-duplicated server list.
 *
 * The input may be a comma-separated string or an array. Every entry is
 * stringified and trimmed; empty entries are dropped silently. Entries that
 * fail isResolverSpec() (anything other than a bare IPv4/IPv6 address or an
 * address with a port such as `8.8.8.8:5353` / `[2001:db8::1]:5353`) are
 * reported with console.warn and dropped. Exact duplicates are collapsed,
 * keeping the first occurrence, so the rotation stays even.
 *
 * @param {string|string[]|undefined} raw
 * @returns {string[]} validated server specs in input order (possibly empty)
 */
function parseDnsServers(raw) {
  if (!raw) return [];

  const entries = Array.isArray(raw) ? raw : String(raw).split(',');
  // A Set keeps insertion order, which gives first-occurrence de-duplication.
  const accepted = new Set();

  for (const entry of entries) {
    const spec = String(entry).trim();
    if (spec === '') continue;

    if (isResolverSpec(spec)) {
      accepted.add(spec);
    } else {
      console.warn(`⚠ --dns: ignoring invalid server "${spec}" (expected IPv4/IPv6, optionally with :port)`);
    }
  }

  return [...accepted];
}
|
|
76
|
+
|
|
77
|
+
/**
 * Build a rotating pre-check resolver.
 *
 * One Resolver is created per nameserver. Resolver i is given the server list
 * rotated so that server i leads and the remaining servers follow as failover
 * order. Every resolve attempt takes the next resolver round-robin, so the
 * lead spreads evenly across all servers (and across the retry).
 *
 * @param {object} [opts]
 * @param {string[]} [opts.servers] - explicit servers (from --dns). When
 *   missing or empty, the servers reported by the system resolver
 *   configuration are used instead.
 * @param {boolean} [opts.forceDebug] - emit a debug line on the retry path.
 * @returns {{ resolveHost: (hostname:string, timeoutMs:number)=>Promise<void>,
 *             servers: string[], rotates: boolean, pinned: boolean }}
 *   - resolveHost resolves on success and rejects with the final error on
 *     failure. Resolver errors keep their err.code; the local safety-net
 *     timeout rejects with Error('DNS timeout'), which carries no code.
 *   - servers is the list in use (a copy when it came from opts.servers).
 *   - rotates is true when a resolver pool was built.
 *   - pinned is true when an explicit server list was supplied.
 */
function createRotatingResolver(opts = {}) {
  const forceDebug = !!opts.forceDebug;
  // Copy the explicit list so a caller mutating its array later cannot make
  // the reported `servers` drift away from what the pool was built with.
  const override = Array.isArray(opts.servers) && opts.servers.length > 0 ? [...opts.servers] : null;

  let systemServers = [];
  try { systemServers = getSystemDnsServers(); } catch { systemServers = []; }
  const servers = override || systemServers;

  // Pin/rotate an explicit --dns list (even a single server — never fall back
  // to the OS resolver in that case). For the system list, only build a pool
  // when there is more than one server to rotate; otherwise use the global
  // promises API.
  const shouldPool = override ? servers.length >= 1 : servers.length > 1;
  let pool = null;
  if (shouldPool) {
    pool = servers.map((_, i) => {
      const r = new DnsPromiseResolver();
      try {
        r.setServers([...servers.slice(i), ...servers.slice(0, i)]);
      } catch (err) {
        // Best-effort: the resolver stays usable with its default servers, but
        // say so — silently ignoring this would hide that a pinned list is not
        // actually in effect for this slot.
        const reason = err && err.message ? err.message : String(err);
        console.warn(`⚠ DNS pre-check: could not apply server list led by "${servers[i]}" (${reason}); this resolver keeps its default servers`);
      }
      return r;
    });
  }

  let cursor = 0;
  // Resolver for the next attempt: rotated when a pool exists, else the global
  // promises API. Selection and advance happen synchronously, so concurrent
  // callers each get a distinct slot. The cursor wraps at the pool length so
  // it never grows without bound over a long scan.
  const nextResolver = () => {
    if (!pool) return dnsPromises;
    const picked = pool[cursor];
    cursor = (cursor + 1) % pool.length;
    return picked;
  };

  // One resolution attempt: rotate the lead server, resolve4 first, and on
  // no-IPv4 (ENODATA/ENOTFOUND) fall back to resolve6 so IPv6-only hosts aren't
  // wrongly skipped. Any OTHER code propagates unchanged so the caller sees the
  // real resolver error. The timeout is a safety net; it does not cancel the
  // underlying query, it only stops waiting for it.
  async function attempt(hostname, timeoutMs) {
    const resolver = nextResolver();
    let timer;
    try {
      const timeoutP = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('DNS timeout')), timeoutMs);
      });
      const chain = resolver.resolve4(hostname).catch(err => {
        if (err && (err.code === 'ENODATA' || err.code === 'ENOTFOUND')) {
          return resolver.resolve6(hostname);
        }
        throw err;
      });
      await Promise.race([chain, timeoutP]);
    } finally {
      if (timer) clearTimeout(timer);
    }
  }

  /**
   * Resolve a hostname, rotating the lead server per attempt and retrying once
   * on a transient/resolver error (so the retry leads with the next server —
   * if one REFUSES, the retry hits another). A non-transient first error is
   * rethrown as-is; an error from the retry propagates to the caller.
   */
  async function resolveHost(hostname, timeoutMs) {
    try {
      await attempt(hostname, timeoutMs);
    } catch (firstErr) {
      const code = firstErr && firstErr.code;
      if (DNS_TRANSIENT_ERRORS.has(code) || (firstErr && firstErr.message === 'DNS timeout')) {
        if (forceDebug) console.log(formatLogMessage('debug', `DNS pre-check transient (${code || 'timeout'}) for ${hostname}, retrying once`));
        await attempt(hostname, timeoutMs);
      } else {
        throw firstErr;
      }
    }
  }

  return { resolveHost, servers, rotates: !!pool, pinned: !!override };
}
|
|
165
|
+
|
|
166
|
+
/**
 * Circuit breaker for the DNS pre-check. During a resolver-refusal storm the
 * pre-check is worthless (every host fails open and proceeds anyway) and
 * actively harmful (it piles ~2× the queries — with the retry — onto an
 * already-refusing resolver). This trips when resolver errors reach a
 * threshold within a rolling window of recent outcomes and stays suspended for
 * a cooldown so the resolver gets breathing room. The caller decides what a
 * "resolver error" is and passes that as a boolean to record(); outcomes such
 * as success or NXDOMAIN are expected to be recorded as healthy (false).
 *
 * After the cooldown the breaker resumes with an empty window, so the storm is
 * re-measured from scratch. That reset happens on the first call to either
 * record() or isTripped() after the cooldown has elapsed.
 *
 * @param {object} [opts]
 * @param {number} [opts.window=20]     outcomes kept in the rolling window;
 *   values that are not finite and > 0 fall back to the default
 * @param {number} [opts.threshold=10]  resolver errors in the window to trip;
 *   values that are not finite and > 0 fall back to the default
 * @param {number} [opts.cooldownMs=30000] how long to stay suspended once tripped
 * @param {boolean} [opts.forceDebug]   log a debug line when resuming
 * @param {function} [opts.now]         clock injection (tests); defaults to Date.now
 * @returns {{ record:(isResolverError:boolean)=>void, isTripped:()=>boolean,
 *             stats:()=>{tripped:boolean,errorCount:number,windowFill:number,trips:number} }}
 */
function createDnsCircuitBreaker(opts = {}) {
  // Accept any finite positive number (numeric strings included, as before);
  // anything else would yield a breaker that can never trip or a window that
  // never shrinks, so use the default instead.
  const positiveOr = (value, fallback) => {
    const n = Number(value);
    return Number.isFinite(n) && n > 0 ? n : fallback;
  };
  const windowSize = positiveOr(opts.window, 20);
  const threshold = positiveOr(opts.threshold, 10);
  const cooldownMs = opts.cooldownMs != null ? opts.cooldownMs : 30000;
  const forceDebug = !!opts.forceDebug;
  const now = opts.now || Date.now;

  const recent = [];     // booleans, true = resolver error
  let errorCount = 0;    // number of `true` entries currently in `recent`
  let openUntil = 0;     // suspended while now() < openUntil; 0 = never tripped / already resumed
  let trips = 0;

  // If a cooldown was running and has elapsed, close the breaker and start
  // with a clean window. Shared by record() and isTripped() so the outcome
  // does not depend on which of the two happens to be called first.
  function resumeIfElapsed() {
    if (openUntil === 0 || now() < openUntil) return;
    openUntil = 0;
    recent.length = 0;
    errorCount = 0;
    if (forceDebug) console.log(formatLogMessage('debug', '[dns-precheck] cooldown elapsed — resuming DNS pre-check'));
  }

  // Feed one resolve outcome. Normally called while closed (a suspended
  // pre-check skips the resolve), but a late outcome from a resolve that was
  // already in flight may still arrive; it must not re-trip on stale errors.
  function record(isResolverError) {
    resumeIfElapsed();

    recent.push(!!isResolverError);
    if (isResolverError) errorCount++;
    // Evict the oldest outcome once the window overflows, keeping the count in sync.
    if (recent.length > windowSize && recent.shift()) errorCount--;

    if (now() >= openUntil && errorCount >= threshold) {
      openUntil = now() + cooldownMs;
      trips++;
      console.log(formatLogMessage('warn', `[dns-precheck] resolver errors ${errorCount}/${recent.length} — suspending DNS pre-check ${Math.round(cooldownMs / 1000)}s (sites still load; backing off the resolver)`));
    }
  }

  // True while suspended. Once the cooldown has elapsed the breaker resumes
  // with a clean window and this returns false.
  function isTripped() {
    if (now() < openUntil) return true;
    resumeIfElapsed();
    return false;
  }

  return {
    record,
    isTripped,
    stats: () => ({ tripped: now() < openUntil, errorCount, windowFill: recent.length, trips }),
  };
}
|
|
232
|
+
|
|
233
|
+
module.exports = {
|
|
234
|
+
createRotatingResolver,
|
|
235
|
+
createDnsCircuitBreaker,
|
|
236
|
+
parseDnsServers,
|
|
237
|
+
isNonExistenceError,
|
|
238
|
+
};
|