agent-browser-stealth 0.14.0-fork.4 → 0.14.0-fork.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -102
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
- package/dist/actions.d.ts +5 -1
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +95 -46
- package/dist/actions.js.map +1 -1
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +1 -0
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +9 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/skills/agent-browser/SKILL.md +29 -14
package/README.md
CHANGED
|
@@ -1,163 +1,199 @@
|
|
|
1
1
|
# agent-browser-stealth
|
|
2
2
|
|
|
3
|
-
Stealth-
|
|
3
|
+
Stealth-first fork of `agent-browser` for production browser automation under anti-bot pressure.
|
|
4
4
|
|
|
5
|
-
This
|
|
5
|
+
This README focuses on stealth architecture and principles. For full command coverage inherited from upstream, use:
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
- upstream docs: <https://github.com/vercel-labs/agent-browser>
|
|
8
|
+
- local help: `agent-browser --help`
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
- Fork value: stronger anti-bot defaults and operational policies
|
|
11
|
-
- Default mindset: no extra stealth toggle, stealth is always on
|
|
10
|
+
## What This Fork Optimizes
|
|
12
11
|
|
|
13
|
-
|
|
12
|
+
- Stealth is always on (legacy `launch.stealth` is accepted but ignored).
|
|
13
|
+
- Fingerprint surfaces are patched at multiple layers (launch args, CDP overrides, init scripts).
|
|
14
|
+
- Behavioral signals are humanized (typing cadence, cursor path, pacing, retry backoff).
|
|
15
|
+
- Region signals are auto-aligned (locale/timezone/Accept-Language) to reduce mismatch risk.
|
|
16
|
+
- Verification/captcha handling is policy-driven (`--risk-mode off|warn|block`).
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
npm install -g agent-browser-stealth
|
|
19
|
-
agent-browser install
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
### Quick try with npx
|
|
23
|
-
|
|
24
|
-
```bash
|
|
25
|
-
npx agent-browser-stealth install
|
|
26
|
-
npx agent-browser-stealth open example.com
|
|
27
|
-
```
|
|
18
|
+
## Quick Start
|
|
28
19
|
|
|
29
|
-
###
|
|
20
|
+
### Install
|
|
30
21
|
|
|
31
22
|
```bash
|
|
32
|
-
|
|
33
|
-
cd agent-browser
|
|
34
|
-
pnpm install
|
|
35
|
-
pnpm build
|
|
36
|
-
pnpm build:native
|
|
37
|
-
pnpm link --global
|
|
23
|
+
npm install -g agent-browser-stealth
|
|
38
24
|
agent-browser install
|
|
39
25
|
```
|
|
40
26
|
|
|
41
|
-
|
|
27
|
+
### Minimal Usage
|
|
42
28
|
|
|
43
29
|
```bash
|
|
44
30
|
agent-browser open https://example.com
|
|
45
31
|
agent-browser snapshot -i
|
|
46
32
|
agent-browser click @e2
|
|
47
|
-
agent-browser fill @e3 "test@example.com"
|
|
48
|
-
agent-browser screenshot page.png
|
|
49
33
|
```
|
|
50
34
|
|
|
51
|
-
##
|
|
35
|
+
## Stealth Architecture
|
|
36
|
+
|
|
37
|
+
```mermaid
|
|
38
|
+
flowchart TD
|
|
39
|
+
A["Command Input"] --> B["Stealth Policy Resolver"]
|
|
40
|
+
B --> C["Connection Mode Detection"]
|
|
41
|
+
C --> D["Launch Layer: Chromium Args"]
|
|
42
|
+
C --> E["CDP Layer: UA + Metadata Override"]
|
|
43
|
+
C --> F["Context Layer: Init Script Patches"]
|
|
44
|
+
D --> G["Behavior Layer: Humanized Interaction"]
|
|
45
|
+
E --> G
|
|
46
|
+
F --> G
|
|
47
|
+
G --> H["Risk Layer: Verification Detection and Handling"]
|
|
48
|
+
H --> I["Response with warnings and riskSignals"]
|
|
49
|
+
```
|
|
52
50
|
|
|
53
|
-
|
|
51
|
+
### Policy by Connection Mode
|
|
54
52
|
|
|
55
|
-
|
|
53
|
+
| Mode | Stealth Capabilities | Notes |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| Local Chromium launch | Chromium launch args + CDP UA override + context init scripts | Most complete stack |
|
|
56
|
+
| Existing browser via CDP | CDP UA override + context init scripts | No local Chromium arg injection |
|
|
57
|
+
| Cloud provider (browserbase/browseruse) | Context init scripts | Remote browser runtime controls launch layer |
|
|
58
|
+
| Kernel provider | Context init scripts + provider-managed stealth | Provider-side stealth may also apply |
|
|
56
59
|
|
|
57
|
-
|
|
58
|
-
- Adds Chromium launch args to reduce automation fingerprints
|
|
59
|
-
- Rewrites headless UA markers (`HeadlessChrome`)
|
|
60
|
-
- Patches high-signal surfaces such as:
|
|
61
|
-
- `navigator.plugins` / `navigator.mimeTypes`
|
|
62
|
-
- `window.chrome.runtime`
|
|
63
|
-
- WebGL vendor/renderer exposure
|
|
64
|
-
- permissions/language/media/device related probes
|
|
65
|
-
- Applies both context init scripts and CDP-level UA overrides
|
|
66
|
-
- Preserves explicit custom UA from `--user-agent` or `launch({ userAgent })`
|
|
60
|
+
## Principle 1: Always-On Stealth with Explicit Boundaries
|
|
67
61
|
|
|
68
|
-
|
|
62
|
+
- Stealth defaults to enabled and does not depend on a runtime toggle.
|
|
63
|
+
- Project policy forbids:
|
|
64
|
+
- `--profile` / `AGENT_BROWSER_PROFILE`
|
|
65
|
+
- `--channel` / `AGENT_BROWSER_CHANNEL`
|
|
66
|
+
- Default CLI policy expects an existing browser on CDP `localhost:9333` unless explicit connection options are provided.
|
|
69
67
|
|
|
70
|
-
|
|
71
|
-
- Random wait ranges (`wait 2000-5000`)
|
|
72
|
-
- Bezier-curve mouse movement before click actions
|
|
73
|
-
- Randomized navigation pacing
|
|
68
|
+
## Principle 2: Multi-Layer Fingerprint Hardening
|
|
74
69
|
|
|
75
|
-
###
|
|
70
|
+
### 2.1 Launch Layer (Local Chromium)
|
|
76
71
|
|
|
77
|
-
|
|
78
|
-
- Reduces locale-timezone mismatch risk on region-sensitive sites
|
|
72
|
+
Injected Chromium args:
|
|
79
73
|
|
|
80
|
-
|
|
74
|
+
- `--disable-blink-features=AutomationControlled`
|
|
75
|
+
- `--use-gl=angle`
|
|
76
|
+
- `--use-angle=default`
|
|
81
77
|
|
|
82
|
-
|
|
83
|
-
- Retries navigation with randomized backoff when triggered
|
|
78
|
+
If no custom UA is set, the runtime UA is normalized to remove `HeadlessChrome` tokens.
|
|
84
79
|
|
|
85
|
-
|
|
80
|
+
### 2.2 CDP Layer (Browser/Page Targets)
|
|
86
81
|
|
|
87
|
-
|
|
82
|
+
- Uses `Emulation.setUserAgentOverride` to align:
|
|
83
|
+
- `userAgent`
|
|
84
|
+
- `acceptLanguage`
|
|
85
|
+
- `userAgentMetadata` brands and versions
|
|
86
|
+
- Applies overrides for existing/new targets, including worker-relevant contexts.
|
|
87
|
+
- Forces opaque white background (`Emulation.setDefaultBackgroundColorOverride`) to avoid headless transparency fingerprints.
|
|
88
88
|
|
|
89
|
-
|
|
90
|
-
agent-browser type @e2 "iphone" --delay 120
|
|
91
|
-
agent-browser keyboard type "iphone" --delay 120
|
|
92
|
-
```
|
|
89
|
+
### 2.3 Context Init-Script Layer (Patch Inventory)
|
|
93
90
|
|
|
94
|
-
|
|
91
|
+
The init script patch set is injected before page scripts and currently includes:
|
|
95
92
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
93
|
+
1. `navigator.webdriver` removal (including prototype-level cleanup).
|
|
94
|
+
2. CSS webdriver heuristic neutralization (`CSS.supports('border-end-end-radius: initial')` probe).
|
|
95
|
+
3. `window.chrome.runtime` bootstrap for missing runtime surfaces.
|
|
96
|
+
4. Locale/language normalization (`navigator.language`, `navigator.languages`).
|
|
97
|
+
5. Realistic `navigator.plugins` and `navigator.mimeTypes`.
|
|
98
|
+
6. `navigator.permissions.query` normalization for notifications.
|
|
99
|
+
7. WebGL vendor/renderer masking when SwiftShader indicators are present.
|
|
100
|
+
8. `cdc_` property cleanup on document/documentElement.
|
|
101
|
+
9. Window/screen dimension normalization (`outerWidth/outerHeight/screenX/screenY`).
|
|
102
|
+
10. Screen availability patching (`availWidth/availHeight`).
|
|
103
|
+
11. Hardware concurrency stabilization.
|
|
104
|
+
12. Notification permission consistency.
|
|
105
|
+
13. Active text color heuristic patching.
|
|
106
|
+
14. `navigator.connection` normalization.
|
|
107
|
+
15. Worker network signal normalization (`downlinkMax`).
|
|
108
|
+
16. `prefers-color-scheme` light-mode heuristic neutralization.
|
|
109
|
+
17. `navigator.share` exposure.
|
|
110
|
+
18. `navigator.contacts` exposure.
|
|
111
|
+
19. `contentIndex` exposure.
|
|
112
|
+
20. `navigator.pdfViewerEnabled` normalization.
|
|
113
|
+
21. Media devices surface normalization.
|
|
114
|
+
22. `navigator.userAgent` cleanup (strip `HeadlessChrome`).
|
|
115
|
+
23. `navigator.userAgentData` brand cleanup.
|
|
116
|
+
24. `performance.memory` stabilization.
|
|
117
|
+
25. Default background color patching at script level.
|
|
100
118
|
|
|
101
|
-
##
|
|
119
|
+
## Principle 3: Behavioral Humanization
|
|
102
120
|
|
|
103
|
-
|
|
121
|
+
- Navigation pacing jitter before `goto` (short randomized delay).
|
|
122
|
+
- Typing jitter for `type --delay` and `keyboard type --delay`:
|
|
123
|
+
- per-character randomized delay around the requested base delay (about ±40%).
|
|
124
|
+
- Click path humanization:
|
|
125
|
+
- cursor moves on a Bezier-like curve before click.
|
|
126
|
+
- Wait supports random ranges (`wait min-max`) for non-uniform timing.
|
|
104
127
|
|
|
105
|
-
|
|
106
|
-
- [CreepJS](https://abrahamjuliot.github.io/creepjs/)
|
|
107
|
-
- [areyouheadless](https://arh.antoinevastel.com/bots/areyouheadless)
|
|
108
|
-
- [detect-headless](https://infosimples.github.io/detect-headless)
|
|
128
|
+
## Principle 4: Region Signal Alignment
|
|
109
129
|
|
|
110
|
-
|
|
130
|
+
Before navigation, the runtime derives region hints from the target URL's TLD and aligns:
|
|
111
131
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
132
|
+
- locale
|
|
133
|
+
- timezone
|
|
134
|
+
- `Accept-Language`
|
|
135
|
+
|
|
136
|
+
Examples of built-in mappings include `tw`, `jp`, `kr`, `sg`, `de`, `fr`, `uk`, `in`, `au`.
|
|
115
137
|
|
|
116
|
-
|
|
138
|
+
Manual overrides are supported:
|
|
117
139
|
|
|
118
|
-
|
|
140
|
+
- `AGENT_BROWSER_LOCALE`
|
|
141
|
+
- `AGENT_BROWSER_TIMEZONE` (or `TZ`)
|
|
119
142
|
|
|
120
|
-
|
|
121
|
-
- Local help: `agent-browser --help`
|
|
143
|
+
## Principle 5: Verification-Aware Risk Control
|
|
122
144
|
|
|
123
|
-
|
|
145
|
+
When a navigation lands on a verification/captcha page, structured risk signals are generated from URL/title evidence.
|
|
124
146
|
|
|
125
|
-
|
|
147
|
+
`riskSignals` include:
|
|
126
148
|
|
|
127
|
-
-
|
|
128
|
-
-
|
|
129
|
-
-
|
|
149
|
+
- `code`
|
|
150
|
+
- `source` (`url` or `title`)
|
|
151
|
+
- `evidence`
|
|
152
|
+
- `confidence`
|
|
130
153
|
|
|
131
|
-
|
|
154
|
+
### Risk Mode
|
|
132
155
|
|
|
133
|
-
-
|
|
134
|
-
-
|
|
135
|
-
-
|
|
136
|
-
- Use npm Trusted Publishing (OIDC)
|
|
156
|
+
- `warn` (default): retry with randomized backoff and return warnings + `riskSignals`.
|
|
157
|
+
- `block`: fail fast once a verification/captcha interstitial is detected.
|
|
158
|
+
- `off`: skip detection/retry path.
|
|
137
159
|
|
|
138
|
-
|
|
160
|
+
```bash
|
|
161
|
+
agent-browser --risk-mode warn open https://example.com
|
|
162
|
+
agent-browser --risk-mode block open https://example.com
|
|
163
|
+
AGENT_BROWSER_RISK_MODE=off agent-browser open https://example.com
|
|
164
|
+
```
|
|
139
165
|
|
|
140
|
-
|
|
166
|
+
```mermaid
|
|
167
|
+
flowchart TD
|
|
168
|
+
A["Navigate"] --> B["Collect URL and Title Signals"]
|
|
169
|
+
B --> C{"risk-mode"}
|
|
170
|
+
C -->|off| D["Return Success"]
|
|
171
|
+
C -->|block| E["Return Error with First Signal"]
|
|
172
|
+
C -->|warn| F["Retry up to 2 times"]
|
|
173
|
+
F --> G{"Signals Cleared"}
|
|
174
|
+
G -->|yes| H["Return Success + recovery warning + riskSignals"]
|
|
175
|
+
G -->|no| I["Return Success + warning + riskSignals"]
|
|
176
|
+
```
|
|
141
177
|
|
|
142
|
-
|
|
178
|
+
## Operational Recommendations
|
|
143
179
|
|
|
144
|
-
|
|
180
|
+
- Prefer `--headed` for high-friction targets.
|
|
181
|
+
- Reuse session state with `--session-name` for continuity.
|
|
182
|
+
- Keep locale/timezone consistent with target market.
|
|
183
|
+
- Use `--risk-mode block` in strict pipelines that require explicit operator intervention on verification pages.
|
|
145
184
|
|
|
146
|
-
|
|
185
|
+
## Validation Scripts
|
|
147
186
|
|
|
148
|
-
|
|
187
|
+
Run public detector checks after stealth changes:
|
|
149
188
|
|
|
150
189
|
```bash
|
|
151
|
-
|
|
190
|
+
node scripts/check-sannysoft-webdriver.js --binary ./cli/target/release/agent-browser
|
|
191
|
+
node scripts/check-creepjs-headless.js --binary ./cli/target/release/agent-browser
|
|
152
192
|
```
|
|
153
193
|
|
|
154
|
-
|
|
194
|
+
## Upstream Compatibility
|
|
155
195
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
```bash
|
|
159
|
-
SKIP_CLAWHUB_SYNC=1 git push
|
|
160
|
-
```
|
|
196
|
+
This fork intentionally keeps command workflows close to upstream while concentrating custom behavior in stealth, policy, and anti-detection handling.
|
|
161
197
|
|
|
162
198
|
## License
|
|
163
199
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/dist/actions.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { BrowserManager, ScreencastFrame } from './browser.js';
|
|
2
|
-
import type { Command, Response } from './types.js';
|
|
2
|
+
import type { Command, Response, RiskSignal } from './types.js';
|
|
3
3
|
/**
|
|
4
4
|
* Set the callback for screencast frames
|
|
5
5
|
* This is called by the daemon to set up frame streaming
|
|
@@ -14,4 +14,8 @@ export declare function toAIFriendlyError(error: unknown, selector: string): Err
|
|
|
14
14
|
* Execute a command and return a response
|
|
15
15
|
*/
|
|
16
16
|
export declare function executeCommand(command: Command, browser: BrowserManager): Promise<Response>;
|
|
17
|
+
/**
|
|
18
|
+
* Detect verification/captcha interstitials and return structured risk evidence.
|
|
19
|
+
*/
|
|
20
|
+
export declare function detectRiskSignals(url: string, title: string): RiskSignal[];
|
|
17
21
|
//# sourceMappingURL=actions.d.ts.map
|
package/dist/actions.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAUpE,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,
|
|
1
|
+
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAUpE,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAmIR,UAAU,EACX,MAAM,YAAY,CAAC;AAQpB;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,CAAC,CAAC,KAAK,EAAE,eAAe,KAAK,IAAI,CAAC,GAAG,IAAI,GAClD,IAAI,CAEN;AAQD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAqDzE;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAuRjG;AAyGD;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,UAAU,EAAE,CA8C1E"}
|
package/dist/actions.js
CHANGED
|
@@ -359,65 +359,114 @@ async function handleNavigate(command, browser) {
|
|
|
359
359
|
await page.goto(command.url, {
|
|
360
360
|
waitUntil: command.waitUntil ?? 'load',
|
|
361
361
|
});
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
const title = await page.title();
|
|
365
|
-
const captchaDetected = isCaptchaPage(finalUrl, title);
|
|
366
|
-
if (captchaDetected) {
|
|
367
|
-
const maxRetries = 2;
|
|
368
|
-
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
369
|
-
const backoff = 3000 + Math.random() * 4000;
|
|
370
|
-
await page.waitForTimeout(Math.round(backoff));
|
|
371
|
-
await page.goto(command.url, {
|
|
372
|
-
waitUntil: command.waitUntil ?? 'load',
|
|
373
|
-
});
|
|
374
|
-
const retryUrl = page.url();
|
|
375
|
-
const retryTitle = await page.title();
|
|
376
|
-
if (!isCaptchaPage(retryUrl, retryTitle)) {
|
|
377
|
-
return successResponse(command.id, {
|
|
378
|
-
url: retryUrl,
|
|
379
|
-
title: retryTitle,
|
|
380
|
-
});
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
// All retries exhausted -- return the page as-is with a warning
|
|
362
|
+
const riskMode = command.riskMode ?? 'warn';
|
|
363
|
+
if (riskMode === 'off') {
|
|
384
364
|
return successResponse(command.id, {
|
|
385
365
|
url: page.url(),
|
|
386
366
|
title: await page.title(),
|
|
387
|
-
warning: 'Captcha/verification page detected. Try --headed mode or use --session-name for state persistence.',
|
|
388
367
|
});
|
|
389
368
|
}
|
|
369
|
+
// Detect risk interstitials (captcha/verification) and handle by risk mode.
|
|
370
|
+
const finalUrl = page.url();
|
|
371
|
+
const title = await page.title();
|
|
372
|
+
let encounteredSignals = detectRiskSignals(finalUrl, title);
|
|
373
|
+
if (encounteredSignals.length === 0) {
|
|
374
|
+
return successResponse(command.id, {
|
|
375
|
+
url: finalUrl,
|
|
376
|
+
title,
|
|
377
|
+
});
|
|
378
|
+
}
|
|
379
|
+
if (riskMode === 'block') {
|
|
380
|
+
const first = encounteredSignals[0];
|
|
381
|
+
return errorResponse(command.id, `Navigation blocked by risk-mode=block: ${first.code} (${first.source}="${first.evidence}")`);
|
|
382
|
+
}
|
|
383
|
+
const maxRetries = 2;
|
|
384
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
385
|
+
const backoff = 3000 + Math.random() * 4000;
|
|
386
|
+
await page.waitForTimeout(Math.round(backoff));
|
|
387
|
+
await page.goto(command.url, {
|
|
388
|
+
waitUntil: command.waitUntil ?? 'load',
|
|
389
|
+
});
|
|
390
|
+
const retryUrl = page.url();
|
|
391
|
+
const retryTitle = await page.title();
|
|
392
|
+
const retrySignals = detectRiskSignals(retryUrl, retryTitle);
|
|
393
|
+
if (retrySignals.length === 0) {
|
|
394
|
+
return successResponse(command.id, {
|
|
395
|
+
url: retryUrl,
|
|
396
|
+
title: retryTitle,
|
|
397
|
+
warning: 'Risk interstitial detected and recovered after retry. Review riskSignals for evidence.',
|
|
398
|
+
riskSignals: encounteredSignals,
|
|
399
|
+
});
|
|
400
|
+
}
|
|
401
|
+
encounteredSignals = mergeRiskSignals(encounteredSignals, retrySignals);
|
|
402
|
+
}
|
|
403
|
+
// All retries exhausted -- return the page as-is with a warning and evidence.
|
|
390
404
|
return successResponse(command.id, {
|
|
391
|
-
url: finalUrl,
|
|
392
|
-
title,
|
|
405
|
+
url: page.url(),
|
|
406
|
+
title: await page.title(),
|
|
407
|
+
warning: 'Captcha/verification page detected. Try --headed mode or use --session-name for state persistence.',
|
|
408
|
+
riskSignals: encounteredSignals,
|
|
393
409
|
});
|
|
394
410
|
}
|
|
395
|
-
function isCaptchaPage(url, title) {
|
|
411
|
+
function mergeRiskSignals(current, next) {
|
|
412
|
+
const merged = new Map();
|
|
413
|
+
for (const signal of [...current, ...next]) {
|
|
414
|
+
const key = `${signal.code}|${signal.source}|${signal.evidence}`;
|
|
415
|
+
if (!merged.has(key) || (merged.get(key)?.confidence ?? 0) < signal.confidence) {
|
|
416
|
+
merged.set(key, signal);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
return [...merged.values()];
|
|
420
|
+
}
|
|
421
|
+
/**
|
|
422
|
+
* Detect verification/captcha interstitials and return structured risk evidence.
|
|
423
|
+
*/
|
|
424
|
+
export function detectRiskSignals(url, title) {
|
|
396
425
|
const lowerUrl = url.toLowerCase();
|
|
397
426
|
const lowerTitle = title.toLowerCase();
|
|
398
|
-
const
|
|
399
|
-
'/verify/captcha',
|
|
400
|
-
'/captcha',
|
|
401
|
-
'/challenge',
|
|
402
|
-
'scene=crawler',
|
|
403
|
-
'scene=anti_bot',
|
|
404
|
-
'recaptcha',
|
|
405
|
-
'hcaptcha',
|
|
427
|
+
const urlPatterns = [
|
|
428
|
+
{ pattern: '/verify/captcha', code: 'captcha_interstitial', confidence: 0.98 },
|
|
429
|
+
{ pattern: '/captcha', code: 'captcha_interstitial', confidence: 0.95 },
|
|
430
|
+
{ pattern: '/challenge', code: 'verification_interstitial', confidence: 0.93 },
|
|
431
|
+
{ pattern: 'scene=crawler', code: 'bot_challenge', confidence: 0.99 },
|
|
432
|
+
{ pattern: 'scene=anti_bot', code: 'bot_challenge', confidence: 0.99 },
|
|
433
|
+
{ pattern: 'recaptcha', code: 'captcha_interstitial', confidence: 0.97 },
|
|
434
|
+
{ pattern: 'hcaptcha', code: 'captcha_interstitial', confidence: 0.97 },
|
|
406
435
|
];
|
|
407
436
|
const titlePatterns = [
|
|
408
|
-
'verify',
|
|
409
|
-
'captcha',
|
|
410
|
-
'challenge',
|
|
411
|
-
'attention required',
|
|
412
|
-
'just a moment',
|
|
413
|
-
'checking your browser',
|
|
414
|
-
'access denied',
|
|
415
|
-
'驗證',
|
|
416
|
-
'验证',
|
|
417
|
-
'人机验证',
|
|
437
|
+
{ pattern: 'verify', code: 'verification_interstitial', confidence: 0.78 },
|
|
438
|
+
{ pattern: 'captcha', code: 'captcha_interstitial', confidence: 0.9 },
|
|
439
|
+
{ pattern: 'challenge', code: 'verification_interstitial', confidence: 0.8 },
|
|
440
|
+
{ pattern: 'attention required', code: 'verification_interstitial', confidence: 0.96 },
|
|
441
|
+
{ pattern: 'just a moment', code: 'verification_interstitial', confidence: 0.95 },
|
|
442
|
+
{ pattern: 'checking your browser', code: 'verification_interstitial', confidence: 0.97 },
|
|
443
|
+
{ pattern: 'access denied', code: 'access_gate', confidence: 0.86 },
|
|
444
|
+
{ pattern: '驗證', code: 'verification_interstitial', confidence: 0.88 },
|
|
445
|
+
{ pattern: '验证', code: 'verification_interstitial', confidence: 0.88 },
|
|
446
|
+
{ pattern: '人机验证', code: 'captcha_interstitial', confidence: 0.95 },
|
|
418
447
|
];
|
|
419
|
-
|
|
420
|
-
|
|
448
|
+
const signals = [];
|
|
449
|
+
for (const item of urlPatterns) {
|
|
450
|
+
if (lowerUrl.includes(item.pattern)) {
|
|
451
|
+
signals.push({
|
|
452
|
+
code: item.code,
|
|
453
|
+
source: 'url',
|
|
454
|
+
evidence: item.pattern,
|
|
455
|
+
confidence: item.confidence,
|
|
456
|
+
});
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
for (const item of titlePatterns) {
|
|
460
|
+
if (lowerTitle.includes(item.pattern)) {
|
|
461
|
+
signals.push({
|
|
462
|
+
code: item.code,
|
|
463
|
+
source: 'title',
|
|
464
|
+
evidence: item.pattern,
|
|
465
|
+
confidence: item.confidence,
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
return mergeRiskSignals([], signals);
|
|
421
470
|
}
|
|
422
471
|
function bezierPoint(t, p0, p1, p2, p3) {
|
|
423
472
|
const u = 1 - t;
|