webpeel 0.21.85 → 0.21.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ee/challenge-re-export.d.ts +1 -0
- package/dist/ee/challenge-re-export.js +1 -0
- package/dist/ee/challenge-solver.d.ts +72 -0
- package/dist/ee/challenge-solver.js +720 -0
- package/dist/ee/domain-extractors.d.ts +48 -0
- package/dist/ee/domain-extractors.js +6342 -0
- package/dist/ee/domain-intel.d.ts +16 -0
- package/dist/ee/domain-intel.js +133 -0
- package/dist/ee/extractors-re-export.d.ts +1 -0
- package/dist/ee/extractors-re-export.js +1 -0
- package/dist/ee/premium-hooks.d.ts +20 -0
- package/dist/ee/premium-hooks.js +50 -0
- package/dist/ee/spa-detection.d.ts +2 -0
- package/dist/ee/spa-detection.js +2 -0
- package/dist/ee/stability.d.ts +4 -0
- package/dist/ee/stability.js +29 -0
- package/dist/ee/swr-cache.d.ts +14 -0
- package/dist/ee/swr-cache.js +34 -0
- package/dist/server/routes/smart-search.d.ts +11 -0
- package/dist/server/routes/smart-search.js +64 -7
- package/package.json +2 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { solveChallenge } from './challenge-solver.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { solveChallenge } from './challenge-solver.js';
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Challenge / bot-protection solver.
|
|
3
|
+
*
|
|
4
|
+
* Attempts to bypass bot-protection challenges using free methods that run in-process by default:
|
|
5
|
+
* 1. Cloudflare JS challenge — render in stealth Playwright, wait for auto-solve
|
|
6
|
+
* 2. hCaptcha — accessibility bypass (TODO: implement if API is confirmed available)
|
|
7
|
+
*
|
|
8
|
+
* Architecture note:
|
|
9
|
+
* Browser-based solving is CPU/RAM intensive. When the env var BROWSER_WORKER_URL
|
|
10
|
+
* is set, the solve request is proxied to an external worker (e.g. a Hetzner 4 GB VM)
|
|
11
|
+
* instead of running locally. This keeps the main Render container (512 MB) lean.
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* const result = await solveChallenge(url, 'cloudflare', html);
|
|
15
|
+
* if (result.solved) {
|
|
16
|
+
* // result.html = real page content
|
|
17
|
+
* // result.cookies = ["cf_clearance=...", ...]
|
|
18
|
+
* }
|
|
19
|
+
*/
|
|
20
|
+
import type { ChallengeType } from '../core/challenge-detection.js';
|
|
21
|
+
export interface ImageCaptchaResult {
|
|
22
|
+
solved: boolean;
|
|
23
|
+
rounds: number;
|
|
24
|
+
error?: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Ask the moondream vision model which grid cells contain the target object.
|
|
28
|
+
* Returns an array of 1-indexed grid positions (1–9), or null if the call fails.
|
|
29
|
+
*/
|
|
30
|
+
export declare function askVisionModel(base64Image: string, targetObject: string): Promise<number[] | null>;
|
|
31
|
+
/**
|
|
32
|
+
* Detect if the page has an image grid CAPTCHA and extract the target object.
|
|
33
|
+
* Returns the object name (e.g. "traffic lights") or null if not detected.
|
|
34
|
+
*/
|
|
35
|
+
export declare function detectImageCaptchaTarget(page: import('playwright').Page): Promise<string | null>;
|
|
36
|
+
/**
|
|
37
|
+
* Solve an image grid CAPTCHA using the moondream vision model.
|
|
38
|
+
*
|
|
39
|
+
* Flow per round:
|
|
40
|
+
* 1. Screenshot the CAPTCHA grid element
|
|
41
|
+
* 2. Send to moondream → get grid positions
|
|
42
|
+
* 3. Click identified cells
|
|
43
|
+
* 4. Click Verify button
|
|
44
|
+
* 5. Check if solved; if a new round appears, repeat (max 3 rounds)
|
|
45
|
+
*/
|
|
46
|
+
export declare function solveImageCaptcha(page: import('playwright').Page, targetObject: string): Promise<ImageCaptchaResult>;
|
|
47
|
+
export interface SolveOptions {
|
|
48
|
+
/** Hard timeout in milliseconds (default: 15000) */
|
|
49
|
+
timeout?: number;
|
|
50
|
+
/** Optional proxy URL (http://user:pass@host:port) */
|
|
51
|
+
proxy?: string;
|
|
52
|
+
}
|
|
53
|
+
export interface SolveResult {
|
|
54
|
+
solved: boolean;
|
|
55
|
+
html: string;
|
|
56
|
+
/** Raw Set-Cookie header values extracted after solve */
|
|
57
|
+
cookies?: string[];
|
|
58
|
+
/** How the solve was performed */
|
|
59
|
+
method?: 'local-browser' | 'remote-worker' | 'accessibility';
|
|
60
|
+
/** Error details if solve failed */
|
|
61
|
+
error?: string;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Attempt to solve a bot-protection challenge.
|
|
65
|
+
*
|
|
66
|
+
* @param url The page URL (used for proxy routing and cookie caching)
|
|
67
|
+
* @param challengeType The type of challenge as detected by challenge-detection
|
|
68
|
+
* @param html The raw challenge HTML (used for context / fallback)
|
|
69
|
+
* @param options Optional timeout and proxy settings
|
|
70
|
+
* @returns Solve result; when `solved` is true, `html` holds the real page content and `cookies` may be set; on failure, `error` may carry details
|
|
71
|
+
*/
|
|
72
|
+
export declare function solveChallenge(url: string, challengeType: ChallengeType, html: string, options?: SolveOptions): Promise<SolveResult>;
|