webpeel 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -21
- package/README.md +148 -497
- package/dist/cli-auth.d.ts +2 -0
- package/dist/cli-auth.d.ts.map +1 -1
- package/dist/cli-auth.js +16 -3
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +475 -77
- package/dist/cli.js.map +1 -1
- package/dist/core/actions.d.ts +19 -10
- package/dist/core/actions.d.ts.map +1 -1
- package/dist/core/actions.js +214 -43
- package/dist/core/actions.js.map +1 -1
- package/dist/core/agent.d.ts +60 -3
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +375 -86
- package/dist/core/agent.js.map +1 -1
- package/dist/core/answer.d.ts +43 -0
- package/dist/core/answer.d.ts.map +1 -0
- package/dist/core/answer.js +378 -0
- package/dist/core/answer.js.map +1 -0
- package/dist/core/cache.d.ts +14 -0
- package/dist/core/cache.d.ts.map +1 -0
- package/dist/core/cache.js +122 -0
- package/dist/core/cache.js.map +1 -0
- package/dist/core/dns-cache.d.ts +21 -0
- package/dist/core/dns-cache.d.ts.map +1 -0
- package/dist/core/dns-cache.js +184 -0
- package/dist/core/dns-cache.js.map +1 -0
- package/dist/core/documents.d.ts +24 -0
- package/dist/core/documents.d.ts.map +1 -0
- package/dist/core/documents.js +124 -0
- package/dist/core/documents.js.map +1 -0
- package/dist/core/extract-inline.d.ts +39 -0
- package/dist/core/extract-inline.d.ts.map +1 -0
- package/dist/core/extract-inline.js +214 -0
- package/dist/core/extract-inline.js.map +1 -0
- package/dist/core/fetcher.d.ts +33 -7
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +608 -41
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/jobs.d.ts +66 -0
- package/dist/core/jobs.d.ts.map +1 -0
- package/dist/core/jobs.js +513 -0
- package/dist/core/jobs.js.map +1 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +141 -31
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/pdf.d.ts.map +1 -1
- package/dist/core/pdf.js +3 -1
- package/dist/core/pdf.js.map +1 -1
- package/dist/core/screenshot.d.ts +33 -0
- package/dist/core/screenshot.d.ts.map +1 -0
- package/dist/core/screenshot.js +30 -0
- package/dist/core/screenshot.js.map +1 -0
- package/dist/core/search-provider.d.ts +46 -0
- package/dist/core/search-provider.d.ts.map +1 -0
- package/dist/core/search-provider.js +281 -0
- package/dist/core/search-provider.js.map +1 -0
- package/dist/core/strategies.d.ts +17 -26
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +308 -67
- package/dist/core/strategies.js.map +1 -1
- package/dist/core/strategy-hooks.d.ts +76 -0
- package/dist/core/strategy-hooks.d.ts.map +1 -0
- package/dist/core/strategy-hooks.js +33 -0
- package/dist/core/strategy-hooks.js.map +1 -0
- package/dist/index.d.ts +9 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +61 -32
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +335 -70
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +43 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +85 -47
- package/package.json +12 -6
package/dist/core/strategies.js
CHANGED
|
@@ -1,44 +1,73 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Smart escalation strategy: try simple fetch first, escalate to browser if needed
|
|
2
|
+
* Smart escalation strategy: try simple fetch first, escalate to browser if needed.
|
|
3
|
+
*
|
|
4
|
+
* Premium server-side optimisations (SWR cache, domain intelligence, parallel
|
|
5
|
+
* race) are injected via the hook system in `strategy-hooks.ts`. When no hooks
|
|
6
|
+
* are registered the strategy degrades gracefully to a simple escalation path
|
|
7
|
+
* that works great for CLI / npm library usage.
|
|
3
8
|
*/
|
|
4
9
|
import { simpleFetch, browserFetch, retryFetch } from './fetcher.js';
|
|
10
|
+
import { getCached, setCached as setBasicCache } from './cache.js';
|
|
11
|
+
import { resolveAndCache } from './dns-cache.js';
|
|
5
12
|
import { BlockedError, NetworkError } from '../types.js';
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
*
|
|
15
|
-
* Returns the result along with which method worked
|
|
16
|
-
*/
|
|
17
|
-
export async function smartFetch(url, options = {}) {
|
|
18
|
-
const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions, keepPageOpen = false, } = options;
|
|
19
|
-
// If stealth is requested, force browser mode (stealth requires browser)
|
|
20
|
-
const shouldUseBrowser = forceBrowser || screenshot || stealth;
|
|
21
|
-
// Strategy 1: Simple fetch (unless browser is forced or screenshot is requested)
|
|
22
|
-
if (!shouldUseBrowser) {
|
|
23
|
-
try {
|
|
24
|
-
const result = await retryFetch(() => simpleFetch(url, userAgent, timeoutMs, headers), 3);
|
|
25
|
-
return {
|
|
26
|
-
...result,
|
|
27
|
-
method: 'simple',
|
|
28
|
-
};
|
|
13
|
+
import { getStrategyHooks, } from './strategy-hooks.js';
|
|
14
|
+
/* ---------- hardcoded domain rules -------------------------------------- */
|
|
15
|
+
/**
 * Hardcoded per-domain fetch rules, checked in order.
 * Each entry is `[registrable domain, mode]`; a rule matches the domain itself
 * and every subdomain of it.
 */
const FORCED_MODE_BY_DOMAIN = Object.freeze([
    // Reddit often returns an HTML shell via simple fetch
    ['reddit.com', 'browser'],
    // npmjs blocks simple fetch with 403 frequently
    ['npmjs.com', 'browser'],
    // These are known to aggressively block automation
    ['glassdoor.com', 'stealth'],
    ['bloomberg.com', 'stealth'],
    ['indeed.com', 'stealth'],
]);
/**
 * Look up the hardcoded fetch rule for `url`.
 *
 * @param {string} url - Absolute URL about to be fetched.
 * @returns {{ mode: 'browser' | 'stealth' } | null} The mode the host requires,
 *   or `null` when no rule matches or `url` cannot be parsed.
 */
function shouldForceBrowser(url) {
    let hostname;
    try {
        // A fully-qualified host ("www.reddit.com.") keeps its trailing dot after
        // URL parsing; drop it so the rule still matches.
        hostname = new URL(url).hostname.toLowerCase().replace(/\.$/, '');
    }
    catch {
        // Ignore URL parsing errors; validation happens inside fetchers.
        return null;
    }
    for (const [domain, mode] of FORCED_MODE_BY_DOMAIN) {
        if (hostname === domain || hostname.endsWith(`.${domain}`)) {
            return { mode };
        }
    }
    return null;
}
|
|
44
|
+
/* ---------- helpers ------------------------------------------------------ */
|
|
45
|
+
/**
 * Tell whether `error` is an `Error` instance whose name is `AbortError`.
 *
 * @param {unknown} error - Value caught from a fetch attempt.
 * @returns {boolean} `true` only for `Error` instances named `AbortError`.
 */
function isAbortError(error) {
    if (!(error instanceof Error)) {
        return false;
    }
    return error.name === 'AbortError';
}
|
|
48
|
+
/**
 * Decide whether a failed simple fetch should be retried through the browser.
 *
 * @param {unknown} error - Error produced by the simple fetch.
 * @returns {boolean} `true` for any `BlockedError`, or for a `NetworkError`
 *   whose message contains `TLS/SSL`; `false` for everything else.
 */
function shouldEscalateSimpleError(error) {
    if (error instanceof BlockedError) {
        return true;
    }
    if (!(error instanceof NetworkError)) {
        return false;
    }
    return error.message.includes('TLS/SSL');
}
|
|
53
|
+
/**
 * Heuristic: does an HTML response look like an empty application shell that
 * needs a real browser to render?
 *
 * A response qualifies when its content type mentions `html`, the markup is
 * longer than 1000 characters, and fewer than 500 characters of text remain
 * once comments, `<script>`/`<style>` elements and all tags are removed.
 * Script and style source is excluded because it is not page text; counting it
 * let shells with large inline bootstrap code pass as real content.
 *
 * @param {{ contentType?: string, html?: string }} result - Fetch result to inspect.
 * @returns {boolean} `true` when the response looks like a shell page.
 */
function looksLikeShellPage(result) {
    const contentType = (result.contentType || '').toLowerCase();
    if (!contentType.includes('html')) {
        return false;
    }
    // Treat a missing or non-string body as empty markup instead of throwing.
    const html = typeof result.html === 'string' ? result.html : '';
    const visibleText = html
        .replace(/<!--[\s\S]*?-->/g, '')
        .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, '')
        .replace(/<[^>]*>/g, '')
        .trim();
    return visibleText.length < 500 && html.length > 1000;
}
|
|
60
|
+
/**
 * Kick off a DNS lookup for the host of `url` without waiting for it.
 * Every failure (unparsable URL, rejected lookup) is deliberately ignored.
 *
 * @param {string} url - URL whose hostname is passed to `resolveAndCache`.
 * @returns {void}
 */
function prefetchDns(url) {
    try {
        const { hostname } = new URL(url);
        const lookup = resolveAndCache(hostname);
        // Fire-and-forget: a rejected lookup must not surface as an unhandled rejection.
        void lookup.catch(() => { });
    }
    catch {
        // Ignore invalid URL.
    }
}
|
|
69
|
+
async function fetchWithBrowserStrategy(url, options) {
|
|
70
|
+
const { userAgent, waitMs, timeoutMs, screenshot, screenshotFullPage, headers, cookies, actions, keepPageOpen, effectiveStealth, signal, } = options;
|
|
42
71
|
try {
|
|
43
72
|
const result = await browserFetch(url, {
|
|
44
73
|
userAgent,
|
|
@@ -48,62 +77,274 @@ export async function smartFetch(url, options = {}) {
|
|
|
48
77
|
screenshotFullPage,
|
|
49
78
|
headers,
|
|
50
79
|
cookies,
|
|
51
|
-
stealth,
|
|
80
|
+
stealth: effectiveStealth,
|
|
52
81
|
actions,
|
|
53
82
|
keepPageOpen,
|
|
83
|
+
signal,
|
|
54
84
|
});
|
|
55
85
|
return {
|
|
56
86
|
...result,
|
|
57
|
-
method:
|
|
87
|
+
method: effectiveStealth ? 'stealth' : 'browser',
|
|
58
88
|
};
|
|
59
89
|
}
|
|
60
90
|
catch (error) {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
};
|
|
80
|
-
}
|
|
81
|
-
catch (stealthError) {
|
|
82
|
-
// If stealth also fails, throw the original error
|
|
83
|
-
throw stealthError;
|
|
84
|
-
}
|
|
91
|
+
if (isAbortError(error))
|
|
92
|
+
throw error;
|
|
93
|
+
// If browser gets blocked, try stealth as fallback (unless already stealth)
|
|
94
|
+
if (!effectiveStealth && error instanceof BlockedError) {
|
|
95
|
+
const result = await browserFetch(url, {
|
|
96
|
+
userAgent,
|
|
97
|
+
waitMs,
|
|
98
|
+
timeoutMs,
|
|
99
|
+
screenshot,
|
|
100
|
+
screenshotFullPage,
|
|
101
|
+
headers,
|
|
102
|
+
cookies,
|
|
103
|
+
stealth: true,
|
|
104
|
+
actions,
|
|
105
|
+
keepPageOpen,
|
|
106
|
+
signal,
|
|
107
|
+
});
|
|
108
|
+
return { ...result, method: 'stealth' };
|
|
85
109
|
}
|
|
86
|
-
// If
|
|
110
|
+
// If Cloudflare detected, retry with extra wait time
|
|
87
111
|
if (error instanceof NetworkError &&
|
|
88
112
|
error.message.toLowerCase().includes('cloudflare')) {
|
|
89
113
|
const result = await browserFetch(url, {
|
|
90
114
|
userAgent,
|
|
91
|
-
waitMs: 5000,
|
|
115
|
+
waitMs: 5000,
|
|
92
116
|
timeoutMs,
|
|
93
117
|
screenshot,
|
|
94
118
|
screenshotFullPage,
|
|
95
119
|
headers,
|
|
96
120
|
cookies,
|
|
97
|
-
stealth,
|
|
121
|
+
stealth: effectiveStealth,
|
|
98
122
|
actions,
|
|
99
123
|
keepPageOpen,
|
|
124
|
+
signal,
|
|
100
125
|
});
|
|
101
|
-
return {
|
|
102
|
-
...result,
|
|
103
|
-
method: stealth ? 'stealth' : 'browser',
|
|
104
|
-
};
|
|
126
|
+
return { ...result, method: effectiveStealth ? 'stealth' : 'browser' };
|
|
105
127
|
}
|
|
106
128
|
throw error;
|
|
107
129
|
}
|
|
108
130
|
}
|
|
131
|
+
/* ---------- main entry point -------------------------------------------- */
|
|
132
|
+
/**
 * Smart fetch with automatic escalation.
 *
 * Without hooks: simple fetch → browser → stealth escalation.
 * With premium hooks: SWR cache → domain intel → parallel race → escalation.
 *
 * @param {string} url - Page to fetch.
 * @param {object} [options] - Fetch options. Recognised keys and defaults:
 *   `forceBrowser` (false), `stealth` (false), `waitMs` (0), `userAgent`,
 *   `timeoutMs` (30000), `screenshot` (false), `screenshotFullPage` (false),
 *   `headers`, `cookies`, `actions`, `keepPageOpen` (false), `noCache` (false),
 *   `raceTimeoutMs` (2000).
 * @returns {Promise<object>} The fetch result with a `method` field of
 *   `'simple'`, `'browser'`, `'stealth'` or `'cached'`.
 * @throws Rethrows a simple-fetch error that is not eligible for escalation,
 *   and any error raised by the browser strategy.
 */
export async function smartFetch(url, options = {}) {
    const { forceBrowser = false, stealth = false, waitMs = 0, userAgent, timeoutMs = 30000, screenshot = false, screenshotFullPage = false, headers, cookies, actions, keepPageOpen = false, noCache = false, raceTimeoutMs = 2000, } = options;
    const hooks = getStrategyHooks();
    const fetchStartMs = Date.now();
    const recordMethod = (method) => {
        if (method === 'cached') {
            return;
        }
        hooks.recordDomainResult?.(url, method, Date.now() - fetchStartMs);
    };
    /* ---- determine effective mode ---------------------------------------- */
    // Hardcoded rules always take priority, then hook-based domain intelligence.
    const forced = shouldForceBrowser(url);
    const recommended = hooks.getDomainRecommendation?.(url) ?? null;
    const selected = forced ?? recommended;
    const effectiveForceBrowser = forceBrowser || Boolean(selected);
    const effectiveStealth = stealth || selected?.mode === 'stealth';
    prefetchDns(url);
    /* ---- cache eligibility ----------------------------------------------- */
    // Only plain, option-free requests are cacheable: anything that changes the
    // response (headers, cookies, actions, ...) or needs a live page is excluded.
    const canUseCache = !noCache &&
        !effectiveForceBrowser &&
        !effectiveStealth &&
        !screenshot &&
        !keepPageOpen &&
        !actions?.length &&
        !headers &&
        !cookies &&
        waitMs === 0 &&
        !userAgent;
    // Store (when eligible), record, and hand back a freshly fetched result.
    const finish = (result) => {
        if (canUseCache) {
            // `setCache` returns nothing, so chaining it with `??` would always run
            // the fallback too. Write the basic cache only when it is the cache
            // that reads are served from (i.e. no `checkCache` hook).
            hooks.setCache?.(url, result);
            if (!hooks.checkCache) {
                setBasicCache(url, result);
            }
        }
        recordMethod(result.method);
        return result;
    };
    /* ---- cache lookup: hook cache (premium) or basic cache ---------------- */
    if (canUseCache) {
        if (hooks.checkCache) {
            const cached = hooks.checkCache(url);
            if (cached) {
                if (cached.stale && hooks.markRevalidating?.(url)) {
                    // Background revalidation — fire-and-forget
                    void revalidateInBackground(hooks, url, userAgent, timeoutMs);
                }
                return { ...cached.value, method: 'cached' };
            }
        }
        else {
            const basicCached = getCached(url);
            if (basicCached) {
                return { ...basicCached, method: 'cached' };
            }
        }
    }
    /* ---- browser-level options ------------------------------------------- */
    const browserOptions = {
        userAgent,
        waitMs,
        timeoutMs,
        screenshot,
        screenshotFullPage,
        headers,
        cookies,
        actions,
        keepPageOpen,
        effectiveStealth,
    };
    /* ---- Strategy: simple fetch (with optional race) --------------------- */
    const shouldUseBrowser = effectiveForceBrowser || screenshot || effectiveStealth;
    if (!shouldUseBrowser) {
        const simpleAbortController = new AbortController();
        const simplePromise = retryFetch(() => simpleFetch(url, userAgent, timeoutMs, headers, simpleAbortController.signal), 3).then((result) => {
            if (looksLikeShellPage(result)) {
                throw new BlockedError('Shell page detected. Browser rendering required.');
            }
            return result;
        });
        // Never-rejecting view of the simple fetch, safe to await anywhere.
        const settledSimple = simplePromise.then((result) => ({ type: 'simple-success', result }), (error) => ({ type: 'simple-error', error }));
        // Racing is opt-in via hooks; without it the simple fetch just runs to completion.
        const useRace = hooks.shouldRace?.() ?? false;
        const firstOutcome = useRace
            ? await raceAgainstTimeout(settledSimple, hooks.getRaceTimeoutMs?.() ?? raceTimeoutMs)
            : await settledSimple;
        if (firstOutcome.type === 'simple-success') {
            return finish({ ...firstOutcome.result, method: 'simple' });
        }
        if (firstOutcome.type === 'simple-error') {
            if (!shouldEscalateSimpleError(firstOutcome.error)) {
                throw firstOutcome.error;
            }
            // Escalatable failure: fall through to the browser strategy below.
        }
        else {
            // Race timeout: simple still running, start browser too and take
            // whichever succeeds first.
            const browserAbortController = new AbortController();
            let simpleError;
            let browserError;
            const simpleCandidate = simplePromise
                .then((result) => ({ source: 'simple', result }))
                .catch((error) => {
                simpleError = error;
                throw error;
            });
            const browserCandidate = fetchWithBrowserStrategy(url, {
                ...browserOptions,
                signal: browserAbortController.signal,
            })
                .then((result) => ({ source: 'browser', result }))
                .catch((error) => {
                browserError = error;
                throw error;
            });
            let winner;
            try {
                winner = await Promise.any([simpleCandidate, browserCandidate]);
            }
            catch {
                // Both attempts failed. A non-escalatable simple error is the most
                // meaningful one; otherwise prefer the browser's error.
                if (simpleError &&
                    !shouldEscalateSimpleError(simpleError) &&
                    !isAbortError(simpleError)) {
                    throw simpleError;
                }
                if (browserError) {
                    throw browserError;
                }
                if (simpleError) {
                    throw simpleError;
                }
                throw new Error('Both simple and browser fetch attempts failed');
            }
            // Success handling stays outside the try so that an error thrown by a
            // cache/record hook is not mistaken for "both attempts failed".
            if (winner.source === 'simple') {
                browserAbortController.abort();
                return finish({ ...winner.result, method: 'simple' });
            }
            simpleAbortController.abort();
            return finish(winner.result);
        }
    }
    /* ---- browser / stealth fallback -------------------------------------- */
    return finish(await fetchWithBrowserStrategy(url, browserOptions));
}
/**
 * Resolve with `promise`'s value, or with `{ type: 'race-timeout' }` if it has
 * not settled within `timeoutMs` (negative values are treated as 0). The timer
 * is always cleared.
 */
function raceAgainstTimeout(promise, timeoutMs) {
    let timer;
    const timeout = new Promise((resolve) => {
        timer = setTimeout(() => resolve({ type: 'race-timeout' }), Math.max(timeoutMs, 0));
    });
    return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}
/**
 * Refresh a stale hook-cache entry with a simple fetch. Never rejects: on any
 * failure, or when the fresh response looks like a shell page, the cache is
 * left untouched.
 */
async function revalidateInBackground(hooks, url, userAgent, timeoutMs) {
    try {
        const fresh = await simpleFetch(url, userAgent, timeoutMs);
        if (!looksLikeShellPage(fresh)) {
            hooks.setCache?.(url, { ...fresh, method: 'simple' });
        }
    }
    catch {
        // Stale entry continues serving.
    }
}
|
|
345
|
+
/* ---------- legacy export for tests ------------------------------------- */
|
|
346
|
+
/**
|
|
347
|
+
* @deprecated Use `clearStrategyHooks()` from strategy-hooks.ts instead.
|
|
348
|
+
*/
|
|
349
|
+
export { clearStrategyHooks as clearDomainIntel } from './strategy-hooks.js';
|
|
109
350
|
//# sourceMappingURL=strategies.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AA6CzD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,UAA2B,EAAE;IACzE,MAAM,EACJ,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,KAAK,EACf,MAAM,GAAG,CAAC,EACV,SAAS,EACT,SAAS,GAAG,KAAK,EACjB,UAAU,GAAG,KAAK,EAClB,kBAAkB,GAAG,KAAK,EAC1B,OAAO,EACP,OAAO,EACP,OAAO,EACP,YAAY,GAAG,KAAK,GACrB,GAAG,OAAO,CAAC;IAEZ,yEAAyE;IACzE,MAAM,gBAAgB,GAAG,YAAY,IAAI,UAAU,IAAI,OAAO,CAAC;IAE/D,iFAAiF;IACjF,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAC7B,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,CAAC,EACrD,CAAC,CACF,CAAC;YACF,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,QAAQ;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,8CAA8C;YAC9C,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;gBAClC,mCAAmC;YACrC,CAAC;iBAAM,CAAC;gBACN,kDAAkD;gBAClD,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,sDAAsD;IACtD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;YACrC,SAAS;YACT,MAAM;YACN,SAAS;YACT,UAAU;YACV,kBAAkB;YAClB,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;YACP,YAAY;SACb,CAAC,CAAC;QACH,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;SACxC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,mGAAmG;QACnG,IAAI,CAAC,OAAO,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YAC9C,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;oBACrC,SAAS;oBACT,MAAM;oBACN,SAAS;oBACT,UAAU;oBACV,kBAAkB;oBAClB,OAAO;oBACP,OAAO;oBACP,OAAO,EAAE,IAAI,EAAE,2BAA2B;oBAC1C,OAAO;oBACP,YAAY;iBACb,CAAC,CAAC;gBACH,OAAO;oBACL,GAAG,MAAM;oBACT,MAAM,EAAE,SAAS;iBAClB,CAAC;YACJ,CAAC;YAAC,OAAO,YAAY,EAAE,CAAC;gBACtB,kDAAkD;gBAClD,MAAM,YAAY,CAAC;YACrB,CAAC;QACH,CAAC;QAED,+DAA+D;QAC/D,IACE,KAAK,YAAY,YAAY;YAC7B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAClD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM,EAAE,IAAI,EAAE,mCAAmC;gBACjD,SAAS;gBACT,UA
AU;gBACV,kBAAkB;gBAClB,OAAO;gBACP,OAAO;gBACP,OAAO,EAAE,uBAAuB;gBAChC,OAAO;gBACP,YAAY;aACb,CAAC,CAAC;YACH,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;aACxC,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
1
|
+
{"version":3,"file":"strategies.js","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAoB,MAAM,cAAc,CAAC;AACvF,OAAO,EAAE,SAAS,EAAE,SAAS,IAAI,aAAa,EAAE,MAAM,YAAY,CAAC;AACnE,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EACL,gBAAgB,GAGjB,MAAM,qBAAqB,CAAC;AAK7B,8EAA8E;AAE9E,SAAS,kBAAkB,CAAC,GAAW;IACrC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QAErD,sDAAsD;QACtD,IAAI,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YAClE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC7B,CAAC;QAED,gDAAgD;QAChD,IACE,QAAQ,KAAK,WAAW;YACxB,QAAQ,KAAK,eAAe;YAC5B,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,EAC/B,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC7B,CAAC;QAED,mDAAmD;QACnD,IAAI,QAAQ,KAAK,eAAe,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACxE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC7B,CAAC;QACD,IAAI,QAAQ,KAAK,eAAe,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YACxE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC7B,CAAC;QACD,IAAI,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YAClE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC7B,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,iEAAiE;IACnE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,+EAA+E;AAE/E,SAAS,YAAY,CAAC,KAAc;IAClC,OAAO,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,YAAY,CAAC;AAC/D,CAAC;AAED,SAAS,yBAAyB,CAAC,KAAc;IAC/C,IAAI,KAAK,YAAY,YAAY;QAAE,OAAO,IAAI,CAAC;IAC/C,OAAO,KAAK,YAAY,YAAY,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;AAC5E,CAAC;AAED,SAAS,kBAAkB,CAAC,MAAmB;IAC7C,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACpD,IAAI,CAAC,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;QAAE,OAAO,KAAK,CAAC;IACvC,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACxD,OAAO,IAAI,CAAC,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;AACxD,CAAC;AAED,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ
,CAAC;QACvC,KAAK,eAAe,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACP,sBAAsB;IACxB,CAAC;AACH,CAAC;AA0DD,KAAK,UAAU,wBAAwB,CACrC,GAAW,EACX,OAA+B;IAE/B,MAAM,EACJ,SAAS,EACT,MAAM,EACN,SAAS,EACT,UAAU,EACV,kBAAkB,EAClB,OAAO,EACP,OAAO,EACP,OAAO,EACP,YAAY,EACZ,gBAAgB,EAChB,MAAM,GACP,GAAG,OAAO,CAAC;IAEZ,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;YACrC,SAAS;YACT,MAAM;YACN,SAAS;YACT,UAAU;YACV,kBAAkB;YAClB,OAAO;YACP,OAAO;YACP,OAAO,EAAE,gBAAgB;YACzB,OAAO;YACP,YAAY;YACZ,MAAM;SACP,CAAC,CAAC;QAEH,OAAO;YACL,GAAG,MAAM;YACT,MAAM,EAAE,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;SACjD,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,YAAY,CAAC,KAAK,CAAC;YAAE,MAAM,KAAK,CAAC;QAErC,4EAA4E;QAC5E,IAAI,CAAC,gBAAgB,IAAI,KAAK,YAAY,YAAY,EAAE,CAAC;YACvD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM;gBACN,SAAS;gBACT,UAAU;gBACV,kBAAkB;gBAClB,OAAO;gBACP,OAAO;gBACP,OAAO,EAAE,IAAI;gBACb,OAAO;gBACP,YAAY;gBACZ,MAAM;aACP,CAAC,CAAC;YACH,OAAO,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QAC1C,CAAC;QAED,qDAAqD;QACrD,IACE,KAAK,YAAY,YAAY;YAC7B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAClD,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,GAAG,EAAE;gBACrC,SAAS;gBACT,MAAM,EAAE,IAAI;gBACZ,SAAS;gBACT,UAAU;gBACV,kBAAkB;gBAClB,OAAO;gBACP,OAAO;gBACP,OAAO,EAAE,gBAAgB;gBACzB,OAAO;gBACP,YAAY;gBACZ,MAAM;aACP,CAAC,CAAC;YACH,OAAO,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;QACzE,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,GAAW,EACX,UAA2B,EAAE;IAE7B,MAAM,EACJ,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,KAAK,EACf,MAAM,GAAG,CAAC,EACV,SAAS,EACT,SAAS,GAAG,KAAK,EACjB,UAAU,GAAG,KAAK,EAClB,kBAAkB,GAAG,KAAK,EAC1B,OAAO,EACP,OAAO,EACP,OAAO,EACP,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,KAAK,EACf,aAAa,GAAG,IAAI,GACrB,GAAG,OAAO,CAAC;IAEZ,MAAM,KAAK,GAAG,gBAAgB,EAAE,CAAC;IACjC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEhC,MAAM,YAAY,GAAG,CAAC,MAAgC,EAAQ,EAAE;QAC9D,IAAI,MAAM,KAAK
,QAAQ;YAAE,OAAO;QAChC,KAAK,CAAC,kBAAkB,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,YAAY,CAAC,CAAC;IACrE,CAAC,CAAC;IAEF,4EAA4E;IAE5E,6EAA6E;IAC7E,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IACvC,MAAM,WAAW,GAAG,KAAK,CAAC,uBAAuB,EAAE,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC;IACjE,MAAM,QAAQ,GAAG,MAAM,IAAI,WAAW,CAAC;IAEvC,IAAI,qBAAqB,GAAG,YAAY,CAAC;IACzC,IAAI,gBAAgB,GAAG,OAAO,CAAC;IAE/B,IAAI,QAAQ,EAAE,CAAC;QACb,qBAAqB,GAAG,IAAI,CAAC;QAC7B,IAAI,QAAQ,CAAC,IAAI,KAAK,SAAS;YAAE,gBAAgB,GAAG,IAAI,CAAC;IAC3D,CAAC;IAED,WAAW,CAAC,GAAG,CAAC,CAAC;IAEjB,4EAA4E;IAE5E,MAAM,WAAW,GACf,CAAC,OAAO;QACR,CAAC,qBAAqB;QACtB,CAAC,gBAAgB;QACjB,CAAC,UAAU;QACX,CAAC,YAAY;QACb,CAAC,OAAO,EAAE,MAAM;QAChB,CAAC,OAAO;QACR,CAAC,OAAO;QACR,MAAM,KAAK,CAAC;QACZ,CAAC,SAAS,CAAC;IAEb,4EAA4E;IAE5E,IAAI,WAAW,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,MAAM,CAAC,KAAK,IAAI,KAAK,CAAC,gBAAgB,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC;gBAClD,4CAA4C;gBAC5C,KAAK,CAAC,KAAK,IAAI,EAAE;oBACf,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;wBAC3D,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,EAAE,CAAC;4BAC/B,KAAK,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,EAAE,GAAG,KAAK,EAAE,MAAM,EAAE,QAAiB,EAAE,CAAC,CAAC;wBACjE,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,iCAAiC;oBACnC,CAAC;gBACH,CAAC,CAAC,EAAE,CAAC;YACP,CAAC;YACD,OAAO,EAAE,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,4EAA4E;IAE5E,IAAI,WAAW,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;QACrC,MAAM,WAAW,GAAG,SAAS,CAAiB,GAAG,CAAC,CAAC;QACnD,IAAI,WAAW,EAAE,CAAC;YAChB,OAAO,EAAE,GAAG,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,4EAA4E;IAE5E,IAAI,gBAAgB,GAClB,qBAAqB,IAAI,UAAU,IAAI,gBAAgB,CAAC;IAE1D,MAAM,cAAc,GAA2B;QAC7C,SAAS;QACT,MAAM;QACN,SAAS;QACT,UAAU;QACV,kBAAkB;QAClB,OAAO;QACP,OAAO;QACP,OAAO;QACP,YAAY;QACZ,gBAAgB;KACjB,CAAC;IAEF,4EAA4E;IAE5E,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,MAAM,qBAAqB,GAAG,IAAI,eAAe,EAAE,CAAC;QAEpD,MAAM,aAAa,GAAG,UAAU,CAC9B,GAAG,EAAE,CACH,WAAW,CACT,GAAG,EACH,SAAS
,EACT,SAAS,EACT,OAAO,EACP,qBAAqB,CAAC,MAAM,CAC7B,EACH,CAAC,CACF,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE;YAChB,IAAI,kBAAkB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,YAAY,CACpB,kDAAkD,CACnD,CAAC;YACJ,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CAAC;QAEH,8CAA8C;QAC9C,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,EAAE,EAAE,IAAI,KAAK,CAAC;QAC9C,MAAM,oBAAoB,GAAG,OAAO;YAClC,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,EAAE,EAAE,IAAI,aAAa,CAAC;YAC/C,CAAC,CAAC,aAAa,CAAC;QAElB,IAAI,SAAoD,CAAC;QACzD,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC;YACzC,aAAa;iBACV,IAAI,CACH,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,gBAAyB,EAAE,MAAM,EAAE,CAAC,CAC1D;iBACA,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,cAAuB,EAAE,KAAK,EAAE,CAAC,CAAC;YAC/D,IAAI,OAAO,CAA2B,CAAC,OAAO,EAAE,EAAE;gBAChD,SAAS,GAAG,UAAU,CACpB,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,EACvC,IAAI,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAClC,CAAC;YACJ,CAAC,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,SAAS;YAAE,YAAY,CAAC,SAAS,CAAC,CAAC;QAEvC,IAAI,eAAe,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;YAC9C,MAAM,cAAc,GAAmB;gBACrC,GAAG,eAAe,CAAC,MAAM;gBACzB,MAAM,EAAE,QAAQ;aACjB,CAAC;YACF,IAAI,WAAW,EAAE,CAAC;gBAChB,KAAK,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,cAAc,CAAC,IAAI,aAAa,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;YAC9E,CAAC;YACD,YAAY,CAAC,QAAQ,CAAC,CAAC;YACvB,OAAO,cAAc,CAAC;QACxB,CAAC;QAED,IAAI,eAAe,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YAC5C,IAAI,CAAC,yBAAyB,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtD,MAAM,eAAe,CAAC,KAAK,CAAC;YAC9B,CAAC;YACD,gBAAgB,GAAG,IAAI,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,kEAAkE;YAClE,IAAI,OAAO,EAAE,CAAC;gBACZ,yDAAyD;gBACzD,MAAM,sBAAsB,GAAG,IAAI,eAAe,EAAE,CAAC;gBACrD,IAAI,WAAoB,CAAC;gBACzB,IAAI,YAAqB,CAAC;gBAE1B,MAAM,eAAe,GAAG,aAAa;qBAClC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,QAAiB,EAAE,MAAM,EAAE,CAAC,CAAC;qBACzD,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;oBACf,WAAW,GAAG,KAAK,CAAC;oBACpB,MAAM,KAAK,CAAC;gBACd,CAAC,CAAC,CAAC;gBAEL,MAAM,gBAAgB,GAAG,wBAAwB,CAAC,GAAG,EAAE;oBACrD,GAAG,cAAc;oBACjB,MAAM,EAAE,sBAAsB,CAAC,MAAM;iBACtC,CAAC;qBACC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EA
AE,SAAkB,EAAE,MAAM,EAAE,CAAC,CAAC;qBAC1D,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;oBACf,YAAY,GAAG,KAAK,CAAC;oBACrB,MAAM,KAAK,CAAC;gBACd,CAAC,CAAC,CAAC;gBAEL,IAAI,CAAC;oBACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;wBAC/B,eAAe;wBACf,gBAAgB;qBACjB,CAAC,CAAC;oBAEH,IAAI,MAAM,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;wBAC/B,sBAAsB,CAAC,KAAK,EAAE,CAAC;wBAC/B,MAAM,cAAc,GAAmB;4BACrC,GAAG,MAAM,CAAC,MAAM;4BAChB,MAAM,EAAE,QAAQ;yBACjB,CAAC;wBACF,IAAI,WAAW,EAAE,CAAC;4BAChB,KAAK,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,cAAc,CAAC,IAAI,aAAa,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;wBAC9E,CAAC;wBACD,YAAY,CAAC,QAAQ,CAAC,CAAC;wBACvB,OAAO,cAAc,CAAC;oBACxB,CAAC;oBAED,qBAAqB,CAAC,KAAK,EAAE,CAAC;oBAC9B,IAAI,WAAW,EAAE,CAAC;wBAChB,KAAK,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,aAAa,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;oBAC5E,CAAC;oBACD,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;oBACnC,OAAO,MAAM,CAAC,MAAM,CAAC;gBACvB,CAAC;gBAAC,MAAM,CAAC;oBACP,IACE,WAAW;wBACX,CAAC,yBAAyB,CAAC,WAAW,CAAC;wBACvC,CAAC,YAAY,CAAC,WAAW,CAAC,EAC1B,CAAC;wBACD,MAAM,WAAW,CAAC;oBACpB,CAAC;oBACD,IAAI,YAAY;wBAAE,MAAM,YAAY,CAAC;oBACrC,IAAI,WAAW;wBAAE,MAAM,WAAW,CAAC;oBACnC,MAAM,IAAI,KAAK,CACb,+CAA+C,CAChD,CAAC;gBACJ,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,qDAAqD;gBACrD,MAAM,YAAY,GAAG,MAAM,aAAa;qBACrC,IAAI,CACH,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,gBAAyB,EAAE,MAAM,EAAE,CAAC,CAC1D;qBACA,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,cAAuB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC;gBAEhE,IAAI,YAAY,CAAC,IAAI,KAAK,gBAAgB,EAAE,CAAC;oBAC3C,MAAM,cAAc,GAAmB;wBACrC,GAAG,YAAY,CAAC,MAAM;wBACtB,MAAM,EAAE,QAAQ;qBACjB,CAAC;oBACF,IAAI,WAAW,EAAE,CAAC;wBAChB,KAAK,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,cAAc,CAAC,IAAI,aAAa,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;oBAC9E,CAAC;oBACD,YAAY,CAAC,QAAQ,CAAC,CAAC;oBACvB,OAAO,cAAc,CAAC;gBACxB,CAAC;gBAED,IAAI,CAAC,yBAAyB,CAAC,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;oBACnD,MAAM,YAAY,CAAC,KAAK,CAAC;gBAC3B,CAAC;gBACD,gBAAgB,GAAG,IAAI,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED,4EAA4E;IAE5E,MAAM,aAAa,GAAG,MAAM,wBAAwB,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC;IAC1E,IAAI,WAAW,EAAE,CAA
C;QAChB,KAAK,CAAC,QAAQ,EAAE,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,aAAa,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;IAC5E,CAAC;IACD,YAAY,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IACnC,OAAO,aAAa,CAAC;AACvB,CAAC;AAED,8EAA8E;AAE9E;;GAEG;AACH,OAAO,EAAE,kBAAkB,IAAI,gBAAgB,EAAE,MAAM,qBAAqB,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Strategy hooks — plugin interface for premium server-side optimizations.
|
|
3
|
+
*
|
|
4
|
+
* The base `smartFetch()` in strategies.ts provides solid simple→browser→stealth
|
|
5
|
+
* escalation. Hooks allow the server (or any host) to layer on caching, domain
|
|
6
|
+
* intelligence, and parallel-race strategies *without* shipping that logic in
|
|
7
|
+
* the npm package.
|
|
8
|
+
*
|
|
9
|
+
* Register hooks once at startup via `registerStrategyHooks()`.
|
|
10
|
+
* All hook methods are optional — unset hooks are simply skipped.
|
|
11
|
+
*/
|
|
12
|
+
import type { FetchResult } from './fetcher.js';
|
|
13
|
+
export interface StrategyResult extends FetchResult {
|
|
14
|
+
method: 'simple' | 'browser' | 'stealth' | 'cached';
|
|
15
|
+
}
|
|
16
|
+
export interface DomainRecommendation {
|
|
17
|
+
mode: 'browser' | 'stealth';
|
|
18
|
+
}
|
|
19
|
+
export interface CacheCheckResult {
|
|
20
|
+
/** Cached response to serve immediately. */
|
|
21
|
+
value: StrategyResult;
|
|
22
|
+
/** When true the caller should trigger a background revalidation. */
|
|
23
|
+
stale: boolean;
|
|
24
|
+
}
|
|
25
|
+
export interface StrategyHooks {
|
|
26
|
+
/**
|
|
27
|
+
* Look up `url` in the cache.
|
|
28
|
+
* Return `null` for a cache miss.
|
|
29
|
+
* When `stale` is true the result can be served but should be refreshed.
|
|
30
|
+
*/
|
|
31
|
+
checkCache?(url: string): CacheCheckResult | null;
|
|
32
|
+
/**
|
|
33
|
+
* Attempt to mark `url` as "currently revalidating" so that only one
|
|
34
|
+
* background refresh runs at a time. Return `true` if this call won
|
|
35
|
+
* the race (caller should revalidate), `false` otherwise.
|
|
36
|
+
*/
|
|
37
|
+
markRevalidating?(url: string): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* Store a fresh result in the cache.
|
|
40
|
+
*/
|
|
41
|
+
setCache?(url: string, result: StrategyResult): void;
|
|
42
|
+
/**
|
|
43
|
+
* Return a recommendation for how to fetch `url` based on historical
|
|
44
|
+
* success/failure data for the domain. Return `null` to let the default
|
|
45
|
+
* escalation logic decide.
|
|
46
|
+
*/
|
|
47
|
+
getDomainRecommendation?(url: string): DomainRecommendation | null;
|
|
48
|
+
/**
|
|
49
|
+
* Record the outcome of a fetch so the intelligence layer can learn.
|
|
50
|
+
*/
|
|
51
|
+
recordDomainResult?(url: string, method: 'simple' | 'browser' | 'stealth', latencyMs: number): void;
|
|
52
|
+
/**
|
|
53
|
+
* Whether to use the parallel race strategy (fire browser after a short
|
|
54
|
+
* timeout if simple fetch hasn't resolved). Default: false (no race).
|
|
55
|
+
*/
|
|
56
|
+
shouldRace?(): boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Timeout (ms) before the race starts a parallel browser fetch.
|
|
59
|
+
* Only called when `shouldRace()` returns true. Default: 2000.
|
|
60
|
+
*/
|
|
61
|
+
getRaceTimeoutMs?(): number;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Register premium strategy hooks. Should be called once at server startup.
|
|
65
|
+
* Calling again replaces the previous hooks entirely.
|
|
66
|
+
*/
|
|
67
|
+
export declare function registerStrategyHooks(hooks: StrategyHooks): void;
|
|
68
|
+
/**
|
|
69
|
+
* Clear all registered hooks (useful in tests).
|
|
70
|
+
*/
|
|
71
|
+
export declare function clearStrategyHooks(): void;
|
|
72
|
+
/**
|
|
73
|
+
* Retrieve the current hooks (internal — used by strategies.ts).
|
|
74
|
+
*/
|
|
75
|
+
export declare function getStrategyHooks(): Readonly<StrategyHooks>;
|
|
76
|
+
//# sourceMappingURL=strategy-hooks.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strategy-hooks.d.ts","sourceRoot":"","sources":["../../src/core/strategy-hooks.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAIhD,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,CAAC;CACrD;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,SAAS,GAAG,SAAS,CAAC;CAC7B;AAED,MAAM,WAAW,gBAAgB;IAC/B,4CAA4C;IAC5C,KAAK,EAAE,cAAc,CAAC;IACtB,qEAAqE;IACrE,KAAK,EAAE,OAAO,CAAC;CAChB;AAED,MAAM,WAAW,aAAa;IAG5B;;;;OAIG;IACH,UAAU,CAAC,CAAC,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAAC;IAElD;;;;OAIG;IACH,gBAAgB,CAAC,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IAExC;;OAEG;IACH,QAAQ,CAAC,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,GAAG,IAAI,CAAC;IAIrD;;;;OAIG;IACH,uBAAuB,CAAC,CAAC,GAAG,EAAE,MAAM,GAAG,oBAAoB,GAAG,IAAI,CAAC;IAEnE;;OAEG;IACH,kBAAkB,CAAC,CACjB,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,EACxC,SAAS,EAAE,MAAM,GAChB,IAAI,CAAC;IAIR;;;OAGG;IACH,UAAU,CAAC,IAAI,OAAO,CAAC;IAEvB;;;OAGG;IACH,gBAAgB,CAAC,IAAI,MAAM,CAAC;CAC7B;AAMD;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,aAAa,GAAG,IAAI,CAEhE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAEzC;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,QAAQ,CAAC,aAAa,CAAC,CAE1D"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Strategy hooks — plugin interface for premium server-side optimizations.
|
|
3
|
+
*
|
|
4
|
+
* The base `smartFetch()` in strategies.ts provides solid simple→browser→stealth
|
|
5
|
+
* escalation. Hooks allow the server (or any host) to layer on caching, domain
|
|
6
|
+
* intelligence, and parallel-race strategies *without* shipping that logic in
|
|
7
|
+
* the npm package.
|
|
8
|
+
*
|
|
9
|
+
* Register hooks once at startup via `registerStrategyHooks()`.
|
|
10
|
+
* All hook methods are optional — unset hooks are simply skipped.
|
|
11
|
+
*/
|
|
12
|
+
/* ---------- singleton registry ------------------------------------------- */
|
|
13
|
+
let registeredHooks = {};
|
|
14
|
+
/**
|
|
15
|
+
* Register premium strategy hooks. Should be called once at server startup.
|
|
16
|
+
* Calling again replaces the previous hooks entirely.
|
|
17
|
+
*/
|
|
18
|
+
export function registerStrategyHooks(hooks) {
|
|
19
|
+
registeredHooks = { ...hooks };
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Clear all registered hooks (useful in tests).
|
|
23
|
+
*/
|
|
24
|
+
export function clearStrategyHooks() {
|
|
25
|
+
registeredHooks = {};
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Retrieve the current hooks (internal — used by strategies.ts).
|
|
29
|
+
*/
|
|
30
|
+
export function getStrategyHooks() {
|
|
31
|
+
return registeredHooks;
|
|
32
|
+
}
|
|
33
|
+
//# sourceMappingURL=strategy-hooks.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"strategy-hooks.js","sourceRoot":"","sources":["../../src/core/strategy-hooks.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AA4EH,+EAA+E;AAE/E,IAAI,eAAe,GAAkB,EAAE,CAAC;AAExC;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAoB;IACxD,eAAe,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;AACjC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,eAAe,GAAG,EAAE,CAAC;AACvB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO,eAAe,CAAC;AACzB,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Main library export
|
|
5
5
|
*/
|
|
6
|
-
import { cleanup } from './core/fetcher.js';
|
|
6
|
+
import { cleanup, warmup, closePool } from './core/fetcher.js';
|
|
7
7
|
import type { PeelOptions, PeelResult } from './types.js';
|
|
8
8
|
export * from './types.js';
|
|
9
9
|
export { crawl, type CrawlOptions, type CrawlResult, type CrawlProgress } from './core/crawler.js';
|
|
@@ -12,8 +12,13 @@ export { mapDomain, type MapOptions, type MapResult } from './core/map.js';
|
|
|
12
12
|
export { extractBranding, type BrandingProfile } from './core/branding.js';
|
|
13
13
|
export { trackChange, getSnapshot, clearSnapshots, type ChangeResult, type Snapshot } from './core/change-tracking.js';
|
|
14
14
|
export { extractWithLLM } from './core/extract.js';
|
|
15
|
-
export {
|
|
15
|
+
export { extractDocumentToFormat, isPdfContentType, isDocxContentType, type DocumentExtractionResult } from './core/documents.js';
|
|
16
|
+
export { extractInlineJson, type InlineExtractOptions, type InlineExtractResult } from './core/extract-inline.js';
|
|
17
|
+
export { runAgent, type AgentOptions, type AgentResult, type AgentProgress, type AgentStreamEvent, type AgentDepth, type AgentTopic } from './core/agent.js';
|
|
16
18
|
export { summarizeContent, type SummarizeOptions } from './core/summarize.js';
|
|
19
|
+
export { getSearchProvider, DuckDuckGoProvider, BraveSearchProvider, type SearchProvider, type SearchProviderId, type WebSearchResult, type WebSearchOptions, } from './core/search-provider.js';
|
|
20
|
+
export { answerQuestion, type AnswerRequest, type AnswerResponse, type AnswerCitation, type LLMProviderId, type TokensUsed, } from './core/answer.js';
|
|
21
|
+
export { searchJobs, type JobCard, type JobDetail, type JobSearchOptions, type JobSearchResult } from './core/jobs.js';
|
|
17
22
|
/**
|
|
18
23
|
* Fetch and extract content from a URL
|
|
19
24
|
*
|
|
@@ -56,5 +61,6 @@ export declare function peelBatch(urls: string[], options?: PeelOptions & {
|
|
|
56
61
|
* Clean up any browser resources
|
|
57
62
|
* Call this when you're done using WebPeel
|
|
58
63
|
*/
|
|
59
|
-
export { cleanup };
|
|
64
|
+
export { cleanup, warmup, closePool };
|
|
65
|
+
export { getCached, setCached, clearCache, setCacheTTL } from './core/cache.js';
|
|
60
66
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAMH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAG/D,OAAO,KAAK,EAAE,WAAW,EAAE,UAAU,EAAa,MAAM,YAAY,CAAC;AAErE,cAAc,YAAY,CAAC;AAC3B,OAAO,EAAE,KAAK,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,eAAe,EAAE,KAAK,UAAU,EAAE,KAAK,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,KAAK,UAAU,EAAE,KAAK,SAAS,EAAE,MAAM,eAAe,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,KAAK,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,cAAc,EAAE,KAAK,YAAY,EAAE,KAAK,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACvH,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,KAAK,wBAAwB,EAAE,MAAM,qBAAqB,CAAC;AAClI,OAAO,EAAE,iBAAiB,EAAE,KAAK,oBAAoB,EAAE,KAAK,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AAClH,OAAO,EAAE,QAAQ,EAAE,KAAK,YAAY,EAAE,KAAK,WAAW,EAAE,KAAK,aAAa,EAAE,KAAK,gBAAgB,EAAE,KAAK,UAAU,EAAE,KAAK,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7J,OAAO,EAAE,gBAAgB,EAAE,KAAK,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC9E,OAAO,EACL,iBAAiB,EACjB,kBAAkB,EAClB,mBAAmB,EACnB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,eAAe,EACpB,KAAK,gBAAgB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EACL,cAAc,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,UAAU,GAChB,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,UAAU,EAAE,KAAK,OAAO,EAAE,KAAK,SAAS,EAAE,KAAK,gBAAgB,EAAE,KAAK,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAEvH;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CAyTtF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EAAE,EACd,OAAO,GAAE,WAAW,GAAG;IAAE,WAAW,CAAC,EAAE,MAAM,CAAA;CAAO,GACnD,OAAO,CAAC,CAAC,UAAU,GAAG;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EAAE,CAAC,CAwB1D;AAED;;;GAGG;AACH,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC"}
|