webpeel 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -500
- package/dist/cli-auth.d.ts +2 -0
- package/dist/cli-auth.d.ts.map +1 -1
- package/dist/cli-auth.js +16 -3
- package/dist/cli-auth.js.map +1 -1
- package/dist/cli.js +475 -77
- package/dist/cli.js.map +1 -1
- package/dist/core/actions.d.ts +19 -10
- package/dist/core/actions.d.ts.map +1 -1
- package/dist/core/actions.js +214 -43
- package/dist/core/actions.js.map +1 -1
- package/dist/core/agent.d.ts +60 -3
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +375 -86
- package/dist/core/agent.js.map +1 -1
- package/dist/core/answer.d.ts +43 -0
- package/dist/core/answer.d.ts.map +1 -0
- package/dist/core/answer.js +378 -0
- package/dist/core/answer.js.map +1 -0
- package/dist/core/cache.d.ts +14 -0
- package/dist/core/cache.d.ts.map +1 -0
- package/dist/core/cache.js +122 -0
- package/dist/core/cache.js.map +1 -0
- package/dist/core/dns-cache.d.ts +21 -0
- package/dist/core/dns-cache.d.ts.map +1 -0
- package/dist/core/dns-cache.js +184 -0
- package/dist/core/dns-cache.js.map +1 -0
- package/dist/core/documents.d.ts +24 -0
- package/dist/core/documents.d.ts.map +1 -0
- package/dist/core/documents.js +124 -0
- package/dist/core/documents.js.map +1 -0
- package/dist/core/extract-inline.d.ts +39 -0
- package/dist/core/extract-inline.d.ts.map +1 -0
- package/dist/core/extract-inline.js +214 -0
- package/dist/core/extract-inline.js.map +1 -0
- package/dist/core/fetcher.d.ts +33 -7
- package/dist/core/fetcher.d.ts.map +1 -1
- package/dist/core/fetcher.js +608 -41
- package/dist/core/fetcher.js.map +1 -1
- package/dist/core/jobs.d.ts +66 -0
- package/dist/core/jobs.d.ts.map +1 -0
- package/dist/core/jobs.js +513 -0
- package/dist/core/jobs.js.map +1 -0
- package/dist/core/markdown.d.ts.map +1 -1
- package/dist/core/markdown.js +141 -31
- package/dist/core/markdown.js.map +1 -1
- package/dist/core/pdf.d.ts.map +1 -1
- package/dist/core/pdf.js +3 -1
- package/dist/core/pdf.js.map +1 -1
- package/dist/core/screenshot.d.ts +33 -0
- package/dist/core/screenshot.d.ts.map +1 -0
- package/dist/core/screenshot.js +30 -0
- package/dist/core/screenshot.js.map +1 -0
- package/dist/core/search-provider.d.ts +46 -0
- package/dist/core/search-provider.d.ts.map +1 -0
- package/dist/core/search-provider.js +281 -0
- package/dist/core/search-provider.js.map +1 -0
- package/dist/core/strategies.d.ts +7 -10
- package/dist/core/strategies.d.ts.map +1 -1
- package/dist/core/strategies.js +370 -63
- package/dist/core/strategies.js.map +1 -1
- package/dist/index.d.ts +9 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +61 -32
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +335 -70
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +43 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/llms.txt +85 -47
- package/package.json +11 -5
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Search provider abstraction
|
|
3
|
+
*
|
|
4
|
+
* WebPeel supports multiple web search backends. DuckDuckGo is the default
|
|
5
|
+
* (no API key required). Brave Search is supported via BYOK.
|
|
6
|
+
*/
|
|
7
|
+
import { fetch as undiciFetch } from 'undici';
|
|
8
|
+
import { load } from 'cheerio';
|
|
9
|
+
/**
 * Decode the handful of HTML entities that can leak into scraped text.
 *
 * Handles the named entities `&nbsp;`, `&amp;`, `&lt;`, `&gt;`, `&quot;`
 * (case-insensitively) plus decimal (`&#39;`) and hexadecimal (`&#x27;`)
 * numeric character references. Cheerio usually decodes entities when using
 * `.text()`; this is a safety net for snippets that still contain them.
 *
 * Decoding happens in a single pass, so each entity is decoded exactly once:
 * `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * @param {string} input Text that may contain encoded entities.
 * @returns {string} The text with recognised entities replaced. Numeric
 *   references outside the Unicode range, or naming a surrogate code point,
 *   are left untouched.
 */
function decodeHtmlEntities(input) {
    const namedEntities = new Map([
        ['nbsp', ' '],
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
    ]);
    return input.replace(/&(?:(nbsp|amp|lt|gt|quot)|#x([0-9a-f]+)|#(\d+));/gi, (match, name, hex, dec) => {
        if (name !== undefined) {
            return namedEntities.get(String(name).toLowerCase());
        }
        const cp = hex !== undefined
            ? Number.parseInt(String(hex), 16)
            : Number.parseInt(String(dec), 10);
        // Reject values String.fromCodePoint cannot represent.
        if (!Number.isFinite(cp) || cp < 0 || cp > 0x10ffff)
            return match;
        // A lone surrogate is not a valid character; keep the reference as-is.
        if (cp >= 0xd800 && cp <= 0xdfff)
            return match;
        return String.fromCodePoint(cp);
    });
}
|
|
42
|
+
/**
 * Normalise a scraped text fragment (title or snippet) for output.
 *
 * Steps, in order: decode HTML entities, collapse every whitespace run to a
 * single space and trim, optionally strip leading/trailing ellipsis padding,
 * then truncate to `opts.maxLen` UTF-16 code units.
 *
 * @param {string} input Raw text.
 * @param {{ maxLen?: number, stripEllipsisPadding?: boolean }} [opts]
 *   `maxLen` caps the result length (no cap when omitted);
 *   `stripEllipsisPadding` removes leading/trailing "..." or "…" runs.
 * @returns {string} The cleaned text.
 */
function cleanText(input, opts = {}) {
    let s = decodeHtmlEntities(input);
    s = s.replace(/\s+/g, ' ').trim();
    if (opts.stripEllipsisPadding) {
        // Remove leading/trailing "..." or Unicode ellipsis padding.
        s = s
            .replace(/^(?:\.{3,}|…)+\s*/g, '')
            .replace(/\s*(?:\.{3,}|…)+$/g, '')
            .trim();
    }
    if (s.length > opts.maxLen) {
        s = s.slice(0, opts.maxLen);
        // slice() counts UTF-16 code units, so the cut may land between the two
        // halves of a surrogate pair. Drop a dangling high surrogate rather than
        // emit an invalid character.
        const last = s.charCodeAt(s.length - 1);
        if (last >= 0xd800 && last <= 0xdbff)
            s = s.slice(0, -1);
    }
    return s;
}
|
|
56
|
+
/**
 * Build a key used to detect duplicate search results.
 *
 * For a parseable absolute URL the key is the lower-cased host (including any
 * non-default port, with a leading `www.` removed) followed by the path with
 * trailing slashes removed. Scheme, query string and fragment are ignored.
 *
 * When `rawUrl` cannot be parsed, a best-effort string normalisation is used
 * instead: trim, lower-case, drop `http(s)://`, drop a leading `www.`, drop
 * everything from the first `?` or `#`, and drop trailing slashes.
 *
 * @param {string} rawUrl URL to normalise.
 * @returns {string} The dedupe key.
 */
function normalizeUrlForDedupe(rawUrl) {
    try {
        const u = new URL(rawUrl);
        // `host` (unlike `hostname`) keeps a non-default port, so two different
        // origins on the same machine do not collapse into one key. This also
        // matches the string fallback below, which never strips the port.
        const host = u.host.toLowerCase().replace(/^www\./, '');
        const path = (u.pathname || '/').replace(/\/+$/g, '');
        return `${host}${path}`;
    }
    catch {
        return rawUrl
            .trim()
            .toLowerCase()
            .replace(/^https?:\/\//, '')
            .replace(/^www\./, '')
            .replace(/[?#].*$/, '')
            .replace(/\/+$/g, '');
    }
}
|
|
74
|
+
/**
 * Search provider backed by the DuckDuckGo HTML endpoint
 * (`https://html.duckduckgo.com/html/`). No API key is required.
 *
 * `searchWeb` is the entry point: it tries a sequence of query variants and
 * returns the results of the first variant that yields at least one hit.
 */
export class DuckDuckGoProvider {
    id = 'duckduckgo';
    requiresApiKey = false;
    /**
     * Build the ordered, de-duplicated list of query variants to try.
     *
     * @param {string} originalQuery Query as supplied by the caller.
     * @returns {string[]} Variants in retry order; empty when the query is blank.
     */
    buildQueryAttempts(originalQuery) {
        const q = originalQuery.trim();
        if (!q)
            return [];
        const attempts = [];
        // Required retry strategy order:
        // 1) original query
        // 2) quoted query
        // 3) query site:*
        attempts.push(q);
        if (!/^".*"$/.test(q))
            attempts.push(`"${q}"`);
        attempts.push(`${q} site:*`);
        // Single-word queries are disproportionately likely to return 0 results on
        // the DDG HTML endpoint (e.g. "openai" vs "open ai"). When the first three
        // attempts fail, try a few light-touch strategies that tend to coax the
        // parser into returning web results.
        const isSingleWord = !/\s/.test(q);
        const looksLikeUrlOrDomain = /[./]/.test(q) || /^https?:/i.test(q);
        if (isSingleWord && !looksLikeUrlOrDomain) {
            // Try splitting a common suffix (e.g. openai -> open ai). At least four
            // letters must precede "ai" so that the "openai" example qualifies.
            if (/^[a-z]{4,}ai$/i.test(q)) {
                attempts.push(`${q.slice(0, -2)} ai`);
            }
            // Common suffixes that often return at least the official domain
            attempts.push(`${q}.com`);
            attempts.push(`site:${q}.com`);
            attempts.push(`${q} website`);
        }
        // De-dupe attempts (case-insensitive), keeping the first occurrence.
        const seen = new Set();
        return attempts
            .map((s) => s.trim())
            .filter((s) => s.length > 0)
            .filter((s) => {
            const key = s.toLowerCase();
            if (seen.has(key))
                return false;
            seen.add(key);
            return true;
        });
    }
    /**
     * Build the request URL for one query.
     *
     * @param {string} query Query text.
     * @param {object} options `tbs` is forwarded as the `df` (time filter)
     *   parameter; `country` (or, failing that, `location`) is lower-cased and
     *   forwarded as the `kl` (region) parameter.
     * @returns {string} Absolute URL of the DuckDuckGo HTML endpoint.
     */
    buildSearchUrl(query, options) {
        const { tbs, country, location } = options;
        const params = new URLSearchParams();
        params.set('q', query);
        // DuckDuckGo HTML endpoint supports some filtering
        if (tbs) {
            // DDG uses `df` for time filtering on html endpoint
            params.set('df', tbs);
        }
        if (country || location) {
            const region = (country || location || '').toLowerCase();
            if (region)
                params.set('kl', region);
        }
        return `https://html.duckduckgo.com/html/?${params.toString()}`;
    }
    /**
     * Run a single query and scrape the result list.
     *
     * @param {string} query Query text.
     * @param {object} options `count` caps the number of results; `signal` is
     *   passed to the HTTP request; the remaining fields go to `buildSearchUrl`.
     * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
     *   Results with http(s) URLs only, de-duplicated by normalised URL.
     * @throws {Error} When the endpoint responds with a non-2xx status.
     */
    async searchOnce(query, options) {
        const { count, signal } = options;
        const searchUrl = this.buildSearchUrl(query, options);
        const response = await undiciFetch(searchUrl, {
            headers: {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.9',
            },
            signal,
        });
        if (!response.ok) {
            throw new Error(`Search failed: HTTP ${response.status}`);
        }
        const html = await response.text();
        const $ = load(html);
        const results = [];
        const seen = new Set();
        $('.result').each((_i, elem) => {
            // Returning false stops the iteration once enough results are collected.
            if (results.length >= count)
                return false;
            const $result = $(elem);
            // Be resilient to markup variations: title can be in .result__title or
            // directly on the anchor.
            const titleRaw = $result.find('.result__title').text() || $result.find('.result__a').text();
            const rawUrl = $result.find('.result__a').attr('href') || '';
            const snippetRaw = $result.find('.result__snippet').text();
            const title = cleanText(titleRaw, { maxLen: 200 });
            const snippet = cleanText(snippetRaw, { maxLen: 500, stripEllipsisPadding: true });
            if (!title || !rawUrl)
                return;
            // Extract actual URL from DuckDuckGo redirect
            let url = rawUrl;
            try {
                const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
                // searchParams.get() already returns the percent-decoded value, so it
                // must not be decoded a second time: that would mangle targets that
                // legitimately contain "%xx" sequences, or throw on a bare "%".
                const uddg = ddgUrl.searchParams.get('uddg');
                if (uddg)
                    url = uddg;
            }
            catch {
                // Use raw URL if parsing fails
            }
            // SECURITY: Validate and sanitize results — only allow HTTP/HTTPS URLs
            try {
                let parsed;
                try {
                    parsed = new URL(url);
                }
                catch {
                    // Handle protocol-relative or relative URLs (rare but possible)
                    parsed = new URL(url, 'https://duckduckgo.com');
                }
                if (!['http:', 'https:'].includes(parsed.protocol)) {
                    return;
                }
                url = parsed.href;
            }
            catch {
                return;
            }
            // Deduplicate by normalized URL (strip query params, www, trailing slash)
            const dedupeKey = normalizeUrlForDedupe(url);
            if (seen.has(dedupeKey))
                return;
            seen.add(dedupeKey);
            results.push({ title, url, snippet });
        });
        return results;
    }
    /**
     * Search the web, retrying with alternative query variants.
     *
     * @param {string} query Query text.
     * @param {object} options Passed through to `searchOnce`.
     * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
     *   Results of the first variant that returns any; `[]` if none do.
     */
    async searchWeb(query, options) {
        const attempts = this.buildQueryAttempts(query);
        // Retry only when DDG returns 0 results.
        for (const q of attempts) {
            const results = await this.searchOnce(q, options);
            if (results.length > 0)
                return results;
        }
        return [];
    }
}
|
|
215
|
+
/**
 * Search provider backed by the Brave Search web API
 * (`https://api.search.brave.com/res/v1/web/search`). Requires an API key,
 * supplied per call through `options.apiKey`.
 */
export class BraveSearchProvider {
    id = 'brave';
    requiresApiKey = true;
    /**
     * Run one query against the Brave Search API.
     *
     * @param {string} query Query text.
     * @param {object} options `apiKey` is sent as the `X-Subscription-Token`
     *   header; `count` caps the number of results (the value requested from
     *   the API is clamped to 1–10, and 10 is used when `count` is missing or
     *   not a finite number); `signal` is passed to the HTTP request.
     * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
     *   Results with http(s) URLs only; `[]` when the response carries no
     *   `web.results` array.
     * @throws {Error} When the API key is missing/blank or the API responds
     *   with a non-2xx status.
     */
    async searchWeb(query, options) {
        const { count, apiKey, signal } = options;
        if (!apiKey || apiKey.trim().length === 0) {
            throw new Error('Brave Search requires an API key');
        }
        // Guard against a missing or non-numeric count, which would otherwise be
        // serialised as "count=NaN" in the request.
        const hasCount = typeof count === 'number' && Number.isFinite(count);
        const maxResults = hasCount ? count : 10;
        const requestCount = Math.min(Math.max(Math.trunc(maxResults), 1), 10);
        const url = new URL('https://api.search.brave.com/res/v1/web/search');
        url.searchParams.set('q', query);
        url.searchParams.set('count', String(requestCount));
        const response = await undiciFetch(url.toString(), {
            headers: {
                'Accept': 'application/json',
                'X-Subscription-Token': apiKey,
            },
            signal,
        });
        if (!response.ok) {
            // Include the response body in the error when it can be read.
            const text = await response.text().catch(() => '');
            throw new Error(`Brave Search failed: HTTP ${response.status}${text ? ` - ${text}` : ''}`);
        }
        const data = await response.json();
        const resultsArray = data?.web?.results;
        if (!Array.isArray(resultsArray)) {
            return [];
        }
        const results = [];
        for (const r of resultsArray) {
            if (results.length >= maxResults)
                break;
            const title = typeof r?.title === 'string' ? r.title.trim() : '';
            const rawUrl = typeof r?.url === 'string' ? r.url.trim() : '';
            // Prefer `description`; fall back to `snippet` when it is absent.
            const snippet = typeof r?.description === 'string'
                ? r.description.trim()
                : typeof r?.snippet === 'string'
                    ? r.snippet.trim()
                    : '';
            if (!title || !rawUrl)
                continue;
            // SECURITY: Validate URL protocol
            try {
                const parsed = new URL(rawUrl);
                if (!['http:', 'https:'].includes(parsed.protocol))
                    continue;
            }
            catch {
                continue;
            }
            results.push({
                title: title.slice(0, 200),
                url: rawUrl,
                snippet: snippet.slice(0, 500),
            });
        }
        return results;
    }
}
|
|
273
|
+
/**
 * Create the search provider for a given provider id.
 *
 * @param {string} [id] `'brave'` selects Brave Search. `'duckduckgo'`, a
 *   missing/empty id, and any unrecognised id all select DuckDuckGo.
 * @returns {DuckDuckGoProvider | BraveSearchProvider} A new provider instance.
 */
export function getSearchProvider(id) {
    switch (id) {
        case 'brave':
            return new BraveSearchProvider();
        case 'duckduckgo':
        default:
            // DuckDuckGo is the default and also the fallback for unknown ids
            // (which should be unreachable due to typing).
            return new DuckDuckGoProvider();
    }
}
|
|
281
|
+
//# sourceMappingURL=search-provider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search-provider.js","sourceRoot":"","sources":["../../src/core/search-provider.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAgC/B,SAAS,kBAAkB,CAAC,KAAa;IACvC,4EAA4E;IAC5E,wEAAwE;IACxE,OAAO,KAAK;SACT,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,mBAAmB,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,QAAQ;YAAE,OAAO,EAAE,CAAC;QAC/D,IAAI,CAAC;YACH,OAAO,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC;SACD,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE;QAChC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,QAAQ;YAAE,OAAO,EAAE,CAAC;QAC/D,IAAI,CAAC;YACH,OAAO,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC;AAED,SAAS,SAAS,CAChB,KAAa,EACb,IAGC;IAED,IAAI,CAAC,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;IAClC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAElC,IAAI,IAAI,CAAC,oBAAoB,EAAE,CAAC;QAC9B,6DAA6D;QAC7D,CAAC,GAAG,CAAC;aACF,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;aACjC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;aACjC,IAAI,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACxD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,qBAAqB,CAAC,MAAc;IAC3C,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC5D,IAAI,IAAI,GAAG,CAAC,CAAC,QAAQ,IAAI,GAAG,CA
AC;QAC7B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACjC,OAAO,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM;aACV,IAAI,EAAE;aACN,WAAW,EAAE;aACb,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;aAC3B,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;aACrB,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,MAAM,OAAO,kBAAkB;IACpB,EAAE,GAAqB,YAAY,CAAC;IACpC,cAAc,GAAG,KAAK,CAAC;IAExB,kBAAkB,CAAC,aAAqB;QAC9C,MAAM,CAAC,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC;YAAE,OAAO,EAAE,CAAC;QAElB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,iCAAiC;QACjC,oBAAoB;QACpB,kBAAkB;QAClB,kBAAkB;QAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAE7B,2EAA2E;QAC3E,2EAA2E;QAC3E,wEAAwE;QACxE,qCAAqC;QACrC,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACnC,MAAM,oBAAoB,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnE,IAAI,YAAY,IAAI,CAAC,oBAAoB,EAAE,CAAC;YAC1C,yDAAyD;YACzD,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7B,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxC,CAAC;YAED,iEAAiE;YACjE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAChC,CAAC;QAED,sCAAsC;QACtC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,OAAO,QAAQ;aACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;aAC3B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YACZ,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,cAAc,CAAC,KAAa,EAAE,OAAyB;QAC7D,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;QAE3C,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,CAAC,GAAG,E
AAE,KAAK,CAAC,CAAC;QAEvB,mDAAmD;QACnD,IAAI,GAAG,EAAE,CAAC;YACR,oDAAoD;YACpD,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACxB,CAAC;QAED,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,CAAC,OAAO,IAAI,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YACzD,IAAI,MAAM;gBAAE,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,OAAO,qCAAqC,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;IAClE,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,KAAa,EAAE,OAAyB;QAC/D,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAElC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAEtD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,oEAAoE;gBAClF,QAAQ,EAAE,iEAAiE;gBAC3E,iBAAiB,EAAE,gBAAgB;aACpC;YACD,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;QAErB,MAAM,OAAO,GAAsB,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;YAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;gBAAE,OAAO;YAEpC,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YAExB,uEAAuE;YACvE,0BAA0B;YAC1B,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5F,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAC7D,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,IAAI,EAAE,CAAC;YAE3D,IAAI,KAAK,GAAG,SAAS,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;YACjD,IAAI,OAAO,GAAG,SAAS,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,oBAAoB,EAAE,IAAI,EAAE,CAAC,CAAC;YAEjF,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM;gBAAE,OAAO;YAE9B,8CAA8C;YAC9C,IAAI,GAAG,GAAG,MAAM,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;gBACzD,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;gBAC7C,IAAI,IAAI;oBAAE,GAAG,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAC3C,CAAC;YAAC,MAAM,CAAC;gBACP,+BAA+B;YACjC,CAAC;YAED,uEAAuE;YACv
E,IAAI,CAAC;gBACH,IAAI,MAAW,CAAC;gBAChB,IAAI,CAAC;oBACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;gBACxB,CAAC;gBAAC,MAAM,CAAC;oBACP,gEAAgE;oBAChE,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;gBAClD,CAAC;gBACD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACnD,OAAO;gBACT,CAAC;gBACD,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YAED,0EAA0E;YAC1E,MAAM,SAAS,GAAG,qBAAqB,CAAC,GAAG,CAAC,CAAC;YAC7C,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC;gBAAE,OAAO;YAChC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAEpB,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,OAAyB;QACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;QAEhD,yCAAyC;QACzC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAClD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,OAAO,CAAC;QACzC,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;CACF;AAED,MAAM,OAAO,mBAAmB;IACrB,EAAE,GAAqB,OAAO,CAAC;IAC/B,cAAc,GAAG,IAAI,CAAC;IAE/B,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,OAAyB;QACtD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAE1C,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,gDAAgD,CAAC,CAAC;QACtE,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACjC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QAExE,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YACjD,OAAO,EAAE;gBACP,QAAQ,EAAE,kBAAkB;gBAC5B,sBAAsB,EAAE,MAAM;aAC/B;YACD,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7F,CAAC;QAED,MAAM,
IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAS,CAAC;QAC1C,MAAM,YAAY,GAAU,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;QAE/C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;YACjC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;YAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAM;YACnC,MAAM,KAAK,GAAG,OAAO,CAAC,EAAE,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjE,MAAM,MAAM,GAAG,OAAO,CAAC,EAAE,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,OAAO,GAAG,OAAO,CAAC,EAAE,WAAW,KAAK,QAAQ;gBAChD,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE;gBACtB,CAAC,CAAC,OAAO,CAAC,EAAE,OAAO,KAAK,QAAQ;oBAC9B,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE;oBAClB,CAAC,CAAC,EAAE,CAAC;YAET,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM;gBAAE,SAAS;YAEhC,kCAAkC;YAClC,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;gBAC/B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC;oBAAE,SAAS;YAC/D,CAAC;YAAC,MAAM,CAAC;gBACP,SAAS;YACX,CAAC;YAED,OAAO,CAAC,IAAI,CAAC;gBACX,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBAC1B,GAAG,EAAE,MAAM;gBACX,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aAC/B,CAAC,CAAC;QACL,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED,MAAM,UAAU,iBAAiB,CAAC,EAAgC;IAChE,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,YAAY;QAAE,OAAO,IAAI,kBAAkB,EAAE,CAAC;IAChE,IAAI,EAAE,KAAK,OAAO;QAAE,OAAO,IAAI,mBAAmB,EAAE,CAAC;IAErD,4DAA4D;IAC5D,OAAO,IAAI,kBAAkB,EAAE,CAAC;AAClC,CAAC"}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Smart escalation strategy: try simple fetch first, escalate to browser if needed
|
|
3
3
|
*/
|
|
4
4
|
import { type FetchResult } from './fetcher.js';
|
|
5
|
+
export declare function clearDomainIntel(): void;
|
|
5
6
|
export interface StrategyOptions {
|
|
6
7
|
/** Force browser mode (skip simple fetch) */
|
|
7
8
|
forceBrowser?: boolean;
|
|
@@ -33,6 +34,10 @@ export interface StrategyOptions {
|
|
|
33
34
|
}>;
|
|
34
35
|
/** Keep browser page open for reuse (caller must close) */
|
|
35
36
|
keepPageOpen?: boolean;
|
|
37
|
+
/** Disable response cache for this request */
|
|
38
|
+
noCache?: boolean;
|
|
39
|
+
/** Time to wait before launching browser in parallel with simple fetch */
|
|
40
|
+
raceTimeoutMs?: number;
|
|
36
41
|
/** Location/language for geo-targeted scraping */
|
|
37
42
|
location?: {
|
|
38
43
|
country?: string;
|
|
@@ -40,19 +45,11 @@ export interface StrategyOptions {
|
|
|
40
45
|
};
|
|
41
46
|
}
|
|
42
47
|
export interface StrategyResult extends FetchResult {
|
|
43
|
-
/** Which strategy succeeded: 'simple' | 'browser' | 'stealth' */
|
|
44
|
-
method: 'simple' | 'browser' | 'stealth';
|
|
48
|
+
/** Which strategy succeeded: 'simple' | 'browser' | 'stealth' | 'cached' */
|
|
49
|
+
method: 'simple' | 'browser' | 'stealth' | 'cached';
|
|
45
50
|
}
|
|
46
51
|
/**
|
|
47
52
|
* Smart fetch with automatic escalation
|
|
48
|
-
*
|
|
49
|
-
* Strategy:
|
|
50
|
-
* 1. Try simple HTTP fetch first (fast, ~200ms)
|
|
51
|
-
* 2. If blocked (403, 503, Cloudflare, empty body) → try browser
|
|
52
|
-
* 3. If browser gets blocked (403, CAPTCHA) → try stealth mode
|
|
53
|
-
* 4. If stealth mode is explicitly requested → skip to stealth
|
|
54
|
-
*
|
|
55
|
-
* Returns the result along with which method worked
|
|
56
53
|
*/
|
|
57
54
|
export declare function smartFetch(url: string, options?: StrategyOptions): Promise<StrategyResult>;
|
|
58
55
|
//# sourceMappingURL=strategies.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAoIvF,wBAAgB,gBAAgB,IAAI,IAAI,CAGvC;AA6ED,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gDAAgD;IAChD,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,2DAA2D;IAC3D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,0EAA0E;IAC1E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,CAAC,EAAE;QACT,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,4EAA4E;IAC5E,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,CAAC;CACrD;AAuGD;;GAEG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CAqNpG"}
|