webpeel 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +140 -500
  2. package/dist/cli-auth.d.ts +2 -0
  3. package/dist/cli-auth.d.ts.map +1 -1
  4. package/dist/cli-auth.js +16 -3
  5. package/dist/cli-auth.js.map +1 -1
  6. package/dist/cli.js +475 -77
  7. package/dist/cli.js.map +1 -1
  8. package/dist/core/actions.d.ts +19 -10
  9. package/dist/core/actions.d.ts.map +1 -1
  10. package/dist/core/actions.js +214 -43
  11. package/dist/core/actions.js.map +1 -1
  12. package/dist/core/agent.d.ts +60 -3
  13. package/dist/core/agent.d.ts.map +1 -1
  14. package/dist/core/agent.js +375 -86
  15. package/dist/core/agent.js.map +1 -1
  16. package/dist/core/answer.d.ts +43 -0
  17. package/dist/core/answer.d.ts.map +1 -0
  18. package/dist/core/answer.js +378 -0
  19. package/dist/core/answer.js.map +1 -0
  20. package/dist/core/cache.d.ts +14 -0
  21. package/dist/core/cache.d.ts.map +1 -0
  22. package/dist/core/cache.js +122 -0
  23. package/dist/core/cache.js.map +1 -0
  24. package/dist/core/dns-cache.d.ts +21 -0
  25. package/dist/core/dns-cache.d.ts.map +1 -0
  26. package/dist/core/dns-cache.js +184 -0
  27. package/dist/core/dns-cache.js.map +1 -0
  28. package/dist/core/documents.d.ts +24 -0
  29. package/dist/core/documents.d.ts.map +1 -0
  30. package/dist/core/documents.js +124 -0
  31. package/dist/core/documents.js.map +1 -0
  32. package/dist/core/extract-inline.d.ts +39 -0
  33. package/dist/core/extract-inline.d.ts.map +1 -0
  34. package/dist/core/extract-inline.js +214 -0
  35. package/dist/core/extract-inline.js.map +1 -0
  36. package/dist/core/fetcher.d.ts +33 -7
  37. package/dist/core/fetcher.d.ts.map +1 -1
  38. package/dist/core/fetcher.js +608 -41
  39. package/dist/core/fetcher.js.map +1 -1
  40. package/dist/core/jobs.d.ts +66 -0
  41. package/dist/core/jobs.d.ts.map +1 -0
  42. package/dist/core/jobs.js +513 -0
  43. package/dist/core/jobs.js.map +1 -0
  44. package/dist/core/markdown.d.ts.map +1 -1
  45. package/dist/core/markdown.js +141 -31
  46. package/dist/core/markdown.js.map +1 -1
  47. package/dist/core/pdf.d.ts.map +1 -1
  48. package/dist/core/pdf.js +3 -1
  49. package/dist/core/pdf.js.map +1 -1
  50. package/dist/core/screenshot.d.ts +33 -0
  51. package/dist/core/screenshot.d.ts.map +1 -0
  52. package/dist/core/screenshot.js +30 -0
  53. package/dist/core/screenshot.js.map +1 -0
  54. package/dist/core/search-provider.d.ts +46 -0
  55. package/dist/core/search-provider.d.ts.map +1 -0
  56. package/dist/core/search-provider.js +281 -0
  57. package/dist/core/search-provider.js.map +1 -0
  58. package/dist/core/strategies.d.ts +7 -10
  59. package/dist/core/strategies.d.ts.map +1 -1
  60. package/dist/core/strategies.js +370 -63
  61. package/dist/core/strategies.js.map +1 -1
  62. package/dist/index.d.ts +9 -3
  63. package/dist/index.d.ts.map +1 -1
  64. package/dist/index.js +61 -32
  65. package/dist/index.js.map +1 -1
  66. package/dist/mcp/server.js +335 -70
  67. package/dist/mcp/server.js.map +1 -1
  68. package/dist/types.d.ts +43 -1
  69. package/dist/types.d.ts.map +1 -1
  70. package/dist/types.js.map +1 -1
  71. package/llms.txt +85 -47
  72. package/package.json +11 -5
@@ -0,0 +1,281 @@
1
+ /**
2
+ * Search provider abstraction
3
+ *
4
+ * WebPeel supports multiple web search backends. DuckDuckGo is the default
5
+ * (no API key required). Brave Search is supported via BYOK.
6
+ */
7
+ import { fetch as undiciFetch } from 'undici';
8
+ import { load } from 'cheerio';
9
/**
 * Decode the small set of HTML entities that can leak into search snippets.
 *
 * Cheerio usually decodes entities when using `.text()`; this is a safety net
 * for snippets that still contain encoded entities afterwards.
 *
 * Recognised forms (matched case-insensitively): the named entities `nbsp`,
 * `amp`, `lt`, `gt`, `quot`, hexadecimal references (`&#x27;`) and decimal
 * references (`&#39;`). Everything is decoded in a single pass so that text
 * which was escaped once is only unescaped once (`&amp;lt;` becomes `&lt;`,
 * not `<`).
 *
 * @param {string} input - Text that may contain encoded entities.
 * @returns {string} The text with recognised entities decoded. Numeric
 *   references outside the Unicode range are left untouched.
 */
function decodeHtmlEntities(input) {
    const namedEntities = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"' };
    const entityPattern = /&(?:(nbsp|amp|lt|gt|quot)|#x([0-9a-f]+)|#(\d+));/gi;
    return input.replace(entityPattern, (match, name, hex, dec) => {
        if (name !== undefined) {
            return namedEntities[name.toLowerCase()];
        }
        const cp = hex !== undefined
            ? Number.parseInt(hex, 16)
            : Number.parseInt(dec, 10);
        // Very long digit runs parse to huge or infinite values; keep the
        // original text rather than emitting an invalid code point.
        if (!Number.isFinite(cp) || cp < 0 || cp > 0x10ffff) {
            return match;
        }
        try {
            return String.fromCodePoint(cp);
        }
        catch {
            return match;
        }
    });
}
42
/**
 * Normalise a scraped text fragment for display.
 *
 * Decodes leftover HTML entities, collapses every whitespace run to a single
 * space, optionally removes leading/trailing "..." or Unicode-ellipsis
 * padding, and finally truncates to `opts.maxLen` characters.
 *
 * @param {string} input - Raw text taken from the result markup.
 * @param {{ maxLen: number, stripEllipsisPadding?: boolean }} opts
 * @returns {string} The cleaned, possibly truncated, text.
 */
function cleanText(input, opts) {
    const collapsed = decodeHtmlEntities(input).replace(/\s+/g, ' ').trim();
    const unpadded = opts.stripEllipsisPadding
        ? collapsed
            .replace(/^(?:\.{3,}|…)+\s*/g, '')
            .replace(/\s*(?:\.{3,}|…)+$/g, '')
            .trim()
        : collapsed;
    return unpadded.length > opts.maxLen
        ? unpadded.slice(0, opts.maxLen)
        : unpadded;
}
56
/**
 * Produce a comparison key for a URL so near-identical results collapse.
 *
 * The key is the lower-cased host without a leading `www.` followed by the
 * path without trailing slashes. Scheme, port, query string and fragment do
 * not take part in the key. Input that cannot be parsed as an absolute URL is
 * normalised with plain string operations instead.
 *
 * @param {string} rawUrl - URL (or URL-like string) to normalise.
 * @returns {string} The de-duplication key.
 */
function normalizeUrlForDedupe(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        // Not an absolute URL: approximate the same normalisation textually.
        const lowered = rawUrl.trim().toLowerCase();
        const withoutScheme = lowered.replace(/^https?:\/\//, '');
        const withoutWww = withoutScheme.replace(/^www\./, '');
        const withoutQuery = withoutWww.replace(/[?#].*$/, '');
        return withoutQuery.replace(/\/+$/g, '');
    }
    const bareHost = parsed.hostname.toLowerCase().replace(/^www\./, '');
    const trimmedPath = (parsed.pathname || '/').replace(/\/+$/g, '');
    return `${bareHost}${trimmedPath}`;
}
74
/**
 * Search provider backed by DuckDuckGo's HTML endpoint (no API key required).
 */
export class DuckDuckGoProvider {
    id = 'duckduckgo';
    requiresApiKey = false;
    /**
     * Build the ordered, de-duplicated list of query variants to try.
     *
     * Order: the original query, the quoted query (unless already quoted),
     * `<query> site:*`, and - for single words that do not look like a URL or
     * domain - a few extra variants that tend to yield at least the official
     * site.
     *
     * @param {string} originalQuery - Query as supplied by the caller.
     * @returns {string[]} Variants in retry order; empty for a blank query.
     */
    buildQueryAttempts(originalQuery) {
        const q = originalQuery.trim();
        if (!q)
            return [];
        const attempts = [q];
        if (!/^".*"$/.test(q))
            attempts.push(`"${q}"`);
        attempts.push(`${q} site:*`);
        // Single-word queries are disproportionately likely to return 0 results
        // on the DDG HTML endpoint (e.g. "openai" vs "open ai").
        const isSingleWord = !/\s/.test(q);
        const looksLikeUrlOrDomain = /[./]/.test(q) || /^https?:/i.test(q);
        if (isSingleWord && !looksLikeUrlOrDomain) {
            // Split a trailing "ai" (e.g. openai -> open ai). Four or more
            // leading letters so that the "openai" example actually matches.
            if (/^[a-z]{4,}ai$/i.test(q)) {
                attempts.push(`${q.slice(0, -2)} ai`);
            }
            // Common suffixes that often return at least the official domain.
            attempts.push(`${q}.com`, `site:${q}.com`, `${q} website`);
        }
        // De-dupe attempts (case-insensitive), keeping first occurrences.
        const seen = new Set();
        return attempts
            .map((s) => s.trim())
            .filter((s) => {
            if (s.length === 0)
                return false;
            const key = s.toLowerCase();
            if (seen.has(key))
                return false;
            seen.add(key);
            return true;
        });
    }
    /**
     * Build the DuckDuckGo HTML endpoint URL for one query.
     *
     * @param {string} query - Query text for the `q` parameter.
     * @param {object} options - Search options; `tbs` is sent as `df` (time
     *   filter) and `country` (or, failing that, `location`) is lower-cased
     *   and sent as `kl`.
     * @returns {string} Absolute request URL.
     */
    buildSearchUrl(query, options) {
        const { tbs, country, location } = options;
        const params = new URLSearchParams();
        params.set('q', query);
        if (tbs) {
            // DDG uses `df` for time filtering on the html endpoint.
            params.set('df', tbs);
        }
        const region = (country || location || '').toLowerCase();
        if (region)
            params.set('kl', region);
        return `https://html.duckduckgo.com/html/?${params.toString()}`;
    }
    /**
     * Turn a result anchor's `href` into the final, validated target URL.
     *
     * DuckDuckGo wraps targets in a redirect whose `uddg` query parameter
     * holds the real URL. `URLSearchParams.get` already percent-decodes the
     * value, so it is used as-is; decoding it a second time would corrupt
     * targets that contain an encoded `%`.
     *
     * @param {string} rawUrl - The `href` taken from the result markup.
     * @returns {string|null} Normalised http(s) URL, or `null` when the target
     *   cannot be parsed or uses another protocol.
     */
    resolveResultUrl(rawUrl) {
        let url = rawUrl;
        try {
            const ddgUrl = new URL(rawUrl, 'https://duckduckgo.com');
            const uddg = ddgUrl.searchParams.get('uddg');
            if (uddg)
                url = uddg;
        }
        catch {
            // Use raw URL if parsing fails.
        }
        // SECURITY: only allow HTTP/HTTPS URLs.
        try {
            let parsed;
            try {
                parsed = new URL(url);
            }
            catch {
                // Handle protocol-relative or relative URLs (rare but possible).
                parsed = new URL(url, 'https://duckduckgo.com');
            }
            return ['http:', 'https:'].includes(parsed.protocol) ? parsed.href : null;
        }
        catch {
            return null;
        }
    }
    /**
     * Run a single query against the HTML endpoint and parse the results.
     *
     * @param {string} query - Exact query text to send.
     * @param {object} options - Search options; `count` caps the number of
     *   results and `signal` is forwarded to the HTTP request.
     * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
     * @throws {Error} When the endpoint responds with a non-OK status.
     */
    async searchOnce(query, options) {
        const { count, signal } = options;
        const searchUrl = this.buildSearchUrl(query, options);
        const response = await undiciFetch(searchUrl, {
            headers: {
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.9',
            },
            signal,
        });
        if (!response.ok) {
            throw new Error(`Search failed: HTTP ${response.status}`);
        }
        const html = await response.text();
        const $ = load(html);
        const results = [];
        const seen = new Set();
        $('.result').each((_i, elem) => {
            // Returning false stops the iteration once enough results exist.
            if (results.length >= count)
                return false;
            const $result = $(elem);
            // Be resilient to markup variations: title can be in .result__title
            // or directly on the anchor.
            const titleRaw = $result.find('.result__title').text() || $result.find('.result__a').text();
            const rawUrl = $result.find('.result__a').attr('href') || '';
            const snippetRaw = $result.find('.result__snippet').text();
            const title = cleanText(titleRaw, { maxLen: 200 });
            const snippet = cleanText(snippetRaw, { maxLen: 500, stripEllipsisPadding: true });
            if (!title || !rawUrl)
                return undefined;
            const url = this.resolveResultUrl(rawUrl);
            if (url === null)
                return undefined;
            // Deduplicate by normalized URL (strip query params, www, trailing slash).
            const dedupeKey = normalizeUrlForDedupe(url);
            if (seen.has(dedupeKey))
                return undefined;
            seen.add(dedupeKey);
            results.push({ title, url, snippet });
            return undefined;
        });
        return results;
    }
    /**
     * Search the web, retrying with alternative query variants only while
     * DuckDuckGo returns zero results.
     *
     * @param {string} query - Query as supplied by the caller.
     * @param {object} options - Search options passed to every attempt.
     * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
     *   Results of the first attempt that yields any, otherwise `[]`.
     */
    async searchWeb(query, options) {
        for (const attempt of this.buildQueryAttempts(query)) {
            const results = await this.searchOnce(attempt, options);
            if (results.length > 0)
                return results;
        }
        return [];
    }
}
215
/**
 * Search provider backed by the Brave Search web API (bring your own key).
 */
export class BraveSearchProvider {
    id = 'brave';
    requiresApiKey = true;
    /**
     * Query the Brave web-search endpoint and map the response to results.
     *
     * @param {string} query - Query text.
     * @param {object} options - Search options; `apiKey` is mandatory,
     *   `count` caps the results (the request itself asks for 1-10) and
     *   `signal` is forwarded to the HTTP request.
     * @returns {Promise<Array<{title: string, url: string, snippet: string}>>}
     * @throws {Error} When no API key is supplied or the API responds with a
     *   non-OK status.
     */
    async searchWeb(query, options) {
        const { count, apiKey, signal } = options;
        const hasKey = Boolean(apiKey) && apiKey.trim().length !== 0;
        if (!hasKey) {
            throw new Error('Brave Search requires an API key');
        }
        const endpoint = new URL('https://api.search.brave.com/res/v1/web/search');
        const requested = Math.min(Math.max(count, 1), 10);
        endpoint.searchParams.set('q', query);
        endpoint.searchParams.set('count', String(requested));
        const response = await undiciFetch(endpoint.toString(), {
            headers: {
                'Accept': 'application/json',
                'X-Subscription-Token': apiKey,
            },
            signal,
        });
        if (!response.ok) {
            const body = await response.text().catch(() => '');
            const detail = body ? ` - ${body}` : '';
            throw new Error(`Brave Search failed: HTTP ${response.status}${detail}`);
        }
        const payload = await response.json();
        const entries = payload?.web?.results;
        if (!Array.isArray(entries)) {
            return [];
        }
        const trimmedOrEmpty = (value) => (typeof value === 'string' ? value.trim() : '');
        // SECURITY: only http(s) targets are passed on to callers.
        const isHttpUrl = (candidate) => {
            try {
                return ['http:', 'https:'].includes(new URL(candidate).protocol);
            }
            catch {
                return false;
            }
        };
        const hits = [];
        for (const entry of entries) {
            if (hits.length >= count)
                break;
            const title = trimmedOrEmpty(entry?.title);
            const link = trimmedOrEmpty(entry?.url);
            // `description` wins whenever it is a string; `snippet` is only a
            // fallback for entries without one.
            const snippet = typeof entry?.description === 'string'
                ? entry.description.trim()
                : trimmedOrEmpty(entry?.snippet);
            if (!title || !link || !isHttpUrl(link))
                continue;
            hits.push({
                title: title.slice(0, 200),
                url: link,
                snippet: snippet.slice(0, 500),
            });
        }
        return hits;
    }
}
273
/**
 * Create the search provider for the given identifier.
 *
 * @param {string} [id] - `'brave'` selects Brave Search; a missing id,
 *   `'duckduckgo'`, or any unrecognised value selects DuckDuckGo.
 * @returns {DuckDuckGoProvider|BraveSearchProvider} A new provider instance.
 */
export function getSearchProvider(id) {
    // DuckDuckGo doubles as the default and as the fallback for unknown ids.
    return id === 'brave'
        ? new BraveSearchProvider()
        : new DuckDuckGoProvider();
}
281
+ //# sourceMappingURL=search-provider.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search-provider.js","sourceRoot":"","sources":["../../src/core/search-provider.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,KAAK,IAAI,WAAW,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAgC/B,SAAS,kBAAkB,CAAC,KAAa;IACvC,4EAA4E;IAC5E,wEAAwE;IACxE,OAAO,KAAK;SACT,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,mBAAmB,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE;QACxC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,QAAQ;YAAE,OAAO,EAAE,CAAC;QAC/D,IAAI,CAAC;YACH,OAAO,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC;SACD,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,EAAE;QAChC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,GAAG,QAAQ;YAAE,OAAO,EAAE,CAAC;QAC/D,IAAI,CAAC;YACH,OAAO,MAAM,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC;AAED,SAAS,SAAS,CAChB,KAAa,EACb,IAGC;IAED,IAAI,CAAC,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;IAClC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAElC,IAAI,IAAI,CAAC,oBAAoB,EAAE,CAAC;QAC9B,6DAA6D;QAC7D,CAAC,GAAG,CAAC;aACF,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;aACjC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;aACjC,IAAI,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACxD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,qBAAqB,CAAC,MAAc;IAC3C,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC5D,IAAI,IAAI,GAAG,CAAC,CAAC,QAAQ,IAAI,GAAG,
CAAC;QAC7B,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACjC,OAAO,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM;aACV,IAAI,EAAE;aACN,WAAW,EAAE;aACb,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;aAC3B,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;aACrB,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,MAAM,OAAO,kBAAkB;IACpB,EAAE,GAAqB,YAAY,CAAC;IACpC,cAAc,GAAG,KAAK,CAAC;IAExB,kBAAkB,CAAC,aAAqB;QAC9C,MAAM,CAAC,GAAG,aAAa,CAAC,IAAI,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC;YAAE,OAAO,EAAE,CAAC;QAElB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,iCAAiC;QACjC,oBAAoB;QACpB,kBAAkB;QAClB,kBAAkB;QAClB,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAE7B,2EAA2E;QAC3E,2EAA2E;QAC3E,wEAAwE;QACxE,qCAAqC;QACrC,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACnC,MAAM,oBAAoB,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnE,IAAI,YAAY,IAAI,CAAC,oBAAoB,EAAE,CAAC;YAC1C,yDAAyD;YACzD,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7B,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxC,CAAC;YAED,iEAAiE;YACjE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAChC,CAAC;QAED,sCAAsC;QACtC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,OAAO,QAAQ;aACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;aAC3B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YACZ,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;YAC5B,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,cAAc,CAAC,KAAa,EAAE,OAAyB;QAC7D,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;QAE3C,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,CAAC,GAAG
,EAAE,KAAK,CAAC,CAAC;QAEvB,mDAAmD;QACnD,IAAI,GAAG,EAAE,CAAC;YACR,oDAAoD;YACpD,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACxB,CAAC;QAED,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,CAAC,OAAO,IAAI,QAAQ,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;YACzD,IAAI,MAAM;gBAAE,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,OAAO,qCAAqC,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;IAClE,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,KAAa,EAAE,OAAyB;QAC/D,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAElC,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAEtD,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE;YAC5C,OAAO,EAAE;gBACP,YAAY,EAAE,oEAAoE;gBAClF,QAAQ,EAAE,iEAAiE;gBAC3E,iBAAiB,EAAE,gBAAgB;aACpC;YACD,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;QAErB,MAAM,OAAO,GAAsB,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,EAAE;YAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;gBAAE,OAAO;YAEpC,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;YAExB,uEAAuE;YACvE,0BAA0B;YAC1B,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,IAAI,EAAE,IAAI,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5F,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAC7D,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC,IAAI,EAAE,CAAC;YAE3D,IAAI,KAAK,GAAG,SAAS,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;YACjD,IAAI,OAAO,GAAG,SAAS,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,oBAAoB,EAAE,IAAI,EAAE,CAAC,CAAC;YAEjF,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM;gBAAE,OAAO;YAE9B,8CAA8C;YAC9C,IAAI,GAAG,GAAG,MAAM,CAAC;YACjB,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;gBACzD,MAAM,IAAI,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;gBAC7C,IAAI,IAAI;oBAAE,GAAG,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAC3C,CAAC;YAAC,MAAM,CAAC;gBACP,+BAA+B;YACjC,CAAC;YAED,uEAAuE;YA
CvE,IAAI,CAAC;gBACH,IAAI,MAAW,CAAC;gBAChB,IAAI,CAAC;oBACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;gBACxB,CAAC;gBAAC,MAAM,CAAC;oBACP,gEAAgE;oBAChE,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;gBAClD,CAAC;gBACD,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACnD,OAAO;gBACT,CAAC;gBACD,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC;YACpB,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO;YACT,CAAC;YAED,0EAA0E;YAC1E,MAAM,SAAS,GAAG,qBAAqB,CAAC,GAAG,CAAC,CAAC;YAC7C,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC;gBAAE,OAAO;YAChC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAEpB,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,OAAyB;QACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,CAAC;QAEhD,yCAAyC;QACzC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAClD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,OAAO,CAAC;QACzC,CAAC;QAED,OAAO,EAAE,CAAC;IACZ,CAAC;CACF;AAED,MAAM,OAAO,mBAAmB;IACrB,EAAE,GAAqB,OAAO,CAAC;IAC/B,cAAc,GAAG,IAAI,CAAC;IAE/B,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,OAAyB;QACtD,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;QAE1C,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1C,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,gDAAgD,CAAC,CAAC;QACtE,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACjC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;QAExE,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE;YACjD,OAAO,EAAE;gBACP,QAAQ,EAAE,kBAAkB;gBAC5B,sBAAsB,EAAE,MAAM;aAC/B;YACD,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7F,CAAC;QAED,MAA
M,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAS,CAAC;QAC1C,MAAM,YAAY,GAAU,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC;QAE/C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;YACjC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,OAAO,GAAsB,EAAE,CAAC;QAEtC,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;YAC7B,IAAI,OAAO,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAM;YACnC,MAAM,KAAK,GAAG,OAAO,CAAC,EAAE,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACjE,MAAM,MAAM,GAAG,OAAO,CAAC,EAAE,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,OAAO,GAAG,OAAO,CAAC,EAAE,WAAW,KAAK,QAAQ;gBAChD,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE;gBACtB,CAAC,CAAC,OAAO,CAAC,EAAE,OAAO,KAAK,QAAQ;oBAC9B,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE;oBAClB,CAAC,CAAC,EAAE,CAAC;YAET,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM;gBAAE,SAAS;YAEhC,kCAAkC;YAClC,IAAI,CAAC;gBACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;gBAC/B,IAAI,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC;oBAAE,SAAS;YAC/D,CAAC;YAAC,MAAM,CAAC;gBACP,SAAS;YACX,CAAC;YAED,OAAO,CAAC,IAAI,CAAC;gBACX,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;gBAC1B,GAAG,EAAE,MAAM;gBACX,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;aAC/B,CAAC,CAAC;QACL,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED,MAAM,UAAU,iBAAiB,CAAC,EAAgC;IAChE,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,YAAY;QAAE,OAAO,IAAI,kBAAkB,EAAE,CAAC;IAChE,IAAI,EAAE,KAAK,OAAO;QAAE,OAAO,IAAI,mBAAmB,EAAE,CAAC;IAErD,4DAA4D;IAC5D,OAAO,IAAI,kBAAkB,EAAE,CAAC;AAClC,CAAC"}
@@ -2,6 +2,7 @@
2
2
  * Smart escalation strategy: try simple fetch first, escalate to browser if needed
3
3
  */
4
4
  import { type FetchResult } from './fetcher.js';
5
+ export declare function clearDomainIntel(): void;
5
6
  export interface StrategyOptions {
6
7
  /** Force browser mode (skip simple fetch) */
7
8
  forceBrowser?: boolean;
@@ -33,6 +34,10 @@ export interface StrategyOptions {
33
34
  }>;
34
35
  /** Keep browser page open for reuse (caller must close) */
35
36
  keepPageOpen?: boolean;
37
+ /** Disable response cache for this request */
38
+ noCache?: boolean;
39
+ /** Time to wait before launching browser in parallel with simple fetch */
40
+ raceTimeoutMs?: number;
36
41
  /** Location/language for geo-targeted scraping */
37
42
  location?: {
38
43
  country?: string;
@@ -40,19 +45,11 @@ export interface StrategyOptions {
40
45
  };
41
46
  }
42
47
  export interface StrategyResult extends FetchResult {
43
- /** Which strategy succeeded: 'simple' | 'browser' | 'stealth' */
44
- method: 'simple' | 'browser' | 'stealth';
48
+ /** Which strategy succeeded: 'simple' | 'browser' | 'stealth' | 'cached' */
49
+ method: 'simple' | 'browser' | 'stealth' | 'cached';
45
50
  }
46
51
  /**
47
52
  * Smart fetch with automatic escalation
48
- *
49
- * Strategy:
50
- * 1. Try simple HTTP fetch first (fast, ~200ms)
51
- * 2. If blocked (403, 503, Cloudflare, empty body) → try browser
52
- * 3. If browser gets blocked (403, CAPTCHA) → try stealth mode
53
- * 4. If stealth mode is explicitly requested → skip to stealth
54
- *
55
- * Returns the result along with which method worked
56
53
  */
57
54
  export declare function smartFetch(url: string, options?: StrategyOptions): Promise<StrategyResult>;
58
55
  //# sourceMappingURL=strategies.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAGvF,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gDAAgD;IAChD,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,2DAA2D;IAC3D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,kDAAkD;IAClD,QAAQ,CAAC,EAAE;QACT,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,iEAAiE;IACjE,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,CAAC;CAC1C;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CA6GpG"}
1
+ {"version":3,"file":"strategies.d.ts","sourceRoot":"","sources":["../../src/core/strategies.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAyC,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAoIvF,wBAAgB,gBAAgB,IAAI,IAAI,CAGvC;AA6ED,MAAM,WAAW,eAAe;IAC9B,6CAA6C;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,qDAAqD;IACrD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,oDAAoD;IACpD,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uCAAuC;IACvC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,gDAAgD;IAChD,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,OAAO,GAAG,iBAAiB,GAAG,YAAY,CAAC;QACtH,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,EAAE,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,2DAA2D;IAC3D,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,8CAA8C;IAC9C,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,0EAA0E;IAC1E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,CAAC,EAAE;QACT,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,4EAA4E;IAC5E,MAAM,EAAE,QAAQ,GAAG,SAAS,GAAG,SAAS,GAAG,QAAQ,CAAC;CACrD;AAuGD;;GAEG;AACH,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,cAAc,CAAC,CAqNpG"}