@ignidor/web-search-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +267 -0
- package/bin/web-search-mcp.js +13 -0
- package/dist/crawl4ai-client.d.ts +238 -0
- package/dist/crawl4ai-client.d.ts.map +1 -0
- package/dist/crawl4ai-client.js +608 -0
- package/dist/crawl4ai-client.js.map +1 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +561 -0
- package/dist/index.js.map +1 -0
- package/dist/playwright-crawler.d.ts +92 -0
- package/dist/playwright-crawler.d.ts.map +1 -0
- package/dist/playwright-crawler.js +454 -0
- package/dist/playwright-crawler.js.map +1 -0
- package/dist/ranking.d.ts +58 -0
- package/dist/ranking.d.ts.map +1 -0
- package/dist/ranking.js +218 -0
- package/dist/ranking.js.map +1 -0
- package/dist/search.d.ts +15 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +187 -0
- package/dist/search.js.map +1 -0
- package/dist/types/index.d.ts +131 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +3 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/concurrency.d.ts +24 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +53 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/validators.d.ts +21 -0
- package/dist/utils/validators.d.ts.map +1 -0
- package/dist/utils/validators.js +75 -0
- package/dist/utils/validators.js.map +1 -0
- package/package.json +77 -0
|
@@ -0,0 +1,608 @@
|
|
|
1
|
+
// Crawl4AI REST Client - Full Feature Set
|
|
2
|
+
import axios from 'axios';
|
|
3
|
+
// ============================================================================
|
|
4
|
+
// Configuration
|
|
5
|
+
// ============================================================================
|
|
6
|
+
const DEFAULT_CRAWL4AI_URL = 'http://localhost:11235';
|
|
7
|
+
const DEFAULT_TIMEOUT = 30000; // 30 seconds
|
|
8
|
+
// ============================================================================
|
|
9
|
+
// Crawl4AI Client - Full Featured
|
|
10
|
+
// ============================================================================
|
|
11
|
+
export class Crawl4AIClient {
|
|
12
|
+
baseUrl;
|
|
13
|
+
timeout;
|
|
14
|
+
available = false;
|
|
15
|
+
constructor(baseUrl = DEFAULT_CRAWL4AI_URL, timeout = DEFAULT_TIMEOUT) {
|
|
16
|
+
this.baseUrl = baseUrl.replace(/\/$/, ''); // Remove trailing slash
|
|
17
|
+
this.timeout = timeout;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Check if Crawl4AI service is available.
|
|
21
|
+
* Gracefully handles if the service is not running.
|
|
22
|
+
*/
|
|
23
|
+
async checkHealth() {
|
|
24
|
+
try {
|
|
25
|
+
const response = await axios.get(`${this.baseUrl}/health`, {
|
|
26
|
+
timeout: 2000
|
|
27
|
+
});
|
|
28
|
+
this.available = response.status === 200;
|
|
29
|
+
if (this.available) {
|
|
30
|
+
console.error('[Crawl4AI] Service available at', this.baseUrl);
|
|
31
|
+
}
|
|
32
|
+
return this.available;
|
|
33
|
+
}
|
|
34
|
+
catch {
|
|
35
|
+
this.available = false;
|
|
36
|
+
console.warn('[Crawl4AI] Service not available - deep extraction disabled');
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
// ========================================================================
|
|
41
|
+
// Basic Content Extraction
|
|
42
|
+
// ========================================================================
|
|
43
|
+
/**
|
|
44
|
+
* Extract full content from a URL using Crawl4AI /crawl endpoint.
|
|
45
|
+
* Returns null if service is unavailable.
|
|
46
|
+
*/
|
|
47
|
+
async extractUrl(url, config) {
|
|
48
|
+
if (!this.available) {
|
|
49
|
+
const isHealthy = await this.checkHealth();
|
|
50
|
+
if (!isHealthy)
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
53
|
+
try {
|
|
54
|
+
const response = await axios.post(`${this.baseUrl}/crawl`, {
|
|
55
|
+
urls: [url],
|
|
56
|
+
browser_config: {
|
|
57
|
+
type: 'BrowserConfig',
|
|
58
|
+
params: { headless: true }
|
|
59
|
+
},
|
|
60
|
+
crawler_config: {
|
|
61
|
+
type: 'CrawlerRunConfig',
|
|
62
|
+
params: {
|
|
63
|
+
cache_mode: 'bypass',
|
|
64
|
+
screenshot: config?.includeImages || false
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}, { timeout: this.timeout });
|
|
68
|
+
if (response.data?.results?.[0]) {
|
|
69
|
+
const result = response.data.results[0];
|
|
70
|
+
return {
|
|
71
|
+
success: result.success || false,
|
|
72
|
+
url: result.url || url,
|
|
73
|
+
markdown: result.markdown,
|
|
74
|
+
cleaned_html: result.cleaned_html,
|
|
75
|
+
title: result.title,
|
|
76
|
+
links: result.links
|
|
77
|
+
? { internal: result.links.internal || 0, external: result.links.external || 0 }
|
|
78
|
+
: undefined,
|
|
79
|
+
media: result.media
|
|
80
|
+
? { images: result.media.images || [] }
|
|
81
|
+
: undefined,
|
|
82
|
+
error: result.error
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
return null;
|
|
86
|
+
}
|
|
87
|
+
catch (error) {
|
|
88
|
+
console.error(`[Crawl4AI] Failed to extract ${url}:`, error instanceof Error ? error.message : error);
|
|
89
|
+
return null;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Extract multiple URLs in parallel with concurrency control.
|
|
94
|
+
*/
|
|
95
|
+
async extractMultipleUrls(urls, config, concurrency = 3) {
|
|
96
|
+
const results = new Map();
|
|
97
|
+
for (let i = 0; i < urls.length; i += concurrency) {
|
|
98
|
+
const batch = urls.slice(i, i + concurrency);
|
|
99
|
+
const responses = await Promise.allSettled(batch.map(url => this.extractUrl(url, config)));
|
|
100
|
+
batch.forEach((url, idx) => {
|
|
101
|
+
const response = responses[idx];
|
|
102
|
+
if (response.status === 'fulfilled' && response.value) {
|
|
103
|
+
results.set(url, response.value);
|
|
104
|
+
}
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
return results;
|
|
108
|
+
}
|
|
109
|
+
// ========================================================================
|
|
110
|
+
// Markdown Generation with Advanced Formatting
|
|
111
|
+
// ========================================================================
|
|
112
|
+
/**
|
|
113
|
+
* Generate markdown from a URL with formatting options.
|
|
114
|
+
* Formats: raw (raw content), fit (clean content), bm25 (BM25 ranked), llm (LLM optimized)
|
|
115
|
+
*/
|
|
116
|
+
async generateMarkdown(url, options = {}) {
|
|
117
|
+
if (!this.available) {
|
|
118
|
+
const isHealthy = await this.checkHealth();
|
|
119
|
+
if (!isHealthy)
|
|
120
|
+
return null;
|
|
121
|
+
}
|
|
122
|
+
try {
|
|
123
|
+
const response = await axios.post(`${this.baseUrl}/md`, {
|
|
124
|
+
url,
|
|
125
|
+
f: options.format || 'fit',
|
|
126
|
+
q: options.query,
|
|
127
|
+
c: options.bypassCache ? '0' : '1'
|
|
128
|
+
}, { timeout: this.timeout });
|
|
129
|
+
return response.data?.markdown_content || null;
|
|
130
|
+
}
|
|
131
|
+
catch (error) {
|
|
132
|
+
console.error(`[Crawl4AI] Markdown generation failed:`, error instanceof Error ? error.message : error);
|
|
133
|
+
return null;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
// ========================================================================
|
|
137
|
+
// Screenshot Capture
|
|
138
|
+
// ========================================================================
|
|
139
|
+
/**
|
|
140
|
+
* Capture a screenshot of a webpage.
|
|
141
|
+
* Returns the screenshot path or base64 data if no path provided.
|
|
142
|
+
*/
|
|
143
|
+
async captureScreenshot(url, options = {}) {
|
|
144
|
+
if (!this.available) {
|
|
145
|
+
const isHealthy = await this.checkHealth();
|
|
146
|
+
if (!isHealthy)
|
|
147
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
148
|
+
}
|
|
149
|
+
try {
|
|
150
|
+
const response = await axios.post(`${this.baseUrl}/screenshot`, {
|
|
151
|
+
url,
|
|
152
|
+
screenshot_wait_for: options.waitFor || 2,
|
|
153
|
+
output_path: options.fullPath
|
|
154
|
+
}, { timeout: this.timeout });
|
|
155
|
+
if (response.data?.image_data) {
|
|
156
|
+
return { success: true, base64: response.data.image_data };
|
|
157
|
+
}
|
|
158
|
+
return { success: true, path: response.data?.message || 'Screenshot captured' };
|
|
159
|
+
}
|
|
160
|
+
catch (error) {
|
|
161
|
+
return {
|
|
162
|
+
success: false,
|
|
163
|
+
error: error instanceof Error ? error.message : String(error)
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// ========================================================================
|
|
168
|
+
// PDF Generation
|
|
169
|
+
// ========================================================================
|
|
170
|
+
/**
|
|
171
|
+
* Generate a PDF from a webpage.
|
|
172
|
+
*/
|
|
173
|
+
async generatePDF(url, outputPath) {
|
|
174
|
+
if (!this.available) {
|
|
175
|
+
const isHealthy = await this.checkHealth();
|
|
176
|
+
if (!isHealthy)
|
|
177
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
178
|
+
}
|
|
179
|
+
try {
|
|
180
|
+
const response = await axios.post(`${this.baseUrl}/pdf`, {
|
|
181
|
+
url,
|
|
182
|
+
output_path: outputPath
|
|
183
|
+
}, { timeout: this.timeout });
|
|
184
|
+
if (response.data?.pdf_data) {
|
|
185
|
+
return { success: true, base64: response.data.pdf_data };
|
|
186
|
+
}
|
|
187
|
+
return { success: true, path: response.data?.message || 'PDF generated' };
|
|
188
|
+
}
|
|
189
|
+
catch (error) {
|
|
190
|
+
return {
|
|
191
|
+
success: false,
|
|
192
|
+
error: error instanceof Error ? error.message : String(error)
|
|
193
|
+
};
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
// ========================================================================
|
|
197
|
+
// JavaScript Execution
|
|
198
|
+
// ========================================================================
|
|
199
|
+
/**
|
|
200
|
+
* Execute custom JavaScript on a webpage.
|
|
201
|
+
* Returns array of results from each script.
|
|
202
|
+
*/
|
|
203
|
+
async executeJavaScript(url, options) {
|
|
204
|
+
if (!this.available) {
|
|
205
|
+
const isHealthy = await this.checkHealth();
|
|
206
|
+
if (!isHealthy)
|
|
207
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
208
|
+
}
|
|
209
|
+
try {
|
|
210
|
+
const response = await axios.post(`${this.baseUrl}/execute_js`, {
|
|
211
|
+
url,
|
|
212
|
+
scripts: options.scripts
|
|
213
|
+
}, { timeout: this.timeout });
|
|
214
|
+
return { success: true, results: response.data?.results || [] };
|
|
215
|
+
}
|
|
216
|
+
catch (error) {
|
|
217
|
+
return {
|
|
218
|
+
success: false,
|
|
219
|
+
error: error instanceof Error ? error.message : String(error)
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
// ========================================================================
|
|
224
|
+
// CSS-Based Structured Extraction
|
|
225
|
+
// ========================================================================
|
|
226
|
+
/**
|
|
227
|
+
* Extract structured data using CSS selectors.
|
|
228
|
+
*/
|
|
229
|
+
async extractWithCSS(url, schema) {
|
|
230
|
+
if (!this.available) {
|
|
231
|
+
const isHealthy = await this.checkHealth();
|
|
232
|
+
if (!isHealthy)
|
|
233
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
234
|
+
}
|
|
235
|
+
try {
|
|
236
|
+
const response = await axios.post(`${this.baseUrl}/crawl`, {
|
|
237
|
+
urls: [url],
|
|
238
|
+
crawler_config: {
|
|
239
|
+
type: 'CrawlerRunConfig',
|
|
240
|
+
params: {
|
|
241
|
+
extraction_strategy: {
|
|
242
|
+
type: 'JsonCssExtractionStrategy',
|
|
243
|
+
params: {
|
|
244
|
+
schema: {
|
|
245
|
+
type: 'dict',
|
|
246
|
+
value: schema
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}, { timeout: this.timeout });
|
|
253
|
+
if (response.data?.results?.[0]?.extracted_content) {
|
|
254
|
+
const data = typeof response.data.results[0].extracted_content === 'string'
|
|
255
|
+
? JSON.parse(response.data.results[0].extracted_content)
|
|
256
|
+
: response.data.results[0].extracted_content;
|
|
257
|
+
return { success: true, data };
|
|
258
|
+
}
|
|
259
|
+
return { success: false, error: 'No content extracted' };
|
|
260
|
+
}
|
|
261
|
+
catch (error) {
|
|
262
|
+
return {
|
|
263
|
+
success: false,
|
|
264
|
+
error: error instanceof Error ? error.message : String(error)
|
|
265
|
+
};
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
// ========================================================================
|
|
269
|
+
// HTML Extraction (Preprocessed)
|
|
270
|
+
// ========================================================================
|
|
271
|
+
/**
|
|
272
|
+
* Get preprocessed HTML optimized for schema extraction.
|
|
273
|
+
*/
|
|
274
|
+
async getHTML(url) {
|
|
275
|
+
if (!this.available) {
|
|
276
|
+
const isHealthy = await this.checkHealth();
|
|
277
|
+
if (!isHealthy)
|
|
278
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
279
|
+
}
|
|
280
|
+
try {
|
|
281
|
+
const response = await axios.post(`${this.baseUrl}/html`, { url }, { timeout: this.timeout });
|
|
282
|
+
return { success: true, html: response.data?.html_content || '' };
|
|
283
|
+
}
|
|
284
|
+
catch (error) {
|
|
285
|
+
return {
|
|
286
|
+
success: false,
|
|
287
|
+
error: error instanceof Error ? error.message : String(error)
|
|
288
|
+
};
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
// ========================================================================
|
|
292
|
+
// Deep Crawling
|
|
293
|
+
// ========================================================================
|
|
294
|
+
/**
|
|
295
|
+
* Perform deep crawling with BFS, DFS, or BestFirst strategy.
|
|
296
|
+
* Returns a list of crawled pages with their content.
|
|
297
|
+
*/
|
|
298
|
+
async deepCrawl(url, options = {}) {
|
|
299
|
+
if (!this.available) {
|
|
300
|
+
const isHealthy = await this.checkHealth();
|
|
301
|
+
if (!isHealthy)
|
|
302
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
303
|
+
}
|
|
304
|
+
try {
|
|
305
|
+
// Build deep crawl strategy config
|
|
306
|
+
const strategyType = options.strategy || 'bfs';
|
|
307
|
+
const strategyConfig = {
|
|
308
|
+
max_depth: options.maxDepth || 2,
|
|
309
|
+
include_external: options.includeExternal || false,
|
|
310
|
+
max_pages: options.maxPages,
|
|
311
|
+
};
|
|
312
|
+
// Add strategy-specific params
|
|
313
|
+
if (strategyType === 'best_first' && options.urlScorer) {
|
|
314
|
+
strategyConfig.url_scorer = {
|
|
315
|
+
type: 'KeywordRelevanceScorer',
|
|
316
|
+
keywords: options.urlScorer.keywords,
|
|
317
|
+
weight: options.urlScorer.weight || 0.7
|
|
318
|
+
};
|
|
319
|
+
}
|
|
320
|
+
else if (options.scoreThreshold) {
|
|
321
|
+
strategyConfig.score_threshold = options.scoreThreshold;
|
|
322
|
+
}
|
|
323
|
+
// Add filter chain if provided
|
|
324
|
+
if (options.filterChain) {
|
|
325
|
+
strategyConfig.filter_chain = {
|
|
326
|
+
type: 'FilterChain',
|
|
327
|
+
filters: options.filterChain.filters
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
const response = await axios.post(`${this.baseUrl}/crawl`, {
|
|
331
|
+
urls: [url],
|
|
332
|
+
crawler_config: {
|
|
333
|
+
type: 'CrawlerRunConfig',
|
|
334
|
+
params: {
|
|
335
|
+
deep_crawl_strategy: {
|
|
336
|
+
type: `${strategyType.toUpperCase()}DeepCrawlStrategy`,
|
|
337
|
+
params: strategyConfig
|
|
338
|
+
},
|
|
339
|
+
stream: options.stream || false
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
}, { timeout: this.timeout * 5 } // Longer timeout for deep crawl
|
|
343
|
+
);
|
|
344
|
+
if (response.data?.results) {
|
|
345
|
+
const results = response.data.results.map((r) => ({
|
|
346
|
+
url: r.url || url,
|
|
347
|
+
depth: r.metadata?.depth || 0,
|
|
348
|
+
score: r.metadata?.score,
|
|
349
|
+
success: r.success || false,
|
|
350
|
+
markdown: r.markdown,
|
|
351
|
+
title: r.title,
|
|
352
|
+
error: r.error
|
|
353
|
+
}));
|
|
354
|
+
return { success: true, results };
|
|
355
|
+
}
|
|
356
|
+
return { success: false, error: 'Deep crawl failed' };
|
|
357
|
+
}
|
|
358
|
+
catch (error) {
|
|
359
|
+
return {
|
|
360
|
+
success: false,
|
|
361
|
+
error: error instanceof Error ? error.message : String(error)
|
|
362
|
+
};
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
// ========================================================================
|
|
366
|
+
// Adaptive Crawling
|
|
367
|
+
// ========================================================================
|
|
368
|
+
/**
|
|
369
|
+
* Perform adaptive crawling - intelligently explores until enough information is gathered.
|
|
370
|
+
* Uses statistical (free) or embedding (API) strategies.
|
|
371
|
+
*/
|
|
372
|
+
async adaptiveCrawl(url, query, options = {}) {
|
|
373
|
+
if (!this.available) {
|
|
374
|
+
const isHealthy = await this.checkHealth();
|
|
375
|
+
if (!isHealthy)
|
|
376
|
+
return { success: false, confidence: 0, pagesCrawled: 0, error: 'Crawl4AI not available' };
|
|
377
|
+
}
|
|
378
|
+
try {
|
|
379
|
+
const config = {
|
|
380
|
+
strategy: options.strategy || 'statistical',
|
|
381
|
+
confidence_threshold: options.confidenceThreshold || 0.7,
|
|
382
|
+
max_pages: options.maxPages || 20,
|
|
383
|
+
top_k_links: options.topKLinks || 3,
|
|
384
|
+
};
|
|
385
|
+
if (options.strategy === 'embedding') {
|
|
386
|
+
config.embedding_model = options.embeddingModel || 'sentence-transformers/all-MiniLM-L6-v2';
|
|
387
|
+
}
|
|
388
|
+
const response = await axios.post(`${this.baseUrl}/adaptive_crawl`, {
|
|
389
|
+
url,
|
|
390
|
+
query,
|
|
391
|
+
config,
|
|
392
|
+
save_state: options.saveState,
|
|
393
|
+
state_path: options.statePath
|
|
394
|
+
}, { timeout: this.timeout * 10 } // Very long timeout for adaptive crawl
|
|
395
|
+
);
|
|
396
|
+
if (response.data) {
|
|
397
|
+
return {
|
|
398
|
+
success: true,
|
|
399
|
+
confidence: response.data.confidence || 0,
|
|
400
|
+
pagesCrawled: response.data.pages_crawled || 0,
|
|
401
|
+
relevantContent: response.data.relevant_content,
|
|
402
|
+
metrics: response.data.metrics
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
return { success: false, confidence: 0, pagesCrawled: 0, error: 'Adaptive crawl failed' };
|
|
406
|
+
}
|
|
407
|
+
catch (error) {
|
|
408
|
+
return {
|
|
409
|
+
success: false,
|
|
410
|
+
confidence: 0,
|
|
411
|
+
pagesCrawled: 0,
|
|
412
|
+
error: error instanceof Error ? error.message : String(error)
|
|
413
|
+
};
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
// ========================================================================
|
|
417
|
+
// Regex Extraction
|
|
418
|
+
// ========================================================================
|
|
419
|
+
/**
|
|
420
|
+
* Extract data using regex patterns.
|
|
421
|
+
* Supports built-in patterns (email, phone, url, currency, etc.) and custom patterns.
|
|
422
|
+
*/
|
|
423
|
+
async extractWithRegex(url, options) {
|
|
424
|
+
if (!this.available) {
|
|
425
|
+
const isHealthy = await this.checkHealth();
|
|
426
|
+
if (!isHealthy)
|
|
427
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
428
|
+
}
|
|
429
|
+
try {
|
|
430
|
+
// Map pattern types to Crawl4AI expected format
|
|
431
|
+
const patternMap = {
|
|
432
|
+
email: 'Email',
|
|
433
|
+
phone_intl: 'PhoneIntl',
|
|
434
|
+
phone_us: 'PhoneUS',
|
|
435
|
+
url: 'Url',
|
|
436
|
+
ipv4: 'IPv4',
|
|
437
|
+
ipv6: 'IPv6',
|
|
438
|
+
uuid: 'Uuid',
|
|
439
|
+
currency: 'Currency',
|
|
440
|
+
percentage: 'Percentage',
|
|
441
|
+
number: 'Number',
|
|
442
|
+
date_iso: 'DateIso',
|
|
443
|
+
date_us: 'DateUS',
|
|
444
|
+
time_24h: 'Time24h',
|
|
445
|
+
postal_us: 'PostalUS',
|
|
446
|
+
postal_uk: 'PostalUK',
|
|
447
|
+
hex_color: 'HexColor',
|
|
448
|
+
twitter_handle: 'TwitterHandle',
|
|
449
|
+
hashtag: 'Hashtag',
|
|
450
|
+
mac_addr: 'MacAddr',
|
|
451
|
+
iban: 'Iban',
|
|
452
|
+
credit_card: 'CreditCard',
|
|
453
|
+
all: 'All'
|
|
454
|
+
};
|
|
455
|
+
let extractionConfig;
|
|
456
|
+
if (options.custom) {
|
|
457
|
+
// Custom regex patterns
|
|
458
|
+
extractionConfig = {
|
|
459
|
+
type: 'RegexExtractionStrategy',
|
|
460
|
+
custom: options.custom
|
|
461
|
+
};
|
|
462
|
+
}
|
|
463
|
+
else {
|
|
464
|
+
// Built-in patterns
|
|
465
|
+
const patterns = Array.isArray(options.pattern)
|
|
466
|
+
? options.pattern
|
|
467
|
+
: [options.pattern || 'all'];
|
|
468
|
+
const patternFlags = patterns
|
|
469
|
+
.map((p) => patternMap[p.toLowerCase()])
|
|
470
|
+
.filter(Boolean);
|
|
471
|
+
extractionConfig = {
|
|
472
|
+
type: 'RegexExtractionStrategy',
|
|
473
|
+
pattern: patternFlags.length > 0 ? patternFlags.join('|') : 'All'
|
|
474
|
+
};
|
|
475
|
+
}
|
|
476
|
+
const response = await axios.post(`${this.baseUrl}/crawl`, {
|
|
477
|
+
urls: [url],
|
|
478
|
+
crawler_config: {
|
|
479
|
+
type: 'CrawlerRunConfig',
|
|
480
|
+
params: {
|
|
481
|
+
extraction_strategy: extractionConfig
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}, { timeout: this.timeout });
|
|
485
|
+
if (response.data?.results?.[0]?.extracted_content) {
|
|
486
|
+
const data = typeof response.data.results[0].extracted_content === 'string'
|
|
487
|
+
? JSON.parse(response.data.results[0].extracted_content)
|
|
488
|
+
: response.data.results[0].extracted_content;
|
|
489
|
+
return { success: true, data: data };
|
|
490
|
+
}
|
|
491
|
+
return { success: false, error: 'No content extracted' };
|
|
492
|
+
}
|
|
493
|
+
catch (error) {
|
|
494
|
+
return {
|
|
495
|
+
success: false,
|
|
496
|
+
error: error instanceof Error ? error.message : String(error)
|
|
497
|
+
};
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
// ========================================================================
|
|
501
|
+
// Advanced Markdown with Content Filters
|
|
502
|
+
// ========================================================================
|
|
503
|
+
/**
|
|
504
|
+
* Generate markdown with advanced content filtering (PruningFilter, BM25Filter).
|
|
505
|
+
* Filters out low-quality content based on thresholds and relevance.
|
|
506
|
+
*/
|
|
507
|
+
async generateFilteredMarkdown(url, filterOptions = {}) {
|
|
508
|
+
if (!this.available) {
|
|
509
|
+
const isHealthy = await this.checkHealth();
|
|
510
|
+
if (!isHealthy)
|
|
511
|
+
return { success: false, error: 'Crawl4AI not available' };
|
|
512
|
+
}
|
|
513
|
+
try {
|
|
514
|
+
// Build content filter config
|
|
515
|
+
const contentFilter = {
|
|
516
|
+
type: filterOptions.type === 'bm25' ? 'BM25ContentFilter' : 'PruningContentFilter',
|
|
517
|
+
params: {}
|
|
518
|
+
};
|
|
519
|
+
if (filterOptions.type === 'bm25') {
|
|
520
|
+
contentFilter.params = {
|
|
521
|
+
query: filterOptions.query || '',
|
|
522
|
+
threshold: filterOptions.threshold || 0.3
|
|
523
|
+
};
|
|
524
|
+
}
|
|
525
|
+
else {
|
|
526
|
+
// PruningContentFilter
|
|
527
|
+
contentFilter.params = {
|
|
528
|
+
threshold: filterOptions.threshold || 0.48,
|
|
529
|
+
threshold_type: filterOptions.thresholdType || 'fixed',
|
|
530
|
+
min_word_threshold: filterOptions.minWordThreshold || 10
|
|
531
|
+
};
|
|
532
|
+
}
|
|
533
|
+
const response = await axios.post(`${this.baseUrl}/crawl`, {
|
|
534
|
+
urls: [url],
|
|
535
|
+
crawler_config: {
|
|
536
|
+
type: 'CrawlerRunConfig',
|
|
537
|
+
params: {
|
|
538
|
+
markdown_generator: {
|
|
539
|
+
type: 'DefaultMarkdownGenerator',
|
|
540
|
+
params: {
|
|
541
|
+
content_filter: contentFilter
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
}, { timeout: this.timeout });
|
|
547
|
+
if (response.data?.results?.[0]) {
|
|
548
|
+
const result = response.data.results[0];
|
|
549
|
+
return {
|
|
550
|
+
success: true,
|
|
551
|
+
markdown: result.markdown,
|
|
552
|
+
filteredMarkdown: result.markdown?.fit_markdown
|
|
553
|
+
};
|
|
554
|
+
}
|
|
555
|
+
return { success: false, error: 'Markdown generation failed' };
|
|
556
|
+
}
|
|
557
|
+
catch (error) {
|
|
558
|
+
return {
|
|
559
|
+
success: false,
|
|
560
|
+
error: error instanceof Error ? error.message : String(error)
|
|
561
|
+
};
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
// ========================================================================
|
|
565
|
+
// Utility Methods
|
|
566
|
+
// ========================================================================
|
|
567
|
+
/**
|
|
568
|
+
* Check if the service is currently available.
|
|
569
|
+
*/
|
|
570
|
+
isAvailable() {
|
|
571
|
+
return this.available;
|
|
572
|
+
}
|
|
573
|
+
/**
|
|
574
|
+
* Get API schema/documentation.
|
|
575
|
+
*/
|
|
576
|
+
async getSchema() {
|
|
577
|
+
try {
|
|
578
|
+
const response = await axios.get(`${this.baseUrl}/schema`, { timeout: 5000 });
|
|
579
|
+
return response.data;
|
|
580
|
+
}
|
|
581
|
+
catch {
|
|
582
|
+
return null;
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
/**
|
|
586
|
+
* Get Prometheus metrics.
|
|
587
|
+
*/
|
|
588
|
+
async getMetrics() {
|
|
589
|
+
try {
|
|
590
|
+
const response = await axios.get(`${this.baseUrl}/metrics`, { timeout: 5000 });
|
|
591
|
+
return response.data;
|
|
592
|
+
}
|
|
593
|
+
catch {
|
|
594
|
+
return null;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
// ============================================================================
|
|
599
|
+
// Singleton Instance (Optional)
|
|
600
|
+
// ============================================================================
|
|
601
|
+
let sharedClient = null;
/**
 * Return the module-wide shared client, creating it on first use.
 *
 * @param url Optional service base URL. When omitted, the existing shared client
 *            (if any) is returned unchanged. When given and it resolves to a
 *            different base URL than the current shared client, the shared client
 *            is replaced so callers never receive a client bound to another endpoint.
 * @returns The shared Crawl4AIClient instance.
 */
export function getSharedClient(url) {
    if (sharedClient === null) {
        sharedClient = new Crawl4AIClient(url);
        return sharedClient;
    }
    if (url !== undefined) {
        // Build a candidate so the comparison uses the constructor's own URL normalization.
        const requested = new Crawl4AIClient(url);
        if (requested.baseUrl !== sharedClient.baseUrl) {
            sharedClient = requested;
        }
    }
    return sharedClient;
}
|
|
608
|
+
//# sourceMappingURL=crawl4ai-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawl4ai-client.js","sourceRoot":"","sources":["../src/crawl4ai-client.ts"],"names":[],"mappings":"AAAA,0CAA0C;AAC1C,OAAO,KAAK,MAAM,OAAO,CAAC;AA0B1B,+EAA+E;AAC/E,gBAAgB;AAChB,+EAA+E;AAE/E,MAAM,oBAAoB,GAAG,wBAAwB,CAAC;AACtD,MAAM,eAAe,GAAG,KAAK,CAAC,CAAC,aAAa;AAqJ5C,+EAA+E;AAC/E,kCAAkC;AAClC,+EAA+E;AAE/E,MAAM,OAAO,cAAc;IACjB,OAAO,CAAS;IAChB,OAAO,CAAS;IAChB,SAAS,GAAY,KAAK,CAAC;IAEnC,YAAY,UAAkB,oBAAoB,EAAE,UAAkB,eAAe;QACnF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,wBAAwB;QACnE,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,WAAW;QACf,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,SAAS,EAAE;gBACzD,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;YACH,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC,MAAM,KAAK,GAAG,CAAC;YACzC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;gBACnB,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;YACjE,CAAC;YACD,OAAO,IAAI,CAAC,SAAS,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;YAC5E,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,2BAA2B;IAC3B,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,GAAW,EAAE,MAAuB;QACnD,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,IAAI,CAAC;QAC9B,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,QAAQ,EACvB;gBACE,IAAI,EAAE,CAAC,GAAG,CAAC;gBACX,cAAc,EAAE;oBACd,IAAI,EAAE,eAAe;oBACrB,MAAM,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;iBAC3B;gBACD,cAAc,EAAE;oBACd,IAAI,EAAE,kBAAkB;oBACxB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ;wBACpB,UAAU,EAAE,MAAM,EAAE,aAAa,IAAI,KAAK;qBAC3C;iBACF;aACF,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACxC,OAAO;oBACL,OAAO,EAAE,MAAM,CAAC,OAAO,IAAI,KAAK;oBAChC,GAAG,EAAE,MAAM,CAAC,GAAG,IAAI,GAAG;oBACtB,QAAQ,EAAE,MAAM,CAAC,QAAQ;oBACzB,YAAY,EAAE,MAAM,CAAC,YAAY;oBACjC,
KAAK,EAAE,MAAM,CAAC,KAAK;oBACnB,KAAK,EAAE,MAAM,CAAC,KAAK;wBACjB,CAAC,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE;wBAChF,CAAC,CAAC,SAAS;oBACb,KAAK,EAAE,MAAM,CAAC,KAAK;wBACjB,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,EAAE,EAAE;wBACvC,CAAC,CAAC,SAAS;oBACb,KAAK,EAAE,MAAM,CAAC,KAAK;iBACpB,CAAC;YACJ,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,gCAAgC,GAAG,GAAG,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACtG,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,mBAAmB,CACvB,IAAc,EACd,MAAuB,EACvB,cAAsB,CAAC;QAEvB,MAAM,OAAO,GAAG,IAAI,GAAG,EAA4B,CAAC;QAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;YAC7C,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,UAAU,CACxC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,CAC/C,CAAC;YAEF,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;gBACzB,MAAM,QAAQ,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;gBAChC,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;oBACtD,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC;gBACnC,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,2EAA2E;IAC3E,+CAA+C;IAC/C,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,gBAAgB,CAAC,GAAW,EAAE,UAAiC,EAAE;QACrE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,IAAI,CAAC;QAC9B,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,KAAK,EACpB;gBACE,GAAG;gBACH,CAAC,EAAE,OAAO,CAAC,MAAM,IAAI,KAAK;gBAC1B,CAAC,EAAE,OAAO,CAAC,KAAK;gBAChB,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;aACnC,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,OAAO,QAAQ,CAAC,IAAI,EAAE,gBAAgB,IAAI,IAAI,CAAC;QACjD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,wCAAwC,EAAE,K
AAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxG,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,qBAAqB;IACrB,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,iBAAiB,CAAC,GAAW,EAAE,UAA6B,EAAE;QAClE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,aAAa,EAC5B;gBACE,GAAG;gBACH,mBAAmB,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC;gBACzC,WAAW,EAAE,OAAO,CAAC,QAAQ;aAC9B,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC;gBAC9B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YAC7D,CAAC;YACD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,IAAI,qBAAqB,EAAE,CAAC;QAClF,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,iBAAiB;IACjB,2EAA2E;IAE3E;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,GAAW,EAAE,UAAmB;QAChD,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,MAAM,EACrB;gBACE,GAAG;gBACH,WAAW,EAAE,UAAU;aACxB,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,CAAC;gBAC5B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC3D,CAAC;YACD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,IAAI,eAAe,EAAE,CAAC;QAC5E,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,uBAAuB;IACvB,2EAA2E;IAE3E;;;OAGG;IACH,KAA
K,CAAC,iBAAiB,CAAC,GAAW,EAAE,OAA2B;QAC9D,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,aAAa,EAC5B;gBACE,GAAG;gBACH,OAAO,EAAE,OAAO,CAAC,OAAO;aACzB,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC;QAClE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,kCAAkC;IAClC,2EAA2E;IAE3E;;OAEG;IACH,KAAK,CAAC,cAAc,CAAC,GAAW,EAAE,MAA2B;QAC3D,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,QAAQ,EACvB;gBACE,IAAI,EAAE,CAAC,GAAG,CAAC;gBACX,cAAc,EAAE;oBACd,IAAI,EAAE,kBAAkB;oBACxB,MAAM,EAAE;wBACN,mBAAmB,EAAE;4BACnB,IAAI,EAAE,2BAA2B;4BACjC,MAAM,EAAE;gCACN,MAAM,EAAE;oCACN,IAAI,EAAE,MAAM;oCACZ,KAAK,EAAE,MAAM;iCACd;6BACF;yBACF;qBACF;iBACF;aACF,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACnD,MAAM,IAAI,GAAG,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,iBAAiB,KAAK,QAAQ;oBACzE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;oBACxD,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;gBAC/C,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACjC,CAAC;YACD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;
IACH,CAAC;IAED,2EAA2E;IAC3E,iCAAiC;IACjC,2EAA2E;IAE3E;;OAEG;IACH,KAAK,CAAC,OAAO,CAAC,GAAW;QACvB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,OAAO,EACtB,EAAE,GAAG,EAAE,EACP,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,YAAY,IAAI,EAAE,EAAE,CAAC;QACpE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,gBAAgB;IAChB,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAA4B,EAAE;QACzD,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,mCAAmC;YACnC,MAAM,YAAY,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;YAC/C,MAAM,cAAc,GAA4B;gBAC9C,SAAS,EAAE,OAAO,CAAC,QAAQ,IAAI,CAAC;gBAChC,gBAAgB,EAAE,OAAO,CAAC,eAAe,IAAI,KAAK;gBAClD,SAAS,EAAE,OAAO,CAAC,QAAQ;aAC5B,CAAC;YAEF,+BAA+B;YAC/B,IAAI,YAAY,KAAK,YAAY,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;gBACvD,cAAc,CAAC,UAAU,GAAG;oBAC1B,IAAI,EAAE,wBAAwB;oBAC9B,QAAQ,EAAE,OAAO,CAAC,SAAS,CAAC,QAAQ;oBACpC,MAAM,EAAE,OAAO,CAAC,SAAS,CAAC,MAAM,IAAI,GAAG;iBACxC,CAAC;YACJ,CAAC;iBAAM,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;gBAClC,cAAc,CAAC,eAAe,GAAG,OAAO,CAAC,cAAc,CAAC;YAC1D,CAAC;YAED,+BAA+B;YAC/B,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;gBACxB,cAAc,CAAC,YAAY,GAAG;oBAC5B,IAAI,EAAE,aAAa;oBACnB,OAAO,EAAE,OAAO,CAAC,WAAW,CAAC,OAAO;iBACrC,CAAC;YACJ,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,QAAQ,EACvB;gBACE,IAAI,EAAE,CAAC,GAAG,CAAC;gBACX,cAAc,EAAE;oBACd,IAAI,EAAE,kBAAkB;oBACxB,MAAM,EAAE;wBACN,mBAAmB,EAAE;4BACnB,IAAI,EAAE,GAAG,YAAY,CAAC,WAAW,EAAE,mBAAmB;4BACtD,MAAM,EAAE,cAAc;yBACvB;wBACD,MAAM,EAAE,OA
AO,CAAC,MAAM,IAAI,KAAK;qBAChC;iBACF;aACF,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,gCAAgC;aAC/D,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC;gBAC3B,MAAM,OAAO,GAAsB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC;oBACxE,GAAG,EAAE,CAAC,CAAC,GAAG,IAAI,GAAG;oBACjB,KAAK,EAAE,CAAC,CAAC,QAAQ,EAAE,KAAK,IAAI,CAAC;oBAC7B,KAAK,EAAE,CAAC,CAAC,QAAQ,EAAE,KAAK;oBACxB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,KAAK;oBAC3B,QAAQ,EAAE,CAAC,CAAC,QAAQ;oBACpB,KAAK,EAAE,CAAC,CAAC,KAAK;oBACd,KAAK,EAAE,CAAC,CAAC,KAAK;iBACf,CAAC,CAAC,CAAC;gBACJ,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;YACpC,CAAC;YAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,mBAAmB,EAAE,CAAC;QACxD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,oBAAoB;IACpB,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,aAAa,CAAC,GAAW,EAAE,KAAa,EAAE,UAAgC,EAAE;QAChF,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7G,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAA4B;gBACtC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,aAAa;gBAC3C,oBAAoB,EAAE,OAAO,CAAC,mBAAmB,IAAI,GAAG;gBACxD,SAAS,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;gBACjC,WAAW,EAAE,OAAO,CAAC,SAAS,IAAI,CAAC;aACpC,CAAC;YAEF,IAAI,OAAO,CAAC,QAAQ,KAAK,WAAW,EAAE,CAAC;gBACrC,MAAM,CAAC,eAAe,GAAG,OAAO,CAAC,cAAc,IAAI,wCAAwC,CAAC;YAC9F,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,iBAAiB,EAChC;gBACE,GAAG;gBACH,KAAK;gBACL,MAAM;gBACN,UAAU,EAAE,OAAO,CAAC,SAAS;gBAC7B,UAAU,EAAE,OAAO,CAAC,SAAS;aAC9B,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,GAAG,EAAE,EAAE,CAAC,uCAAuC;aACvE,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;gBAClB,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC;oBACzC,YAAY,EAAE,QAAQ,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC;oBAC9C,eAAe,EAAE,QAAQ,CAAC,IAAI,CAAC,gBAAgB
;oBAC/C,OAAO,EAAE,QAAQ,CAAC,IAAI,CAAC,OAAO;iBAC/B,CAAC;YACJ,CAAC;YAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;QAC5F,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,UAAU,EAAE,CAAC;gBACb,YAAY,EAAE,CAAC;gBACf,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,mBAAmB;IACnB,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,gBAAgB,CAAC,GAAW,EAAE,OAA+B;QACjE,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,gDAAgD;YAChD,MAAM,UAAU,GAA2B;gBACzC,KAAK,EAAE,OAAO;gBACd,UAAU,EAAE,WAAW;gBACvB,QAAQ,EAAE,SAAS;gBACnB,GAAG,EAAE,KAAK;gBACV,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM;gBACZ,QAAQ,EAAE,UAAU;gBACpB,UAAU,EAAE,YAAY;gBACxB,MAAM,EAAE,QAAQ;gBAChB,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,QAAQ;gBACjB,QAAQ,EAAE,SAAS;gBACnB,SAAS,EAAE,UAAU;gBACrB,SAAS,EAAE,UAAU;gBACrB,SAAS,EAAE,UAAU;gBACrB,cAAc,EAAE,eAAe;gBAC/B,OAAO,EAAE,SAAS;gBAClB,QAAQ,EAAE,SAAS;gBACnB,IAAI,EAAE,MAAM;gBACZ,WAAW,EAAE,YAAY;gBACzB,GAAG,EAAE,KAAK;aACX,CAAC;YAEF,IAAI,gBAAyC,CAAC;YAE9C,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;gBACnB,wBAAwB;gBACxB,gBAAgB,GAAG;oBACjB,IAAI,EAAE,yBAAyB;oBAC/B,MAAM,EAAE,OAAO,CAAC,MAAM;iBACvB,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,oBAAoB;gBACpB,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC;oBAC7C,CAAC,CAAC,OAAO,CAAC,OAAO;oBACjB,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC,CAAC;gBAE/B,MAAM,YAAY,GAAG,QAAQ;qBAC1B,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;qBAC/C,MAAM,CAAC,OAAO,CAAC,CAAC;gBAEnB,gBAAgB,GAAG;oBACjB,IAAI,EAAE,yBAAyB;oBAC/B,OAAO,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK;iBAClE,CAAC;YACJ,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,QAAQ,EACvB;gBACE,IAAI,EAAE,CAAC,GAAG,CAAC;gBACX,cAAc,EAA
E;oBACd,IAAI,EAAE,kBAAkB;oBACxB,MAAM,EAAE;wBACN,mBAAmB,EAAE,gBAAgB;qBACtC;iBACF;aACF,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACnD,MAAM,IAAI,GAAG,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,iBAAiB,KAAK,QAAQ;oBACzE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;oBACxD,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC;gBAC/C,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAiB,EAAE,CAAC;YACpD,CAAC;YAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,yCAAyC;IACzC,2EAA2E;IAE3E;;;OAGG;IACH,KAAK,CAAC,wBAAwB,CAC5B,GAAW,EACX,gBAAsC,EAAE;QAExC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YAC3C,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC;YACH,8BAA8B;YAC9B,MAAM,aAAa,GAA4B;gBAC7C,IAAI,EAAE,aAAa,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,sBAAsB;gBAClF,MAAM,EAAE,EAAE;aACX,CAAC;YAEF,IAAI,aAAa,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAClC,aAAa,CAAC,MAAM,GAAG;oBACrB,KAAK,EAAE,aAAa,CAAC,KAAK,IAAI,EAAE;oBAChC,SAAS,EAAE,aAAa,CAAC,SAAS,IAAI,GAAG;iBAC1C,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,uBAAuB;gBACvB,aAAa,CAAC,MAAM,GAAG;oBACrB,SAAS,EAAE,aAAa,CAAC,SAAS,IAAI,IAAI;oBAC1C,cAAc,EAAE,aAAa,CAAC,aAAa,IAAI,OAAO;oBACtD,kBAAkB,EAAE,aAAa,CAAC,gBAAgB,IAAI,EAAE;iBACzD,CAAC;YACJ,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,GAAG,IAAI,CAAC,OAAO,QAAQ,EACvB;gBACE,IAAI,EAAE,CAAC,GAAG,CAAC;gBACX,cAAc,EAAE;oBACd,IAAI,EAAE,kBAAkB;oBACxB,MAAM,EAAE;wBACN,kBAAkB,EAAE;4BAClB,IAAI,EAAE,0BAA0B;4BAChC,MAAM,EAAE;gCACN,cAAc,EAAE,aAAa;6BAC9B;yBACF;qBACF;iBACF;aACF,EACD,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAC1B,CAAC;YAEF,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE
,CAAC,CAAC,CAAC,EAAE,CAAC;gBAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBACxC,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;oBACzB,gBAAgB,EAAE,MAAM,CAAC,QAAQ,EAAE,YAAY;iBAChD,CAAC;YACJ,CAAC;YAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE,CAAC;QACjE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,2EAA2E;IAC3E,kBAAkB;IAClB,2EAA2E;IAE3E;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS;QACb,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,SAAS,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YAC9E,OAAO,QAAQ,CAAC,IAAI,CAAC;QACvB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,UAAU,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YAC/E,OAAO,QAAQ,CAAC,IAAc,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;CACF;AAED,+EAA+E;AAC/E,gCAAgC;AAChC,+EAA+E;AAE/E,IAAI,YAAY,GAA0B,IAAI,CAAC;AAE/C,MAAM,UAAU,eAAe,CAAC,GAAY;IAC1C,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,YAAY,GAAG,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
export interface SearchArgs {
|
|
3
|
+
query: string;
|
|
4
|
+
limit?: number;
|
|
5
|
+
rankingMode?: 'bm25' | 'tfidf' | 'hybrid';
|
|
6
|
+
freshness?: 'day' | 'week' | 'month';
|
|
7
|
+
}
|
|
8
|
+
export interface CrawlArgs {
|
|
9
|
+
urls: string[];
|
|
10
|
+
extractDepth?: 'basic' | 'advanced';
|
|
11
|
+
includeImages?: boolean;
|
|
12
|
+
waitForSelector?: string;
|
|
13
|
+
executeJavascript?: boolean;
|
|
14
|
+
timeout?: number;
|
|
15
|
+
}
|
|
16
|
+
export interface SearchAndCrawlArgs extends SearchArgs {
|
|
17
|
+
extractTopN?: number;
|
|
18
|
+
rerankAfterExtract?: boolean;
|
|
19
|
+
extractDepth?: 'basic' | 'advanced';
|
|
20
|
+
includeImages?: boolean;
|
|
21
|
+
}
|
|
22
|
+
export interface BulkCrawlArgs {
|
|
23
|
+
urls: string[];
|
|
24
|
+
concurrency?: number;
|
|
25
|
+
extractDepth?: 'basic' | 'advanced';
|
|
26
|
+
includeImages?: boolean;
|
|
27
|
+
timeout?: number;
|
|
28
|
+
continueOnError?: boolean;
|
|
29
|
+
}
|
|
30
|
+
export interface SearchWithFiltersArgs extends SearchArgs {
|
|
31
|
+
includeDomains?: string[];
|
|
32
|
+
excludeDomains?: string[];
|
|
33
|
+
site?: string;
|
|
34
|
+
fileType?: 'pdf' | 'doc' | 'xls' | 'ppt';
|
|
35
|
+
resultType?: 'web' | 'news' | 'videos' | 'images';
|
|
36
|
+
language?: string;
|
|
37
|
+
region?: string;
|
|
38
|
+
}
|
|
39
|
+
//# sourceMappingURL=index.d.ts.map
|