@mendable/firecrawl 1.18.1 → 1.18.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +395 -24
- package/dist/index.d.cts +222 -5
- package/dist/index.d.ts +222 -5
- package/dist/index.js +395 -24
- package/package.json +1 -1
- package/src/index.ts +184 -6
package/dist/index.d.cts
CHANGED
|
@@ -61,6 +61,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
61
61
|
rawHtml?: string;
|
|
62
62
|
links?: string[];
|
|
63
63
|
extract?: T;
|
|
64
|
+
json?: T;
|
|
64
65
|
screenshot?: string;
|
|
65
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
66
67
|
actions: ActionsSchema;
|
|
@@ -72,7 +73,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
73
|
* Defines the options and configurations available for scraping web content.
|
|
73
74
|
*/
|
|
74
75
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats
|
|
76
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
77
|
headers?: Record<string, string>;
|
|
77
78
|
includeTags?: string[];
|
|
78
79
|
excludeTags?: string[];
|
|
@@ -86,6 +87,8 @@ interface CrawlScrapeOptions {
|
|
|
86
87
|
mobile?: boolean;
|
|
87
88
|
skipTlsVerification?: boolean;
|
|
88
89
|
removeBase64Images?: boolean;
|
|
90
|
+
blockAds?: boolean;
|
|
91
|
+
proxy?: "basic" | "stealth";
|
|
89
92
|
}
|
|
90
93
|
type Action = {
|
|
91
94
|
type: "wait";
|
|
@@ -119,6 +122,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
122
|
schema?: LLMSchema;
|
|
120
123
|
systemPrompt?: string;
|
|
121
124
|
};
|
|
125
|
+
jsonOptions?: {
|
|
126
|
+
prompt?: string;
|
|
127
|
+
schema?: LLMSchema;
|
|
128
|
+
systemPrompt?: string;
|
|
129
|
+
};
|
|
122
130
|
actions?: ActionsSchema;
|
|
123
131
|
}
|
|
124
132
|
interface ActionsResult {
|
|
@@ -150,6 +158,7 @@ interface CrawlParams {
|
|
|
150
158
|
url: string;
|
|
151
159
|
headers?: Record<string, string>;
|
|
152
160
|
metadata?: Record<string, string>;
|
|
161
|
+
events?: ["completed", "failed", "page", "started"][number][];
|
|
153
162
|
};
|
|
154
163
|
deduplicateSimilarURLs?: boolean;
|
|
155
164
|
ignoreQueryParameters?: boolean;
|
|
@@ -213,6 +222,7 @@ interface MapParams {
|
|
|
213
222
|
includeSubdomains?: boolean;
|
|
214
223
|
sitemapOnly?: boolean;
|
|
215
224
|
limit?: number;
|
|
225
|
+
timeout?: number;
|
|
216
226
|
}
|
|
217
227
|
/**
|
|
218
228
|
* Response interface for mapping operations.
|
|
@@ -232,7 +242,11 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
|
232
242
|
schema?: LLMSchema | object;
|
|
233
243
|
systemPrompt?: string;
|
|
234
244
|
allowExternalLinks?: boolean;
|
|
245
|
+
enableWebSearch?: boolean;
|
|
235
246
|
includeSubdomains?: boolean;
|
|
247
|
+
origin?: string;
|
|
248
|
+
showSources?: boolean;
|
|
249
|
+
scrapeOptions?: CrawlScrapeOptions;
|
|
236
250
|
}
|
|
237
251
|
/**
|
|
238
252
|
* Response interface for extracting information from URLs.
|
|
@@ -243,6 +257,7 @@ interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
|
|
|
243
257
|
data: LLMSchema;
|
|
244
258
|
error?: string;
|
|
245
259
|
warning?: string;
|
|
260
|
+
sources?: string[];
|
|
246
261
|
}
|
|
247
262
|
/**
|
|
248
263
|
* Error response interface.
|
|
@@ -258,7 +273,8 @@ interface ErrorResponse {
|
|
|
258
273
|
*/
|
|
259
274
|
declare class FirecrawlError extends Error {
|
|
260
275
|
statusCode: number;
|
|
261
|
-
|
|
276
|
+
details?: any;
|
|
277
|
+
constructor(message: string, statusCode: number, details?: any);
|
|
262
278
|
}
|
|
263
279
|
/**
|
|
264
280
|
* Parameters for search operations.
|
|
@@ -285,6 +301,129 @@ interface SearchResponse {
|
|
|
285
301
|
warning?: string;
|
|
286
302
|
error?: string;
|
|
287
303
|
}
|
|
304
|
+
/**
|
|
305
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
306
|
+
*/
|
|
307
|
+
interface CrawlErrorsResponse {
|
|
308
|
+
/**
|
|
309
|
+
* Scrapes that errored out + error details
|
|
310
|
+
*/
|
|
311
|
+
errors: {
|
|
312
|
+
id: string;
|
|
313
|
+
timestamp?: string;
|
|
314
|
+
url: string;
|
|
315
|
+
error: string;
|
|
316
|
+
}[];
|
|
317
|
+
/**
|
|
318
|
+
* URLs blocked by robots.txt
|
|
319
|
+
*/
|
|
320
|
+
robotsBlocked: string[];
|
|
321
|
+
}
|
|
322
|
+
/**
|
|
323
|
+
* Parameters for deep research operations.
|
|
324
|
+
* Defines options for conducting deep research on a topic.
|
|
325
|
+
*/
|
|
326
|
+
interface DeepResearchParams {
|
|
327
|
+
/**
|
|
328
|
+
* Maximum depth of research iterations (1-10)
|
|
329
|
+
* @default 7
|
|
330
|
+
*/
|
|
331
|
+
maxDepth?: number;
|
|
332
|
+
/**
|
|
333
|
+
* Time limit in seconds (30-300)
|
|
334
|
+
* @default 270
|
|
335
|
+
*/
|
|
336
|
+
timeLimit?: number;
|
|
337
|
+
/**
|
|
338
|
+
* Maximum number of URLs to analyze (1-1000)
|
|
339
|
+
* @default 20
|
|
340
|
+
*/
|
|
341
|
+
maxUrls?: number;
|
|
342
|
+
/**
|
|
343
|
+
* Experimental flag for streaming steps
|
|
344
|
+
*/
|
|
345
|
+
__experimental_streamSteps?: boolean;
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* Response interface for deep research operations.
|
|
349
|
+
*/
|
|
350
|
+
interface DeepResearchResponse {
|
|
351
|
+
success: boolean;
|
|
352
|
+
id: string;
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Status response interface for deep research operations.
|
|
356
|
+
*/
|
|
357
|
+
interface DeepResearchStatusResponse {
|
|
358
|
+
success: boolean;
|
|
359
|
+
data: {
|
|
360
|
+
findings: Array<{
|
|
361
|
+
text: string;
|
|
362
|
+
source: string;
|
|
363
|
+
}>;
|
|
364
|
+
finalAnalysis: string;
|
|
365
|
+
analysis: string;
|
|
366
|
+
completedSteps: number;
|
|
367
|
+
totalSteps: number;
|
|
368
|
+
};
|
|
369
|
+
status: "processing" | "completed" | "failed";
|
|
370
|
+
error?: string;
|
|
371
|
+
expiresAt: string;
|
|
372
|
+
currentDepth: number;
|
|
373
|
+
maxDepth: number;
|
|
374
|
+
activities: Array<{
|
|
375
|
+
type: string;
|
|
376
|
+
status: string;
|
|
377
|
+
message: string;
|
|
378
|
+
timestamp: string;
|
|
379
|
+
depth: number;
|
|
380
|
+
}>;
|
|
381
|
+
sources: Array<{
|
|
382
|
+
url: string;
|
|
383
|
+
title: string;
|
|
384
|
+
description: string;
|
|
385
|
+
}>;
|
|
386
|
+
summaries: string[];
|
|
387
|
+
}
|
|
388
|
+
/**
|
|
389
|
+
* Parameters for LLMs.txt generation operations.
|
|
390
|
+
*/
|
|
391
|
+
interface GenerateLLMsTextParams {
|
|
392
|
+
/**
|
|
393
|
+
* Maximum number of URLs to process (1-100)
|
|
394
|
+
* @default 10
|
|
395
|
+
*/
|
|
396
|
+
maxUrls?: number;
|
|
397
|
+
/**
|
|
398
|
+
* Whether to show the full LLMs-full.txt in the response
|
|
399
|
+
* @default false
|
|
400
|
+
*/
|
|
401
|
+
showFullText?: boolean;
|
|
402
|
+
/**
|
|
403
|
+
* Experimental flag for streaming
|
|
404
|
+
*/
|
|
405
|
+
__experimental_stream?: boolean;
|
|
406
|
+
}
|
|
407
|
+
/**
|
|
408
|
+
* Response interface for LLMs.txt generation operations.
|
|
409
|
+
*/
|
|
410
|
+
interface GenerateLLMsTextResponse {
|
|
411
|
+
success: boolean;
|
|
412
|
+
id: string;
|
|
413
|
+
}
|
|
414
|
+
/**
|
|
415
|
+
* Status response interface for LLMs.txt generation operations.
|
|
416
|
+
*/
|
|
417
|
+
interface GenerateLLMsTextStatusResponse {
|
|
418
|
+
success: boolean;
|
|
419
|
+
data: {
|
|
420
|
+
llmstxt: string;
|
|
421
|
+
llmsfulltxt?: string;
|
|
422
|
+
};
|
|
423
|
+
status: "processing" | "completed" | "failed";
|
|
424
|
+
error?: string;
|
|
425
|
+
expiresAt: string;
|
|
426
|
+
}
|
|
288
427
|
/**
|
|
289
428
|
* Main class for interacting with the Firecrawl API.
|
|
290
429
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -326,9 +465,18 @@ declare class FirecrawlApp {
|
|
|
326
465
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
327
466
|
* @param id - The ID of the crawl operation.
|
|
328
467
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
468
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
469
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
470
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
329
471
|
* @returns The response containing the job status.
|
|
330
472
|
*/
|
|
331
|
-
checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
473
|
+
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
474
|
+
/**
|
|
475
|
+
* Returns information about crawl errors.
|
|
476
|
+
* @param id - The ID of the crawl operation.
|
|
477
|
+
* @returns Information about crawl errors.
|
|
478
|
+
*/
|
|
479
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
332
480
|
/**
|
|
333
481
|
* Cancels a crawl job using the Firecrawl API.
|
|
334
482
|
* @param id - The ID of the crawl operation.
|
|
@@ -373,9 +521,18 @@ declare class FirecrawlApp {
|
|
|
373
521
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
374
522
|
* @param id - The ID of the batch scrape operation.
|
|
375
523
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
524
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
525
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
526
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
376
527
|
* @returns The response containing the job status.
|
|
377
528
|
*/
|
|
378
|
-
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
529
|
+
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
530
|
+
/**
|
|
531
|
+
* Returns information about batch scrape errors.
|
|
532
|
+
* @param id - The ID of the batch scrape operation.
|
|
533
|
+
* @returns Information about batch scrape errors.
|
|
534
|
+
*/
|
|
535
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
379
536
|
/**
|
|
380
537
|
* Extracts information from URLs using the Firecrawl API.
|
|
381
538
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -384,6 +541,20 @@ declare class FirecrawlApp {
|
|
|
384
541
|
* @returns The response from the extract operation.
|
|
385
542
|
*/
|
|
386
543
|
extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
544
|
+
/**
|
|
545
|
+
* Initiates an asynchronous extract job for one or more URLs using the Firecrawl API.
|
|
546
|
+
* @param urls - The URLs to extract data from.
|
|
547
|
+
* @param params - Additional parameters for the extract request.
|
|
548
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
549
|
+
* @returns The response from the extract operation.
|
|
550
|
+
*/
|
|
551
|
+
asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
|
|
552
|
+
/**
|
|
553
|
+
* Retrieves the status of an extract job.
|
|
554
|
+
* @param jobId - The ID of the extract job.
|
|
555
|
+
* @returns The status of the extract job.
|
|
556
|
+
*/
|
|
557
|
+
getExtractStatus(jobId: string): Promise<any>;
|
|
387
558
|
/**
|
|
388
559
|
* Prepares the headers for an API request.
|
|
389
560
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -427,6 +598,52 @@ declare class FirecrawlApp {
|
|
|
427
598
|
* @param {string} action - The action being performed when the error occurred.
|
|
428
599
|
*/
|
|
429
600
|
handleError(response: AxiosResponse, action: string): void;
|
|
601
|
+
/**
|
|
602
|
+
* Initiates a deep research operation on a given topic and polls until completion.
|
|
603
|
+
* @param topic - The topic to research.
|
|
604
|
+
* @param params - Parameters for the deep research operation.
|
|
605
|
+
* @param onActivity - Optional callback to receive activity updates in real-time.
|
|
606
|
+
* @returns The final research results.
|
|
607
|
+
*/
|
|
608
|
+
__deepResearch(topic: string, params: DeepResearchParams, onActivity?: (activity: {
|
|
609
|
+
type: string;
|
|
610
|
+
status: string;
|
|
611
|
+
message: string;
|
|
612
|
+
timestamp: string;
|
|
613
|
+
depth: number;
|
|
614
|
+
}) => void): Promise<DeepResearchStatusResponse | ErrorResponse>;
|
|
615
|
+
/**
|
|
616
|
+
* Initiates a deep research operation on a given topic without polling.
|
|
617
|
+
* @param params - Parameters for the deep research operation.
|
|
618
|
+
* @returns The response containing the research job ID.
|
|
619
|
+
*/
|
|
620
|
+
__asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse>;
|
|
621
|
+
/**
|
|
622
|
+
* Checks the status of a deep research operation.
|
|
623
|
+
* @param id - The ID of the deep research operation.
|
|
624
|
+
* @returns The current status and results of the research operation.
|
|
625
|
+
*/
|
|
626
|
+
__checkDeepResearchStatus(id: string): Promise<DeepResearchStatusResponse | ErrorResponse>;
|
|
627
|
+
/**
|
|
628
|
+
* Generates LLMs.txt for a given URL and polls until completion.
|
|
629
|
+
* @param url - The URL to generate LLMs.txt from.
|
|
630
|
+
* @param params - Parameters for the LLMs.txt generation operation.
|
|
631
|
+
* @returns The final generation results.
|
|
632
|
+
*/
|
|
633
|
+
generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;
|
|
634
|
+
/**
|
|
635
|
+
* Initiates a LLMs.txt generation operation without polling.
|
|
636
|
+
* @param url - The URL to generate LLMs.txt from.
|
|
637
|
+
* @param params - Parameters for the LLMs.txt generation operation.
|
|
638
|
+
* @returns The response containing the generation job ID.
|
|
639
|
+
*/
|
|
640
|
+
asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse>;
|
|
641
|
+
/**
|
|
642
|
+
* Checks the status of a LLMs.txt generation operation.
|
|
643
|
+
* @param id - The ID of the LLMs.txt generation operation.
|
|
644
|
+
* @returns The current status and results of the generation operation.
|
|
645
|
+
*/
|
|
646
|
+
checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;
|
|
430
647
|
}
|
|
431
648
|
interface CrawlWatcherEvents {
|
|
432
649
|
document: CustomEvent<FirecrawlDocument<undefined>>;
|
|
@@ -449,4 +666,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
449
666
|
close(): void;
|
|
450
667
|
}
|
|
451
668
|
|
|
452
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
669
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type DeepResearchParams, type DeepResearchResponse, type DeepResearchStatusResponse, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type GenerateLLMsTextParams, type GenerateLLMsTextResponse, type GenerateLLMsTextStatusResponse, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -61,6 +61,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
61
61
|
rawHtml?: string;
|
|
62
62
|
links?: string[];
|
|
63
63
|
extract?: T;
|
|
64
|
+
json?: T;
|
|
64
65
|
screenshot?: string;
|
|
65
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
66
67
|
actions: ActionsSchema;
|
|
@@ -72,7 +73,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
73
|
* Defines the options and configurations available for scraping web content.
|
|
73
74
|
*/
|
|
74
75
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats
|
|
76
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
77
|
headers?: Record<string, string>;
|
|
77
78
|
includeTags?: string[];
|
|
78
79
|
excludeTags?: string[];
|
|
@@ -86,6 +87,8 @@ interface CrawlScrapeOptions {
|
|
|
86
87
|
mobile?: boolean;
|
|
87
88
|
skipTlsVerification?: boolean;
|
|
88
89
|
removeBase64Images?: boolean;
|
|
90
|
+
blockAds?: boolean;
|
|
91
|
+
proxy?: "basic" | "stealth";
|
|
89
92
|
}
|
|
90
93
|
type Action = {
|
|
91
94
|
type: "wait";
|
|
@@ -119,6 +122,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
122
|
schema?: LLMSchema;
|
|
120
123
|
systemPrompt?: string;
|
|
121
124
|
};
|
|
125
|
+
jsonOptions?: {
|
|
126
|
+
prompt?: string;
|
|
127
|
+
schema?: LLMSchema;
|
|
128
|
+
systemPrompt?: string;
|
|
129
|
+
};
|
|
122
130
|
actions?: ActionsSchema;
|
|
123
131
|
}
|
|
124
132
|
interface ActionsResult {
|
|
@@ -150,6 +158,7 @@ interface CrawlParams {
|
|
|
150
158
|
url: string;
|
|
151
159
|
headers?: Record<string, string>;
|
|
152
160
|
metadata?: Record<string, string>;
|
|
161
|
+
events?: ["completed", "failed", "page", "started"][number][];
|
|
153
162
|
};
|
|
154
163
|
deduplicateSimilarURLs?: boolean;
|
|
155
164
|
ignoreQueryParameters?: boolean;
|
|
@@ -213,6 +222,7 @@ interface MapParams {
|
|
|
213
222
|
includeSubdomains?: boolean;
|
|
214
223
|
sitemapOnly?: boolean;
|
|
215
224
|
limit?: number;
|
|
225
|
+
timeout?: number;
|
|
216
226
|
}
|
|
217
227
|
/**
|
|
218
228
|
* Response interface for mapping operations.
|
|
@@ -232,7 +242,11 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
|
232
242
|
schema?: LLMSchema | object;
|
|
233
243
|
systemPrompt?: string;
|
|
234
244
|
allowExternalLinks?: boolean;
|
|
245
|
+
enableWebSearch?: boolean;
|
|
235
246
|
includeSubdomains?: boolean;
|
|
247
|
+
origin?: string;
|
|
248
|
+
showSources?: boolean;
|
|
249
|
+
scrapeOptions?: CrawlScrapeOptions;
|
|
236
250
|
}
|
|
237
251
|
/**
|
|
238
252
|
* Response interface for extracting information from URLs.
|
|
@@ -243,6 +257,7 @@ interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
|
|
|
243
257
|
data: LLMSchema;
|
|
244
258
|
error?: string;
|
|
245
259
|
warning?: string;
|
|
260
|
+
sources?: string[];
|
|
246
261
|
}
|
|
247
262
|
/**
|
|
248
263
|
* Error response interface.
|
|
@@ -258,7 +273,8 @@ interface ErrorResponse {
|
|
|
258
273
|
*/
|
|
259
274
|
declare class FirecrawlError extends Error {
|
|
260
275
|
statusCode: number;
|
|
261
|
-
|
|
276
|
+
details?: any;
|
|
277
|
+
constructor(message: string, statusCode: number, details?: any);
|
|
262
278
|
}
|
|
263
279
|
/**
|
|
264
280
|
* Parameters for search operations.
|
|
@@ -285,6 +301,129 @@ interface SearchResponse {
|
|
|
285
301
|
warning?: string;
|
|
286
302
|
error?: string;
|
|
287
303
|
}
|
|
304
|
+
/**
|
|
305
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
306
|
+
*/
|
|
307
|
+
interface CrawlErrorsResponse {
|
|
308
|
+
/**
|
|
309
|
+
* Scrapes that errored out + error details
|
|
310
|
+
*/
|
|
311
|
+
errors: {
|
|
312
|
+
id: string;
|
|
313
|
+
timestamp?: string;
|
|
314
|
+
url: string;
|
|
315
|
+
error: string;
|
|
316
|
+
}[];
|
|
317
|
+
/**
|
|
318
|
+
* URLs blocked by robots.txt
|
|
319
|
+
*/
|
|
320
|
+
robotsBlocked: string[];
|
|
321
|
+
}
|
|
322
|
+
/**
|
|
323
|
+
* Parameters for deep research operations.
|
|
324
|
+
* Defines options for conducting deep research on a topic.
|
|
325
|
+
*/
|
|
326
|
+
interface DeepResearchParams {
|
|
327
|
+
/**
|
|
328
|
+
* Maximum depth of research iterations (1-10)
|
|
329
|
+
* @default 7
|
|
330
|
+
*/
|
|
331
|
+
maxDepth?: number;
|
|
332
|
+
/**
|
|
333
|
+
* Time limit in seconds (30-300)
|
|
334
|
+
* @default 270
|
|
335
|
+
*/
|
|
336
|
+
timeLimit?: number;
|
|
337
|
+
/**
|
|
338
|
+
* Maximum number of URLs to analyze (1-1000)
|
|
339
|
+
* @default 20
|
|
340
|
+
*/
|
|
341
|
+
maxUrls?: number;
|
|
342
|
+
/**
|
|
343
|
+
* Experimental flag for streaming steps
|
|
344
|
+
*/
|
|
345
|
+
__experimental_streamSteps?: boolean;
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* Response interface for deep research operations.
|
|
349
|
+
*/
|
|
350
|
+
interface DeepResearchResponse {
|
|
351
|
+
success: boolean;
|
|
352
|
+
id: string;
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Status response interface for deep research operations.
|
|
356
|
+
*/
|
|
357
|
+
interface DeepResearchStatusResponse {
|
|
358
|
+
success: boolean;
|
|
359
|
+
data: {
|
|
360
|
+
findings: Array<{
|
|
361
|
+
text: string;
|
|
362
|
+
source: string;
|
|
363
|
+
}>;
|
|
364
|
+
finalAnalysis: string;
|
|
365
|
+
analysis: string;
|
|
366
|
+
completedSteps: number;
|
|
367
|
+
totalSteps: number;
|
|
368
|
+
};
|
|
369
|
+
status: "processing" | "completed" | "failed";
|
|
370
|
+
error?: string;
|
|
371
|
+
expiresAt: string;
|
|
372
|
+
currentDepth: number;
|
|
373
|
+
maxDepth: number;
|
|
374
|
+
activities: Array<{
|
|
375
|
+
type: string;
|
|
376
|
+
status: string;
|
|
377
|
+
message: string;
|
|
378
|
+
timestamp: string;
|
|
379
|
+
depth: number;
|
|
380
|
+
}>;
|
|
381
|
+
sources: Array<{
|
|
382
|
+
url: string;
|
|
383
|
+
title: string;
|
|
384
|
+
description: string;
|
|
385
|
+
}>;
|
|
386
|
+
summaries: string[];
|
|
387
|
+
}
|
|
388
|
+
/**
|
|
389
|
+
* Parameters for LLMs.txt generation operations.
|
|
390
|
+
*/
|
|
391
|
+
interface GenerateLLMsTextParams {
|
|
392
|
+
/**
|
|
393
|
+
* Maximum number of URLs to process (1-100)
|
|
394
|
+
* @default 10
|
|
395
|
+
*/
|
|
396
|
+
maxUrls?: number;
|
|
397
|
+
/**
|
|
398
|
+
* Whether to show the full LLMs-full.txt in the response
|
|
399
|
+
* @default false
|
|
400
|
+
*/
|
|
401
|
+
showFullText?: boolean;
|
|
402
|
+
/**
|
|
403
|
+
* Experimental flag for streaming
|
|
404
|
+
*/
|
|
405
|
+
__experimental_stream?: boolean;
|
|
406
|
+
}
|
|
407
|
+
/**
|
|
408
|
+
* Response interface for LLMs.txt generation operations.
|
|
409
|
+
*/
|
|
410
|
+
interface GenerateLLMsTextResponse {
|
|
411
|
+
success: boolean;
|
|
412
|
+
id: string;
|
|
413
|
+
}
|
|
414
|
+
/**
|
|
415
|
+
* Status response interface for LLMs.txt generation operations.
|
|
416
|
+
*/
|
|
417
|
+
interface GenerateLLMsTextStatusResponse {
|
|
418
|
+
success: boolean;
|
|
419
|
+
data: {
|
|
420
|
+
llmstxt: string;
|
|
421
|
+
llmsfulltxt?: string;
|
|
422
|
+
};
|
|
423
|
+
status: "processing" | "completed" | "failed";
|
|
424
|
+
error?: string;
|
|
425
|
+
expiresAt: string;
|
|
426
|
+
}
|
|
288
427
|
/**
|
|
289
428
|
* Main class for interacting with the Firecrawl API.
|
|
290
429
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -326,9 +465,18 @@ declare class FirecrawlApp {
|
|
|
326
465
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
327
466
|
* @param id - The ID of the crawl operation.
|
|
328
467
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
468
|
+
* @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
469
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
470
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
329
471
|
* @returns The response containing the job status.
|
|
330
472
|
*/
|
|
331
|
-
checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
473
|
+
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
474
|
+
/**
|
|
475
|
+
* Returns information about crawl errors.
|
|
476
|
+
* @param id - The ID of the crawl operation.
|
|
477
|
+
* @returns Information about crawl errors.
|
|
478
|
+
*/
|
|
479
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
332
480
|
/**
|
|
333
481
|
* Cancels a crawl job using the Firecrawl API.
|
|
334
482
|
* @param id - The ID of the crawl operation.
|
|
@@ -373,9 +521,18 @@ declare class FirecrawlApp {
|
|
|
373
521
|
* Checks the status of a batch scrape job using the Firecrawl API.
|
|
374
522
|
* @param id - The ID of the batch scrape operation.
|
|
375
523
|
* @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
|
|
524
|
+
* @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
|
|
525
|
+
* @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
|
|
526
|
+
* @param limit - How many entries to return. Only used when `getAllData = false`.
|
|
376
527
|
* @returns The response containing the job status.
|
|
377
528
|
*/
|
|
378
|
-
checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
529
|
+
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
530
|
+
/**
|
|
531
|
+
* Returns information about batch scrape errors.
|
|
532
|
+
* @param id - The ID of the batch scrape operation.
|
|
533
|
+
* @returns Information about batch scrape errors.
|
|
534
|
+
*/
|
|
535
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
379
536
|
/**
|
|
380
537
|
* Extracts information from URLs using the Firecrawl API.
|
|
381
538
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -384,6 +541,20 @@ declare class FirecrawlApp {
|
|
|
384
541
|
* @returns The response from the extract operation.
|
|
385
542
|
*/
|
|
386
543
|
extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
|
|
544
|
+
/**
|
|
545
|
+
* Initiates an asynchronous extract job for one or more URLs using the Firecrawl API.
|
|
546
|
+
* @param urls - The URLs to extract data from.
|
|
547
|
+
* @param params - Additional parameters for the extract request.
|
|
548
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
549
|
+
* @returns The response from the extract operation.
|
|
550
|
+
*/
|
|
551
|
+
asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
|
|
552
|
+
/**
|
|
553
|
+
* Retrieves the status of an extract job.
|
|
554
|
+
* @param jobId - The ID of the extract job.
|
|
555
|
+
* @returns The status of the extract job.
|
|
556
|
+
*/
|
|
557
|
+
getExtractStatus(jobId: string): Promise<any>;
|
|
387
558
|
/**
|
|
388
559
|
* Prepares the headers for an API request.
|
|
389
560
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -427,6 +598,52 @@ declare class FirecrawlApp {
|
|
|
427
598
|
* @param {string} action - The action being performed when the error occurred.
|
|
428
599
|
*/
|
|
429
600
|
handleError(response: AxiosResponse, action: string): void;
|
|
601
|
+
/**
|
|
602
|
+
* Initiates a deep research operation on a given topic and polls until completion.
|
|
603
|
+
* @param topic - The topic to research.
|
|
604
|
+
* @param params - Parameters for the deep research operation.
|
|
605
|
+
* @param onActivity - Optional callback to receive activity updates in real-time.
|
|
606
|
+
* @returns The final research results.
|
|
607
|
+
*/
|
|
608
|
+
__deepResearch(topic: string, params: DeepResearchParams, onActivity?: (activity: {
|
|
609
|
+
type: string;
|
|
610
|
+
status: string;
|
|
611
|
+
message: string;
|
|
612
|
+
timestamp: string;
|
|
613
|
+
depth: number;
|
|
614
|
+
}) => void): Promise<DeepResearchStatusResponse | ErrorResponse>;
|
|
615
|
+
/**
|
|
616
|
+
* Initiates a deep research operation on a given topic without polling.
|
|
617
|
+
* @param params - Parameters for the deep research operation.
|
|
618
|
+
* @returns The response containing the research job ID.
|
|
619
|
+
*/
|
|
620
|
+
__asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse>;
|
|
621
|
+
/**
|
|
622
|
+
* Checks the status of a deep research operation.
|
|
623
|
+
* @param id - The ID of the deep research operation.
|
|
624
|
+
* @returns The current status and results of the research operation.
|
|
625
|
+
*/
|
|
626
|
+
__checkDeepResearchStatus(id: string): Promise<DeepResearchStatusResponse | ErrorResponse>;
|
|
627
|
+
/**
|
|
628
|
+
* Generates LLMs.txt for a given URL and polls until completion.
|
|
629
|
+
* @param url - The URL to generate LLMs.txt from.
|
|
630
|
+
* @param params - Parameters for the LLMs.txt generation operation.
|
|
631
|
+
* @returns The final generation results.
|
|
632
|
+
*/
|
|
633
|
+
generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;
|
|
634
|
+
/**
|
|
635
|
+
* Initiates a LLMs.txt generation operation without polling.
|
|
636
|
+
* @param url - The URL to generate LLMs.txt from.
|
|
637
|
+
* @param params - Parameters for the LLMs.txt generation operation.
|
|
638
|
+
* @returns The response containing the generation job ID.
|
|
639
|
+
*/
|
|
640
|
+
asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse>;
|
|
641
|
+
/**
|
|
642
|
+
* Checks the status of a LLMs.txt generation operation.
|
|
643
|
+
* @param id - The ID of the LLMs.txt generation operation.
|
|
644
|
+
* @returns The current status and results of the generation operation.
|
|
645
|
+
*/
|
|
646
|
+
checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse>;
|
|
430
647
|
}
|
|
431
648
|
interface CrawlWatcherEvents {
|
|
432
649
|
document: CustomEvent<FirecrawlDocument<undefined>>;
|
|
@@ -449,4 +666,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
449
666
|
close(): void;
|
|
450
667
|
}
|
|
451
668
|
|
|
452
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
669
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type DeepResearchParams, type DeepResearchResponse, type DeepResearchStatusResponse, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type GenerateLLMsTextParams, type GenerateLLMsTextResponse, type GenerateLLMsTextStatusResponse, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|