firecrawl 1.14.1 → 1.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +72 -0
- package/dist/index.d.cts +38 -2
- package/dist/index.d.ts +38 -2
- package/dist/index.js +72 -0
- package/package.json +1 -1
- package/src/index.ts +107 -1
package/dist/index.cjs
CHANGED
|
@@ -93,6 +93,20 @@ var FirecrawlApp = class {
|
|
|
93
93
|
}
|
|
94
94
|
};
|
|
95
95
|
}
|
|
96
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
97
|
+
let schema = jsonData.jsonOptions.schema;
|
|
98
|
+
try {
|
|
99
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
100
|
+
} catch (error) {
|
|
101
|
+
}
|
|
102
|
+
jsonData = {
|
|
103
|
+
...jsonData,
|
|
104
|
+
jsonOptions: {
|
|
105
|
+
...jsonData.jsonOptions,
|
|
106
|
+
schema
|
|
107
|
+
}
|
|
108
|
+
};
|
|
109
|
+
}
|
|
96
110
|
try {
|
|
97
111
|
const response = await import_axios.default.post(
|
|
98
112
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -314,6 +328,28 @@ var FirecrawlApp = class {
|
|
|
314
328
|
}
|
|
315
329
|
return { success: false, error: "Internal server error." };
|
|
316
330
|
}
|
|
331
|
+
/**
|
|
332
|
+
* Returns information about crawl errors.
|
|
333
|
+
* @param id - The ID of the crawl operation.
|
|
334
|
+
* @returns Information about crawl errors.
|
|
335
|
+
*/
|
|
336
|
+
async checkCrawlErrors(id) {
|
|
337
|
+
const headers = this.prepareHeaders();
|
|
338
|
+
try {
|
|
339
|
+
const response = await this.deleteRequest(
|
|
340
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
341
|
+
headers
|
|
342
|
+
);
|
|
343
|
+
if (response.status === 200) {
|
|
344
|
+
return response.data;
|
|
345
|
+
} else {
|
|
346
|
+
this.handleError(response, "check crawl errors");
|
|
347
|
+
}
|
|
348
|
+
} catch (error) {
|
|
349
|
+
throw new FirecrawlError(error.message, 500);
|
|
350
|
+
}
|
|
351
|
+
return { success: false, error: "Internal server error." };
|
|
352
|
+
}
|
|
317
353
|
/**
|
|
318
354
|
* Cancels a crawl job using the Firecrawl API.
|
|
319
355
|
* @param id - The ID of the crawl operation.
|
|
@@ -402,6 +438,20 @@ var FirecrawlApp = class {
|
|
|
402
438
|
}
|
|
403
439
|
};
|
|
404
440
|
}
|
|
441
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
442
|
+
let schema = jsonData.jsonOptions.schema;
|
|
443
|
+
try {
|
|
444
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
445
|
+
} catch (error) {
|
|
446
|
+
}
|
|
447
|
+
jsonData = {
|
|
448
|
+
...jsonData,
|
|
449
|
+
jsonOptions: {
|
|
450
|
+
...jsonData.jsonOptions,
|
|
451
|
+
schema
|
|
452
|
+
}
|
|
453
|
+
};
|
|
454
|
+
}
|
|
405
455
|
try {
|
|
406
456
|
const response = await this.postRequest(
|
|
407
457
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -532,6 +582,28 @@ var FirecrawlApp = class {
|
|
|
532
582
|
}
|
|
533
583
|
return { success: false, error: "Internal server error." };
|
|
534
584
|
}
|
|
585
|
+
/**
|
|
586
|
+
* Returns information about batch scrape errors.
|
|
587
|
+
* @param id - The ID of the batch scrape operation.
|
|
588
|
+
* @returns Information about batch scrape errors.
|
|
589
|
+
*/
|
|
590
|
+
async checkBatchScrapeErrors(id) {
|
|
591
|
+
const headers = this.prepareHeaders();
|
|
592
|
+
try {
|
|
593
|
+
const response = await this.deleteRequest(
|
|
594
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
595
|
+
headers
|
|
596
|
+
);
|
|
597
|
+
if (response.status === 200) {
|
|
598
|
+
return response.data;
|
|
599
|
+
} else {
|
|
600
|
+
this.handleError(response, "check batch scrape errors");
|
|
601
|
+
}
|
|
602
|
+
} catch (error) {
|
|
603
|
+
throw new FirecrawlError(error.message, 500);
|
|
604
|
+
}
|
|
605
|
+
return { success: false, error: "Internal server error." };
|
|
606
|
+
}
|
|
535
607
|
/**
|
|
536
608
|
* Extracts information from URLs using the Firecrawl API.
|
|
537
609
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
package/dist/index.d.cts
CHANGED
|
@@ -61,6 +61,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
61
61
|
rawHtml?: string;
|
|
62
62
|
links?: string[];
|
|
63
63
|
extract?: T;
|
|
64
|
+
json?: T;
|
|
64
65
|
screenshot?: string;
|
|
65
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
66
67
|
actions: ActionsSchema;
|
|
@@ -72,7 +73,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
73
|
* Defines the options and configurations available for scraping web content.
|
|
73
74
|
*/
|
|
74
75
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
76
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
77
|
headers?: Record<string, string>;
|
|
77
78
|
includeTags?: string[];
|
|
78
79
|
excludeTags?: string[];
|
|
@@ -119,6 +120,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
120
|
schema?: LLMSchema;
|
|
120
121
|
systemPrompt?: string;
|
|
121
122
|
};
|
|
123
|
+
jsonOptions?: {
|
|
124
|
+
prompt?: string;
|
|
125
|
+
schema?: LLMSchema;
|
|
126
|
+
systemPrompt?: string;
|
|
127
|
+
};
|
|
122
128
|
actions?: ActionsSchema;
|
|
123
129
|
}
|
|
124
130
|
interface ActionsResult {
|
|
@@ -286,6 +292,24 @@ interface SearchResponse {
|
|
|
286
292
|
warning?: string;
|
|
287
293
|
error?: string;
|
|
288
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
297
|
+
*/
|
|
298
|
+
interface CrawlErrorsResponse {
|
|
299
|
+
/**
|
|
300
|
+
* Scrapes that errored out + error details
|
|
301
|
+
*/
|
|
302
|
+
errors: {
|
|
303
|
+
id: string;
|
|
304
|
+
timestamp?: string;
|
|
305
|
+
url: string;
|
|
306
|
+
error: string;
|
|
307
|
+
}[];
|
|
308
|
+
/**
|
|
309
|
+
* URLs blocked by robots.txt
|
|
310
|
+
*/
|
|
311
|
+
robotsBlocked: string[];
|
|
312
|
+
}
|
|
289
313
|
/**
|
|
290
314
|
* Main class for interacting with the Firecrawl API.
|
|
291
315
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -333,6 +357,12 @@ declare class FirecrawlApp {
|
|
|
333
357
|
* @returns The response containing the job status.
|
|
334
358
|
*/
|
|
335
359
|
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
360
|
+
/**
|
|
361
|
+
* Returns information about crawl errors.
|
|
362
|
+
* @param id - The ID of the crawl operation.
|
|
363
|
+
* @returns Information about crawl errors.
|
|
364
|
+
*/
|
|
365
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
336
366
|
/**
|
|
337
367
|
* Cancels a crawl job using the Firecrawl API.
|
|
338
368
|
* @param id - The ID of the crawl operation.
|
|
@@ -383,6 +413,12 @@ declare class FirecrawlApp {
|
|
|
383
413
|
* @returns The response containing the job status.
|
|
384
414
|
*/
|
|
385
415
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
416
|
+
/**
|
|
417
|
+
* Returns information about batch scrape errors.
|
|
418
|
+
* @param id - The ID of the batch scrape operation.
|
|
419
|
+
* @returns Information about batch scrape errors.
|
|
420
|
+
*/
|
|
421
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
386
422
|
/**
|
|
387
423
|
* Extracts information from URLs using the Firecrawl API.
|
|
388
424
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -470,4 +506,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
470
506
|
close(): void;
|
|
471
507
|
}
|
|
472
508
|
|
|
473
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
509
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -61,6 +61,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
61
61
|
rawHtml?: string;
|
|
62
62
|
links?: string[];
|
|
63
63
|
extract?: T;
|
|
64
|
+
json?: T;
|
|
64
65
|
screenshot?: string;
|
|
65
66
|
metadata?: FirecrawlDocumentMetadata;
|
|
66
67
|
actions: ActionsSchema;
|
|
@@ -72,7 +73,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
73
|
* Defines the options and configurations available for scraping web content.
|
|
73
74
|
*/
|
|
74
75
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
76
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
77
|
headers?: Record<string, string>;
|
|
77
78
|
includeTags?: string[];
|
|
78
79
|
excludeTags?: string[];
|
|
@@ -119,6 +120,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
120
|
schema?: LLMSchema;
|
|
120
121
|
systemPrompt?: string;
|
|
121
122
|
};
|
|
123
|
+
jsonOptions?: {
|
|
124
|
+
prompt?: string;
|
|
125
|
+
schema?: LLMSchema;
|
|
126
|
+
systemPrompt?: string;
|
|
127
|
+
};
|
|
122
128
|
actions?: ActionsSchema;
|
|
123
129
|
}
|
|
124
130
|
interface ActionsResult {
|
|
@@ -286,6 +292,24 @@ interface SearchResponse {
|
|
|
286
292
|
warning?: string;
|
|
287
293
|
error?: string;
|
|
288
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
297
|
+
*/
|
|
298
|
+
interface CrawlErrorsResponse {
|
|
299
|
+
/**
|
|
300
|
+
* Scrapes that errored out + error details
|
|
301
|
+
*/
|
|
302
|
+
errors: {
|
|
303
|
+
id: string;
|
|
304
|
+
timestamp?: string;
|
|
305
|
+
url: string;
|
|
306
|
+
error: string;
|
|
307
|
+
}[];
|
|
308
|
+
/**
|
|
309
|
+
* URLs blocked by robots.txt
|
|
310
|
+
*/
|
|
311
|
+
robotsBlocked: string[];
|
|
312
|
+
}
|
|
289
313
|
/**
|
|
290
314
|
* Main class for interacting with the Firecrawl API.
|
|
291
315
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -333,6 +357,12 @@ declare class FirecrawlApp {
|
|
|
333
357
|
* @returns The response containing the job status.
|
|
334
358
|
*/
|
|
335
359
|
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
360
|
+
/**
|
|
361
|
+
* Returns information about crawl errors.
|
|
362
|
+
* @param id - The ID of the crawl operation.
|
|
363
|
+
* @returns Information about crawl errors.
|
|
364
|
+
*/
|
|
365
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
336
366
|
/**
|
|
337
367
|
* Cancels a crawl job using the Firecrawl API.
|
|
338
368
|
* @param id - The ID of the crawl operation.
|
|
@@ -383,6 +413,12 @@ declare class FirecrawlApp {
|
|
|
383
413
|
* @returns The response containing the job status.
|
|
384
414
|
*/
|
|
385
415
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
416
|
+
/**
|
|
417
|
+
* Returns information about batch scrape errors.
|
|
418
|
+
* @param id - The ID of the batch scrape operation.
|
|
419
|
+
* @returns Information about batch scrape errors.
|
|
420
|
+
*/
|
|
421
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
386
422
|
/**
|
|
387
423
|
* Extracts information from URLs using the Firecrawl API.
|
|
388
424
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -470,4 +506,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
470
506
|
close(): void;
|
|
471
507
|
}
|
|
472
508
|
|
|
473
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
509
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.js
CHANGED
|
@@ -57,6 +57,20 @@ var FirecrawlApp = class {
|
|
|
57
57
|
}
|
|
58
58
|
};
|
|
59
59
|
}
|
|
60
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
61
|
+
let schema = jsonData.jsonOptions.schema;
|
|
62
|
+
try {
|
|
63
|
+
schema = zodToJsonSchema(schema);
|
|
64
|
+
} catch (error) {
|
|
65
|
+
}
|
|
66
|
+
jsonData = {
|
|
67
|
+
...jsonData,
|
|
68
|
+
jsonOptions: {
|
|
69
|
+
...jsonData.jsonOptions,
|
|
70
|
+
schema
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
}
|
|
60
74
|
try {
|
|
61
75
|
const response = await axios.post(
|
|
62
76
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -278,6 +292,28 @@ var FirecrawlApp = class {
|
|
|
278
292
|
}
|
|
279
293
|
return { success: false, error: "Internal server error." };
|
|
280
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Returns information about crawl errors.
|
|
297
|
+
* @param id - The ID of the crawl operation.
|
|
298
|
+
* @returns Information about crawl errors.
|
|
299
|
+
*/
|
|
300
|
+
async checkCrawlErrors(id) {
|
|
301
|
+
const headers = this.prepareHeaders();
|
|
302
|
+
try {
|
|
303
|
+
const response = await this.deleteRequest(
|
|
304
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
305
|
+
headers
|
|
306
|
+
);
|
|
307
|
+
if (response.status === 200) {
|
|
308
|
+
return response.data;
|
|
309
|
+
} else {
|
|
310
|
+
this.handleError(response, "check crawl errors");
|
|
311
|
+
}
|
|
312
|
+
} catch (error) {
|
|
313
|
+
throw new FirecrawlError(error.message, 500);
|
|
314
|
+
}
|
|
315
|
+
return { success: false, error: "Internal server error." };
|
|
316
|
+
}
|
|
281
317
|
/**
|
|
282
318
|
* Cancels a crawl job using the Firecrawl API.
|
|
283
319
|
* @param id - The ID of the crawl operation.
|
|
@@ -366,6 +402,20 @@ var FirecrawlApp = class {
|
|
|
366
402
|
}
|
|
367
403
|
};
|
|
368
404
|
}
|
|
405
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
406
|
+
let schema = jsonData.jsonOptions.schema;
|
|
407
|
+
try {
|
|
408
|
+
schema = zodToJsonSchema(schema);
|
|
409
|
+
} catch (error) {
|
|
410
|
+
}
|
|
411
|
+
jsonData = {
|
|
412
|
+
...jsonData,
|
|
413
|
+
jsonOptions: {
|
|
414
|
+
...jsonData.jsonOptions,
|
|
415
|
+
schema
|
|
416
|
+
}
|
|
417
|
+
};
|
|
418
|
+
}
|
|
369
419
|
try {
|
|
370
420
|
const response = await this.postRequest(
|
|
371
421
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -496,6 +546,28 @@ var FirecrawlApp = class {
|
|
|
496
546
|
}
|
|
497
547
|
return { success: false, error: "Internal server error." };
|
|
498
548
|
}
|
|
549
|
+
/**
|
|
550
|
+
* Returns information about batch scrape errors.
|
|
551
|
+
* @param id - The ID of the batch scrape operation.
|
|
552
|
+
* @returns Information about batch scrape errors.
|
|
553
|
+
*/
|
|
554
|
+
async checkBatchScrapeErrors(id) {
|
|
555
|
+
const headers = this.prepareHeaders();
|
|
556
|
+
try {
|
|
557
|
+
const response = await this.deleteRequest(
|
|
558
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
559
|
+
headers
|
|
560
|
+
);
|
|
561
|
+
if (response.status === 200) {
|
|
562
|
+
return response.data;
|
|
563
|
+
} else {
|
|
564
|
+
this.handleError(response, "check batch scrape errors");
|
|
565
|
+
}
|
|
566
|
+
} catch (error) {
|
|
567
|
+
throw new FirecrawlError(error.message, 500);
|
|
568
|
+
}
|
|
569
|
+
return { success: false, error: "Internal server error." };
|
|
570
|
+
}
|
|
499
571
|
/**
|
|
500
572
|
* Extracts information from URLs using the Firecrawl API.
|
|
501
573
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -65,6 +65,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
65
65
|
rawHtml?: string;
|
|
66
66
|
links?: string[];
|
|
67
67
|
extract?: T;
|
|
68
|
+
json?: T;
|
|
68
69
|
screenshot?: string;
|
|
69
70
|
metadata?: FirecrawlDocumentMetadata;
|
|
70
71
|
actions: ActionsSchema;
|
|
@@ -78,7 +79,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
78
79
|
* Defines the options and configurations available for scraping web content.
|
|
79
80
|
*/
|
|
80
81
|
export interface CrawlScrapeOptions {
|
|
81
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
82
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
82
83
|
headers?: Record<string, string>;
|
|
83
84
|
includeTags?: string[];
|
|
84
85
|
excludeTags?: string[];
|
|
@@ -127,6 +128,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
127
128
|
schema?: LLMSchema;
|
|
128
129
|
systemPrompt?: string;
|
|
129
130
|
};
|
|
131
|
+
jsonOptions?:{
|
|
132
|
+
prompt?: string;
|
|
133
|
+
schema?: LLMSchema;
|
|
134
|
+
systemPrompt?: string;
|
|
135
|
+
}
|
|
130
136
|
actions?: ActionsSchema;
|
|
131
137
|
}
|
|
132
138
|
|
|
@@ -314,6 +320,26 @@ export interface SearchResponse {
|
|
|
314
320
|
error?: string;
|
|
315
321
|
}
|
|
316
322
|
|
|
323
|
+
/**
|
|
324
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
325
|
+
*/
|
|
326
|
+
export interface CrawlErrorsResponse {
|
|
327
|
+
/**
|
|
328
|
+
* Scrapes that errored out + error details
|
|
329
|
+
*/
|
|
330
|
+
errors: {
|
|
331
|
+
id: string,
|
|
332
|
+
timestamp?: string,
|
|
333
|
+
url: string,
|
|
334
|
+
error: string,
|
|
335
|
+
}[];
|
|
336
|
+
|
|
337
|
+
/**
|
|
338
|
+
* URLs blocked by robots.txt
|
|
339
|
+
*/
|
|
340
|
+
robotsBlocked: string[];
|
|
341
|
+
};
|
|
342
|
+
|
|
317
343
|
/**
|
|
318
344
|
* Main class for interacting with the Firecrawl API.
|
|
319
345
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -373,6 +399,23 @@ export default class FirecrawlApp {
|
|
|
373
399
|
},
|
|
374
400
|
};
|
|
375
401
|
}
|
|
402
|
+
|
|
403
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
404
|
+
let schema = jsonData.jsonOptions.schema;
|
|
405
|
+
// Try parsing the schema as a Zod schema
|
|
406
|
+
try {
|
|
407
|
+
schema = zodToJsonSchema(schema);
|
|
408
|
+
} catch (error) {
|
|
409
|
+
|
|
410
|
+
}
|
|
411
|
+
jsonData = {
|
|
412
|
+
...jsonData,
|
|
413
|
+
jsonOptions: {
|
|
414
|
+
...jsonData.jsonOptions,
|
|
415
|
+
schema: schema,
|
|
416
|
+
},
|
|
417
|
+
};
|
|
418
|
+
}
|
|
376
419
|
try {
|
|
377
420
|
const response: AxiosResponse = await axios.post(
|
|
378
421
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -621,6 +664,29 @@ export default class FirecrawlApp {
|
|
|
621
664
|
return { success: false, error: "Internal server error." };
|
|
622
665
|
}
|
|
623
666
|
|
|
667
|
+
/**
|
|
668
|
+
* Returns information about crawl errors.
|
|
669
|
+
* @param id - The ID of the crawl operation.
|
|
670
|
+
* @returns Information about crawl errors.
|
|
671
|
+
*/
|
|
672
|
+
async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
|
673
|
+
const headers = this.prepareHeaders();
|
|
674
|
+
try {
|
|
675
|
+
const response: AxiosResponse = await this.deleteRequest(
|
|
676
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
677
|
+
headers
|
|
678
|
+
);
|
|
679
|
+
if (response.status === 200) {
|
|
680
|
+
return response.data;
|
|
681
|
+
} else {
|
|
682
|
+
this.handleError(response, "check crawl errors");
|
|
683
|
+
}
|
|
684
|
+
} catch (error: any) {
|
|
685
|
+
throw new FirecrawlError(error.message, 500);
|
|
686
|
+
}
|
|
687
|
+
return { success: false, error: "Internal server error." };
|
|
688
|
+
}
|
|
689
|
+
|
|
624
690
|
/**
|
|
625
691
|
* Cancels a crawl job using the Firecrawl API.
|
|
626
692
|
* @param id - The ID of the crawl operation.
|
|
@@ -729,6 +795,23 @@ export default class FirecrawlApp {
|
|
|
729
795
|
},
|
|
730
796
|
};
|
|
731
797
|
}
|
|
798
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
799
|
+
let schema = jsonData.jsonOptions.schema;
|
|
800
|
+
|
|
801
|
+
// Try parsing the schema as a Zod schema
|
|
802
|
+
try {
|
|
803
|
+
schema = zodToJsonSchema(schema);
|
|
804
|
+
} catch (error) {
|
|
805
|
+
|
|
806
|
+
}
|
|
807
|
+
jsonData = {
|
|
808
|
+
...jsonData,
|
|
809
|
+
jsonOptions: {
|
|
810
|
+
...jsonData.jsonOptions,
|
|
811
|
+
schema: schema,
|
|
812
|
+
},
|
|
813
|
+
};
|
|
814
|
+
}
|
|
732
815
|
try {
|
|
733
816
|
const response: AxiosResponse = await this.postRequest(
|
|
734
817
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -883,6 +966,29 @@ export default class FirecrawlApp {
|
|
|
883
966
|
return { success: false, error: "Internal server error." };
|
|
884
967
|
}
|
|
885
968
|
|
|
969
|
+
/**
|
|
970
|
+
* Returns information about batch scrape errors.
|
|
971
|
+
* @param id - The ID of the batch scrape operation.
|
|
972
|
+
* @returns Information about batch scrape errors.
|
|
973
|
+
*/
|
|
974
|
+
async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
|
975
|
+
const headers = this.prepareHeaders();
|
|
976
|
+
try {
|
|
977
|
+
const response: AxiosResponse = await this.deleteRequest(
|
|
978
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
979
|
+
headers
|
|
980
|
+
);
|
|
981
|
+
if (response.status === 200) {
|
|
982
|
+
return response.data;
|
|
983
|
+
} else {
|
|
984
|
+
this.handleError(response, "check batch scrape errors");
|
|
985
|
+
}
|
|
986
|
+
} catch (error: any) {
|
|
987
|
+
throw new FirecrawlError(error.message, 500);
|
|
988
|
+
}
|
|
989
|
+
return { success: false, error: "Internal server error." };
|
|
990
|
+
}
|
|
991
|
+
|
|
886
992
|
/**
|
|
887
993
|
* Extracts information from URLs using the Firecrawl API.
|
|
888
994
|
* Currently in Beta. Expect breaking changes on future minor versions.
|