firecrawl 1.14.1 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +72 -0
- package/dist/index.d.cts +37 -2
- package/dist/index.d.ts +37 -2
- package/dist/index.js +72 -0
- package/package.json +1 -1
- package/src/index.ts +106 -1
package/dist/index.cjs
CHANGED
|
@@ -93,6 +93,20 @@ var FirecrawlApp = class {
|
|
|
93
93
|
}
|
|
94
94
|
};
|
|
95
95
|
}
|
|
96
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
97
|
+
let schema = jsonData.jsonOptions.schema;
|
|
98
|
+
try {
|
|
99
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
100
|
+
} catch (error) {
|
|
101
|
+
}
|
|
102
|
+
jsonData = {
|
|
103
|
+
...jsonData,
|
|
104
|
+
jsonOptions: {
|
|
105
|
+
...jsonData.jsonOptions,
|
|
106
|
+
schema
|
|
107
|
+
}
|
|
108
|
+
};
|
|
109
|
+
}
|
|
96
110
|
try {
|
|
97
111
|
const response = await import_axios.default.post(
|
|
98
112
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -314,6 +328,28 @@ var FirecrawlApp = class {
|
|
|
314
328
|
}
|
|
315
329
|
return { success: false, error: "Internal server error." };
|
|
316
330
|
}
|
|
331
|
+
/**
|
|
332
|
+
* Returns information about crawl errors.
|
|
333
|
+
* @param id - The ID of the crawl operation.
|
|
334
|
+
* @returns Information about crawl errors.
|
|
335
|
+
*/
|
|
336
|
+
async checkCrawlErrors(id) {
|
|
337
|
+
const headers = this.prepareHeaders();
|
|
338
|
+
try {
|
|
339
|
+
const response = await this.deleteRequest(
|
|
340
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
341
|
+
headers
|
|
342
|
+
);
|
|
343
|
+
if (response.status === 200) {
|
|
344
|
+
return response.data;
|
|
345
|
+
} else {
|
|
346
|
+
this.handleError(response, "check crawl errors");
|
|
347
|
+
}
|
|
348
|
+
} catch (error) {
|
|
349
|
+
throw new FirecrawlError(error.message, 500);
|
|
350
|
+
}
|
|
351
|
+
return { success: false, error: "Internal server error." };
|
|
352
|
+
}
|
|
317
353
|
/**
|
|
318
354
|
* Cancels a crawl job using the Firecrawl API.
|
|
319
355
|
* @param id - The ID of the crawl operation.
|
|
@@ -402,6 +438,20 @@ var FirecrawlApp = class {
|
|
|
402
438
|
}
|
|
403
439
|
};
|
|
404
440
|
}
|
|
441
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
442
|
+
let schema = jsonData.jsonOptions.schema;
|
|
443
|
+
try {
|
|
444
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
445
|
+
} catch (error) {
|
|
446
|
+
}
|
|
447
|
+
jsonData = {
|
|
448
|
+
...jsonData,
|
|
449
|
+
jsonOptions: {
|
|
450
|
+
...jsonData.jsonOptions,
|
|
451
|
+
schema
|
|
452
|
+
}
|
|
453
|
+
};
|
|
454
|
+
}
|
|
405
455
|
try {
|
|
406
456
|
const response = await this.postRequest(
|
|
407
457
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -532,6 +582,28 @@ var FirecrawlApp = class {
|
|
|
532
582
|
}
|
|
533
583
|
return { success: false, error: "Internal server error." };
|
|
534
584
|
}
|
|
585
|
+
/**
|
|
586
|
+
* Returns information about batch scrape errors.
|
|
587
|
+
* @param id - The ID of the batch scrape operation.
|
|
588
|
+
* @returns Information about batch scrape errors.
|
|
589
|
+
*/
|
|
590
|
+
async checkBatchScrapeErrors(id) {
|
|
591
|
+
const headers = this.prepareHeaders();
|
|
592
|
+
try {
|
|
593
|
+
const response = await this.deleteRequest(
|
|
594
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
595
|
+
headers
|
|
596
|
+
);
|
|
597
|
+
if (response.status === 200) {
|
|
598
|
+
return response.data;
|
|
599
|
+
} else {
|
|
600
|
+
this.handleError(response, "check batch scrape errors");
|
|
601
|
+
}
|
|
602
|
+
} catch (error) {
|
|
603
|
+
throw new FirecrawlError(error.message, 500);
|
|
604
|
+
}
|
|
605
|
+
return { success: false, error: "Internal server error." };
|
|
606
|
+
}
|
|
535
607
|
/**
|
|
536
608
|
* Extracts information from URLs using the Firecrawl API.
|
|
537
609
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
package/dist/index.d.cts
CHANGED
|
@@ -72,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
72
|
* Defines the options and configurations available for scraping web content.
|
|
73
73
|
*/
|
|
74
74
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
75
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
76
|
headers?: Record<string, string>;
|
|
77
77
|
includeTags?: string[];
|
|
78
78
|
excludeTags?: string[];
|
|
@@ -119,6 +119,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
119
|
schema?: LLMSchema;
|
|
120
120
|
systemPrompt?: string;
|
|
121
121
|
};
|
|
122
|
+
json?: {
|
|
123
|
+
prompt?: string;
|
|
124
|
+
schema?: LLMSchema;
|
|
125
|
+
systemPrompt?: string;
|
|
126
|
+
};
|
|
122
127
|
actions?: ActionsSchema;
|
|
123
128
|
}
|
|
124
129
|
interface ActionsResult {
|
|
@@ -286,6 +291,24 @@ interface SearchResponse {
|
|
|
286
291
|
warning?: string;
|
|
287
292
|
error?: string;
|
|
288
293
|
}
|
|
294
|
+
/**
|
|
295
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
296
|
+
*/
|
|
297
|
+
interface CrawlErrorsResponse {
|
|
298
|
+
/**
|
|
299
|
+
* Scrapes that errored out + error details
|
|
300
|
+
*/
|
|
301
|
+
errors: {
|
|
302
|
+
id: string;
|
|
303
|
+
timestamp?: string;
|
|
304
|
+
url: string;
|
|
305
|
+
error: string;
|
|
306
|
+
}[];
|
|
307
|
+
/**
|
|
308
|
+
* URLs blocked by robots.txt
|
|
309
|
+
*/
|
|
310
|
+
robotsBlocked: string[];
|
|
311
|
+
}
|
|
289
312
|
/**
|
|
290
313
|
* Main class for interacting with the Firecrawl API.
|
|
291
314
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -333,6 +356,12 @@ declare class FirecrawlApp {
|
|
|
333
356
|
* @returns The response containing the job status.
|
|
334
357
|
*/
|
|
335
358
|
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
359
|
+
/**
|
|
360
|
+
* Returns information about crawl errors.
|
|
361
|
+
* @param id - The ID of the crawl operation.
|
|
362
|
+
* @returns Information about crawl errors.
|
|
363
|
+
*/
|
|
364
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
336
365
|
/**
|
|
337
366
|
* Cancels a crawl job using the Firecrawl API.
|
|
338
367
|
* @param id - The ID of the crawl operation.
|
|
@@ -383,6 +412,12 @@ declare class FirecrawlApp {
|
|
|
383
412
|
* @returns The response containing the job status.
|
|
384
413
|
*/
|
|
385
414
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
415
|
+
/**
|
|
416
|
+
* Returns information about batch scrape errors.
|
|
417
|
+
* @param id - The ID of the batch scrape operation.
|
|
418
|
+
* @returns Information about batch scrape errors.
|
|
419
|
+
*/
|
|
420
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
386
421
|
/**
|
|
387
422
|
* Extracts information from URLs using the Firecrawl API.
|
|
388
423
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -470,4 +505,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
470
505
|
close(): void;
|
|
471
506
|
}
|
|
472
507
|
|
|
473
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
508
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -72,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
72
|
* Defines the options and configurations available for scraping web content.
|
|
73
73
|
*/
|
|
74
74
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
75
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
76
|
headers?: Record<string, string>;
|
|
77
77
|
includeTags?: string[];
|
|
78
78
|
excludeTags?: string[];
|
|
@@ -119,6 +119,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
119
|
schema?: LLMSchema;
|
|
120
120
|
systemPrompt?: string;
|
|
121
121
|
};
|
|
122
|
+
json?: {
|
|
123
|
+
prompt?: string;
|
|
124
|
+
schema?: LLMSchema;
|
|
125
|
+
systemPrompt?: string;
|
|
126
|
+
};
|
|
122
127
|
actions?: ActionsSchema;
|
|
123
128
|
}
|
|
124
129
|
interface ActionsResult {
|
|
@@ -286,6 +291,24 @@ interface SearchResponse {
|
|
|
286
291
|
warning?: string;
|
|
287
292
|
error?: string;
|
|
288
293
|
}
|
|
294
|
+
/**
|
|
295
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
296
|
+
*/
|
|
297
|
+
interface CrawlErrorsResponse {
|
|
298
|
+
/**
|
|
299
|
+
* Scrapes that errored out + error details
|
|
300
|
+
*/
|
|
301
|
+
errors: {
|
|
302
|
+
id: string;
|
|
303
|
+
timestamp?: string;
|
|
304
|
+
url: string;
|
|
305
|
+
error: string;
|
|
306
|
+
}[];
|
|
307
|
+
/**
|
|
308
|
+
* URLs blocked by robots.txt
|
|
309
|
+
*/
|
|
310
|
+
robotsBlocked: string[];
|
|
311
|
+
}
|
|
289
312
|
/**
|
|
290
313
|
* Main class for interacting with the Firecrawl API.
|
|
291
314
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -333,6 +356,12 @@ declare class FirecrawlApp {
|
|
|
333
356
|
* @returns The response containing the job status.
|
|
334
357
|
*/
|
|
335
358
|
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
359
|
+
/**
|
|
360
|
+
* Returns information about crawl errors.
|
|
361
|
+
* @param id - The ID of the crawl operation.
|
|
362
|
+
* @returns Information about crawl errors.
|
|
363
|
+
*/
|
|
364
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
336
365
|
/**
|
|
337
366
|
* Cancels a crawl job using the Firecrawl API.
|
|
338
367
|
* @param id - The ID of the crawl operation.
|
|
@@ -383,6 +412,12 @@ declare class FirecrawlApp {
|
|
|
383
412
|
* @returns The response containing the job status.
|
|
384
413
|
*/
|
|
385
414
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
415
|
+
/**
|
|
416
|
+
* Returns information about batch scrape errors.
|
|
417
|
+
* @param id - The ID of the batch scrape operation.
|
|
418
|
+
* @returns Information about batch scrape errors.
|
|
419
|
+
*/
|
|
420
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
386
421
|
/**
|
|
387
422
|
* Extracts information from URLs using the Firecrawl API.
|
|
388
423
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -470,4 +505,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
470
505
|
close(): void;
|
|
471
506
|
}
|
|
472
507
|
|
|
473
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
508
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.js
CHANGED
|
@@ -57,6 +57,20 @@ var FirecrawlApp = class {
|
|
|
57
57
|
}
|
|
58
58
|
};
|
|
59
59
|
}
|
|
60
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
61
|
+
let schema = jsonData.jsonOptions.schema;
|
|
62
|
+
try {
|
|
63
|
+
schema = zodToJsonSchema(schema);
|
|
64
|
+
} catch (error) {
|
|
65
|
+
}
|
|
66
|
+
jsonData = {
|
|
67
|
+
...jsonData,
|
|
68
|
+
jsonOptions: {
|
|
69
|
+
...jsonData.jsonOptions,
|
|
70
|
+
schema
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
}
|
|
60
74
|
try {
|
|
61
75
|
const response = await axios.post(
|
|
62
76
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -278,6 +292,28 @@ var FirecrawlApp = class {
|
|
|
278
292
|
}
|
|
279
293
|
return { success: false, error: "Internal server error." };
|
|
280
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Returns information about crawl errors.
|
|
297
|
+
* @param id - The ID of the crawl operation.
|
|
298
|
+
* @returns Information about crawl errors.
|
|
299
|
+
*/
|
|
300
|
+
async checkCrawlErrors(id) {
|
|
301
|
+
const headers = this.prepareHeaders();
|
|
302
|
+
try {
|
|
303
|
+
const response = await this.deleteRequest(
|
|
304
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
305
|
+
headers
|
|
306
|
+
);
|
|
307
|
+
if (response.status === 200) {
|
|
308
|
+
return response.data;
|
|
309
|
+
} else {
|
|
310
|
+
this.handleError(response, "check crawl errors");
|
|
311
|
+
}
|
|
312
|
+
} catch (error) {
|
|
313
|
+
throw new FirecrawlError(error.message, 500);
|
|
314
|
+
}
|
|
315
|
+
return { success: false, error: "Internal server error." };
|
|
316
|
+
}
|
|
281
317
|
/**
|
|
282
318
|
* Cancels a crawl job using the Firecrawl API.
|
|
283
319
|
* @param id - The ID of the crawl operation.
|
|
@@ -366,6 +402,20 @@ var FirecrawlApp = class {
|
|
|
366
402
|
}
|
|
367
403
|
};
|
|
368
404
|
}
|
|
405
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
406
|
+
let schema = jsonData.jsonOptions.schema;
|
|
407
|
+
try {
|
|
408
|
+
schema = zodToJsonSchema(schema);
|
|
409
|
+
} catch (error) {
|
|
410
|
+
}
|
|
411
|
+
jsonData = {
|
|
412
|
+
...jsonData,
|
|
413
|
+
jsonOptions: {
|
|
414
|
+
...jsonData.jsonOptions,
|
|
415
|
+
schema
|
|
416
|
+
}
|
|
417
|
+
};
|
|
418
|
+
}
|
|
369
419
|
try {
|
|
370
420
|
const response = await this.postRequest(
|
|
371
421
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -496,6 +546,28 @@ var FirecrawlApp = class {
|
|
|
496
546
|
}
|
|
497
547
|
return { success: false, error: "Internal server error." };
|
|
498
548
|
}
|
|
549
|
+
/**
|
|
550
|
+
* Returns information about batch scrape errors.
|
|
551
|
+
* @param id - The ID of the batch scrape operation.
|
|
552
|
+
* @returns Information about batch scrape errors.
|
|
553
|
+
*/
|
|
554
|
+
async checkBatchScrapeErrors(id) {
|
|
555
|
+
const headers = this.prepareHeaders();
|
|
556
|
+
try {
|
|
557
|
+
const response = await this.deleteRequest(
|
|
558
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
559
|
+
headers
|
|
560
|
+
);
|
|
561
|
+
if (response.status === 200) {
|
|
562
|
+
return response.data;
|
|
563
|
+
} else {
|
|
564
|
+
this.handleError(response, "check batch scrape errors");
|
|
565
|
+
}
|
|
566
|
+
} catch (error) {
|
|
567
|
+
throw new FirecrawlError(error.message, 500);
|
|
568
|
+
}
|
|
569
|
+
return { success: false, error: "Internal server error." };
|
|
570
|
+
}
|
|
499
571
|
/**
|
|
500
572
|
* Extracts information from URLs using the Firecrawl API.
|
|
501
573
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -78,7 +78,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
78
78
|
* Defines the options and configurations available for scraping web content.
|
|
79
79
|
*/
|
|
80
80
|
export interface CrawlScrapeOptions {
|
|
81
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
81
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
82
82
|
headers?: Record<string, string>;
|
|
83
83
|
includeTags?: string[];
|
|
84
84
|
excludeTags?: string[];
|
|
@@ -127,6 +127,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
127
127
|
schema?: LLMSchema;
|
|
128
128
|
systemPrompt?: string;
|
|
129
129
|
};
|
|
130
|
+
json?:{
|
|
131
|
+
prompt?: string;
|
|
132
|
+
schema?: LLMSchema;
|
|
133
|
+
systemPrompt?: string;
|
|
134
|
+
}
|
|
130
135
|
actions?: ActionsSchema;
|
|
131
136
|
}
|
|
132
137
|
|
|
@@ -314,6 +319,26 @@ export interface SearchResponse {
|
|
|
314
319
|
error?: string;
|
|
315
320
|
}
|
|
316
321
|
|
|
322
|
+
/**
|
|
323
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
324
|
+
*/
|
|
325
|
+
export interface CrawlErrorsResponse {
|
|
326
|
+
/**
|
|
327
|
+
* Scrapes that errored out + error details
|
|
328
|
+
*/
|
|
329
|
+
errors: {
|
|
330
|
+
id: string,
|
|
331
|
+
timestamp?: string,
|
|
332
|
+
url: string,
|
|
333
|
+
error: string,
|
|
334
|
+
}[];
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* URLs blocked by robots.txt
|
|
338
|
+
*/
|
|
339
|
+
robotsBlocked: string[];
|
|
340
|
+
};
|
|
341
|
+
|
|
317
342
|
/**
|
|
318
343
|
* Main class for interacting with the Firecrawl API.
|
|
319
344
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -373,6 +398,23 @@ export default class FirecrawlApp {
|
|
|
373
398
|
},
|
|
374
399
|
};
|
|
375
400
|
}
|
|
401
|
+
|
|
402
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
403
|
+
let schema = jsonData.jsonOptions.schema;
|
|
404
|
+
// Try parsing the schema as a Zod schema
|
|
405
|
+
try {
|
|
406
|
+
schema = zodToJsonSchema(schema);
|
|
407
|
+
} catch (error) {
|
|
408
|
+
|
|
409
|
+
}
|
|
410
|
+
jsonData = {
|
|
411
|
+
...jsonData,
|
|
412
|
+
jsonOptions: {
|
|
413
|
+
...jsonData.jsonOptions,
|
|
414
|
+
schema: schema,
|
|
415
|
+
},
|
|
416
|
+
};
|
|
417
|
+
}
|
|
376
418
|
try {
|
|
377
419
|
const response: AxiosResponse = await axios.post(
|
|
378
420
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -621,6 +663,29 @@ export default class FirecrawlApp {
|
|
|
621
663
|
return { success: false, error: "Internal server error." };
|
|
622
664
|
}
|
|
623
665
|
|
|
666
|
+
/**
|
|
667
|
+
* Returns information about crawl errors.
|
|
668
|
+
* @param id - The ID of the crawl operation.
|
|
669
|
+
* @returns Information about crawl errors.
|
|
670
|
+
*/
|
|
671
|
+
async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
|
672
|
+
const headers = this.prepareHeaders();
|
|
673
|
+
try {
|
|
674
|
+
const response: AxiosResponse = await this.deleteRequest(
|
|
675
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
676
|
+
headers
|
|
677
|
+
);
|
|
678
|
+
if (response.status === 200) {
|
|
679
|
+
return response.data;
|
|
680
|
+
} else {
|
|
681
|
+
this.handleError(response, "check crawl errors");
|
|
682
|
+
}
|
|
683
|
+
} catch (error: any) {
|
|
684
|
+
throw new FirecrawlError(error.message, 500);
|
|
685
|
+
}
|
|
686
|
+
return { success: false, error: "Internal server error." };
|
|
687
|
+
}
|
|
688
|
+
|
|
624
689
|
/**
|
|
625
690
|
* Cancels a crawl job using the Firecrawl API.
|
|
626
691
|
* @param id - The ID of the crawl operation.
|
|
@@ -729,6 +794,23 @@ export default class FirecrawlApp {
|
|
|
729
794
|
},
|
|
730
795
|
};
|
|
731
796
|
}
|
|
797
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
798
|
+
let schema = jsonData.jsonOptions.schema;
|
|
799
|
+
|
|
800
|
+
// Try parsing the schema as a Zod schema
|
|
801
|
+
try {
|
|
802
|
+
schema = zodToJsonSchema(schema);
|
|
803
|
+
} catch (error) {
|
|
804
|
+
|
|
805
|
+
}
|
|
806
|
+
jsonData = {
|
|
807
|
+
...jsonData,
|
|
808
|
+
jsonOptions: {
|
|
809
|
+
...jsonData.jsonOptions,
|
|
810
|
+
schema: schema,
|
|
811
|
+
},
|
|
812
|
+
};
|
|
813
|
+
}
|
|
732
814
|
try {
|
|
733
815
|
const response: AxiosResponse = await this.postRequest(
|
|
734
816
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -883,6 +965,29 @@ export default class FirecrawlApp {
|
|
|
883
965
|
return { success: false, error: "Internal server error." };
|
|
884
966
|
}
|
|
885
967
|
|
|
968
|
+
/**
|
|
969
|
+
* Returns information about batch scrape errors.
|
|
970
|
+
* @param id - The ID of the batch scrape operation.
|
|
971
|
+
* @returns Information about batch scrape errors.
|
|
972
|
+
*/
|
|
973
|
+
async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
|
974
|
+
const headers = this.prepareHeaders();
|
|
975
|
+
try {
|
|
976
|
+
const response: AxiosResponse = await this.deleteRequest(
|
|
977
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
978
|
+
headers
|
|
979
|
+
);
|
|
980
|
+
if (response.status === 200) {
|
|
981
|
+
return response.data;
|
|
982
|
+
} else {
|
|
983
|
+
this.handleError(response, "check batch scrape errors");
|
|
984
|
+
}
|
|
985
|
+
} catch (error: any) {
|
|
986
|
+
throw new FirecrawlError(error.message, 500);
|
|
987
|
+
}
|
|
988
|
+
return { success: false, error: "Internal server error." };
|
|
989
|
+
}
|
|
990
|
+
|
|
886
991
|
/**
|
|
887
992
|
* Extracts information from URLs using the Firecrawl API.
|
|
888
993
|
* Currently in Beta. Expect breaking changes on future minor versions.
|