firecrawl 1.14.0 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +77 -3
- package/dist/index.d.cts +39 -3
- package/dist/index.d.ts +39 -3
- package/dist/index.js +77 -3
- package/package.json +1 -1
- package/src/index.ts +112 -5
package/dist/index.cjs
CHANGED
|
@@ -42,9 +42,11 @@ var import_isows = require("isows");
|
|
|
42
42
|
var import_typescript_event_target = require("typescript-event-target");
|
|
43
43
|
var FirecrawlError = class extends Error {
|
|
44
44
|
statusCode;
|
|
45
|
-
|
|
45
|
+
details;
|
|
46
|
+
constructor(message, statusCode, details) {
|
|
46
47
|
super(message);
|
|
47
48
|
this.statusCode = statusCode;
|
|
49
|
+
this.details = details;
|
|
48
50
|
}
|
|
49
51
|
};
|
|
50
52
|
var FirecrawlApp = class {
|
|
@@ -91,6 +93,20 @@ var FirecrawlApp = class {
|
|
|
91
93
|
}
|
|
92
94
|
};
|
|
93
95
|
}
|
|
96
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
97
|
+
let schema = jsonData.jsonOptions.schema;
|
|
98
|
+
try {
|
|
99
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
100
|
+
} catch (error) {
|
|
101
|
+
}
|
|
102
|
+
jsonData = {
|
|
103
|
+
...jsonData,
|
|
104
|
+
jsonOptions: {
|
|
105
|
+
...jsonData.jsonOptions,
|
|
106
|
+
schema
|
|
107
|
+
}
|
|
108
|
+
};
|
|
109
|
+
}
|
|
94
110
|
try {
|
|
95
111
|
const response = await import_axios.default.post(
|
|
96
112
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -312,6 +328,28 @@ var FirecrawlApp = class {
|
|
|
312
328
|
}
|
|
313
329
|
return { success: false, error: "Internal server error." };
|
|
314
330
|
}
|
|
331
|
+
/**
|
|
332
|
+
* Returns information about crawl errors.
|
|
333
|
+
* @param id - The ID of the crawl operation.
|
|
334
|
+
* @returns Information about crawl errors.
|
|
335
|
+
*/
|
|
336
|
+
async checkCrawlErrors(id) {
|
|
337
|
+
const headers = this.prepareHeaders();
|
|
338
|
+
try {
|
|
339
|
+
const response = await this.deleteRequest(
|
|
340
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
341
|
+
headers
|
|
342
|
+
);
|
|
343
|
+
if (response.status === 200) {
|
|
344
|
+
return response.data;
|
|
345
|
+
} else {
|
|
346
|
+
this.handleError(response, "check crawl errors");
|
|
347
|
+
}
|
|
348
|
+
} catch (error) {
|
|
349
|
+
throw new FirecrawlError(error.message, 500);
|
|
350
|
+
}
|
|
351
|
+
return { success: false, error: "Internal server error." };
|
|
352
|
+
}
|
|
315
353
|
/**
|
|
316
354
|
* Cancels a crawl job using the Firecrawl API.
|
|
317
355
|
* @param id - The ID of the crawl operation.
|
|
@@ -400,6 +438,20 @@ var FirecrawlApp = class {
|
|
|
400
438
|
}
|
|
401
439
|
};
|
|
402
440
|
}
|
|
441
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
442
|
+
let schema = jsonData.jsonOptions.schema;
|
|
443
|
+
try {
|
|
444
|
+
schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
|
|
445
|
+
} catch (error) {
|
|
446
|
+
}
|
|
447
|
+
jsonData = {
|
|
448
|
+
...jsonData,
|
|
449
|
+
jsonOptions: {
|
|
450
|
+
...jsonData.jsonOptions,
|
|
451
|
+
schema
|
|
452
|
+
}
|
|
453
|
+
};
|
|
454
|
+
}
|
|
403
455
|
try {
|
|
404
456
|
const response = await this.postRequest(
|
|
405
457
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -530,6 +582,28 @@ var FirecrawlApp = class {
|
|
|
530
582
|
}
|
|
531
583
|
return { success: false, error: "Internal server error." };
|
|
532
584
|
}
|
|
585
|
+
/**
|
|
586
|
+
* Returns information about batch scrape errors.
|
|
587
|
+
* @param id - The ID of the batch scrape operation.
|
|
588
|
+
* @returns Information about batch scrape errors.
|
|
589
|
+
*/
|
|
590
|
+
async checkBatchScrapeErrors(id) {
|
|
591
|
+
const headers = this.prepareHeaders();
|
|
592
|
+
try {
|
|
593
|
+
const response = await this.deleteRequest(
|
|
594
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
595
|
+
headers
|
|
596
|
+
);
|
|
597
|
+
if (response.status === 200) {
|
|
598
|
+
return response.data;
|
|
599
|
+
} else {
|
|
600
|
+
this.handleError(response, "check batch scrape errors");
|
|
601
|
+
}
|
|
602
|
+
} catch (error) {
|
|
603
|
+
throw new FirecrawlError(error.message, 500);
|
|
604
|
+
}
|
|
605
|
+
return { success: false, error: "Internal server error." };
|
|
606
|
+
}
|
|
533
607
|
/**
|
|
534
608
|
* Extracts information from URLs using the Firecrawl API.
|
|
535
609
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -587,7 +661,7 @@ var FirecrawlApp = class {
|
|
|
587
661
|
this.handleError(response, "extract");
|
|
588
662
|
}
|
|
589
663
|
} catch (error) {
|
|
590
|
-
throw new FirecrawlError(error.message, 500);
|
|
664
|
+
throw new FirecrawlError(error.message, 500, error.response?.data?.details);
|
|
591
665
|
}
|
|
592
666
|
return { success: false, error: "Internal server error." };
|
|
593
667
|
}
|
|
@@ -623,7 +697,7 @@ var FirecrawlApp = class {
|
|
|
623
697
|
this.handleError(response, "start extract job");
|
|
624
698
|
}
|
|
625
699
|
} catch (error) {
|
|
626
|
-
throw new FirecrawlError(error.message, 500);
|
|
700
|
+
throw new FirecrawlError(error.message, 500, error.response?.data?.details);
|
|
627
701
|
}
|
|
628
702
|
return { success: false, error: "Internal server error." };
|
|
629
703
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -72,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
72
|
* Defines the options and configurations available for scraping web content.
|
|
73
73
|
*/
|
|
74
74
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
75
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
76
|
headers?: Record<string, string>;
|
|
77
77
|
includeTags?: string[];
|
|
78
78
|
excludeTags?: string[];
|
|
@@ -119,6 +119,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
119
|
schema?: LLMSchema;
|
|
120
120
|
systemPrompt?: string;
|
|
121
121
|
};
|
|
122
|
+
json?: {
|
|
123
|
+
prompt?: string;
|
|
124
|
+
schema?: LLMSchema;
|
|
125
|
+
systemPrompt?: string;
|
|
126
|
+
};
|
|
122
127
|
actions?: ActionsSchema;
|
|
123
128
|
}
|
|
124
129
|
interface ActionsResult {
|
|
@@ -258,7 +263,8 @@ interface ErrorResponse {
|
|
|
258
263
|
*/
|
|
259
264
|
declare class FirecrawlError extends Error {
|
|
260
265
|
statusCode: number;
|
|
261
|
-
|
|
266
|
+
details?: any;
|
|
267
|
+
constructor(message: string, statusCode: number, details?: any);
|
|
262
268
|
}
|
|
263
269
|
/**
|
|
264
270
|
* Parameters for search operations.
|
|
@@ -285,6 +291,24 @@ interface SearchResponse {
|
|
|
285
291
|
warning?: string;
|
|
286
292
|
error?: string;
|
|
287
293
|
}
|
|
294
|
+
/**
|
|
295
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
296
|
+
*/
|
|
297
|
+
interface CrawlErrorsResponse {
|
|
298
|
+
/**
|
|
299
|
+
* Scrapes that errored out + error details
|
|
300
|
+
*/
|
|
301
|
+
errors: {
|
|
302
|
+
id: string;
|
|
303
|
+
timestamp?: string;
|
|
304
|
+
url: string;
|
|
305
|
+
error: string;
|
|
306
|
+
}[];
|
|
307
|
+
/**
|
|
308
|
+
* URLs blocked by robots.txt
|
|
309
|
+
*/
|
|
310
|
+
robotsBlocked: string[];
|
|
311
|
+
}
|
|
288
312
|
/**
|
|
289
313
|
* Main class for interacting with the Firecrawl API.
|
|
290
314
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -332,6 +356,12 @@ declare class FirecrawlApp {
|
|
|
332
356
|
* @returns The response containing the job status.
|
|
333
357
|
*/
|
|
334
358
|
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
359
|
+
/**
|
|
360
|
+
* Returns information about crawl errors.
|
|
361
|
+
* @param id - The ID of the crawl operation.
|
|
362
|
+
* @returns Information about crawl errors.
|
|
363
|
+
*/
|
|
364
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
335
365
|
/**
|
|
336
366
|
* Cancels a crawl job using the Firecrawl API.
|
|
337
367
|
* @param id - The ID of the crawl operation.
|
|
@@ -382,6 +412,12 @@ declare class FirecrawlApp {
|
|
|
382
412
|
* @returns The response containing the job status.
|
|
383
413
|
*/
|
|
384
414
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
415
|
+
/**
|
|
416
|
+
* Returns information about batch scrape errors.
|
|
417
|
+
* @param id - The ID of the batch scrape operation.
|
|
418
|
+
* @returns Information about batch scrape errors.
|
|
419
|
+
*/
|
|
420
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
385
421
|
/**
|
|
386
422
|
* Extracts information from URLs using the Firecrawl API.
|
|
387
423
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -469,4 +505,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
469
505
|
close(): void;
|
|
470
506
|
}
|
|
471
507
|
|
|
472
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
508
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -72,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
|
|
|
72
72
|
* Defines the options and configurations available for scraping web content.
|
|
73
73
|
*/
|
|
74
74
|
interface CrawlScrapeOptions {
|
|
75
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
75
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
76
76
|
headers?: Record<string, string>;
|
|
77
77
|
includeTags?: string[];
|
|
78
78
|
excludeTags?: string[];
|
|
@@ -119,6 +119,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
119
119
|
schema?: LLMSchema;
|
|
120
120
|
systemPrompt?: string;
|
|
121
121
|
};
|
|
122
|
+
json?: {
|
|
123
|
+
prompt?: string;
|
|
124
|
+
schema?: LLMSchema;
|
|
125
|
+
systemPrompt?: string;
|
|
126
|
+
};
|
|
122
127
|
actions?: ActionsSchema;
|
|
123
128
|
}
|
|
124
129
|
interface ActionsResult {
|
|
@@ -258,7 +263,8 @@ interface ErrorResponse {
|
|
|
258
263
|
*/
|
|
259
264
|
declare class FirecrawlError extends Error {
|
|
260
265
|
statusCode: number;
|
|
261
|
-
|
|
266
|
+
details?: any;
|
|
267
|
+
constructor(message: string, statusCode: number, details?: any);
|
|
262
268
|
}
|
|
263
269
|
/**
|
|
264
270
|
* Parameters for search operations.
|
|
@@ -285,6 +291,24 @@ interface SearchResponse {
|
|
|
285
291
|
warning?: string;
|
|
286
292
|
error?: string;
|
|
287
293
|
}
|
|
294
|
+
/**
|
|
295
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
296
|
+
*/
|
|
297
|
+
interface CrawlErrorsResponse {
|
|
298
|
+
/**
|
|
299
|
+
* Scrapes that errored out + error details
|
|
300
|
+
*/
|
|
301
|
+
errors: {
|
|
302
|
+
id: string;
|
|
303
|
+
timestamp?: string;
|
|
304
|
+
url: string;
|
|
305
|
+
error: string;
|
|
306
|
+
}[];
|
|
307
|
+
/**
|
|
308
|
+
* URLs blocked by robots.txt
|
|
309
|
+
*/
|
|
310
|
+
robotsBlocked: string[];
|
|
311
|
+
}
|
|
288
312
|
/**
|
|
289
313
|
* Main class for interacting with the Firecrawl API.
|
|
290
314
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -332,6 +356,12 @@ declare class FirecrawlApp {
|
|
|
332
356
|
* @returns The response containing the job status.
|
|
333
357
|
*/
|
|
334
358
|
checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
359
|
+
/**
|
|
360
|
+
* Returns information about crawl errors.
|
|
361
|
+
* @param id - The ID of the crawl operation.
|
|
362
|
+
* @returns Information about crawl errors.
|
|
363
|
+
*/
|
|
364
|
+
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
335
365
|
/**
|
|
336
366
|
* Cancels a crawl job using the Firecrawl API.
|
|
337
367
|
* @param id - The ID of the crawl operation.
|
|
@@ -382,6 +412,12 @@ declare class FirecrawlApp {
|
|
|
382
412
|
* @returns The response containing the job status.
|
|
383
413
|
*/
|
|
384
414
|
checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
|
|
415
|
+
/**
|
|
416
|
+
* Returns information about batch scrape errors.
|
|
417
|
+
* @param id - The ID of the batch scrape operation.
|
|
418
|
+
* @returns Information about batch scrape errors.
|
|
419
|
+
*/
|
|
420
|
+
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
|
|
385
421
|
/**
|
|
386
422
|
* Extracts information from URLs using the Firecrawl API.
|
|
387
423
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -469,4 +505,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
469
505
|
close(): void;
|
|
470
506
|
}
|
|
471
507
|
|
|
472
|
-
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
|
508
|
+
export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
|
package/dist/index.js
CHANGED
|
@@ -6,9 +6,11 @@ import { WebSocket } from "isows";
|
|
|
6
6
|
import { TypedEventTarget } from "typescript-event-target";
|
|
7
7
|
var FirecrawlError = class extends Error {
|
|
8
8
|
statusCode;
|
|
9
|
-
|
|
9
|
+
details;
|
|
10
|
+
constructor(message, statusCode, details) {
|
|
10
11
|
super(message);
|
|
11
12
|
this.statusCode = statusCode;
|
|
13
|
+
this.details = details;
|
|
12
14
|
}
|
|
13
15
|
};
|
|
14
16
|
var FirecrawlApp = class {
|
|
@@ -55,6 +57,20 @@ var FirecrawlApp = class {
|
|
|
55
57
|
}
|
|
56
58
|
};
|
|
57
59
|
}
|
|
60
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
61
|
+
let schema = jsonData.jsonOptions.schema;
|
|
62
|
+
try {
|
|
63
|
+
schema = zodToJsonSchema(schema);
|
|
64
|
+
} catch (error) {
|
|
65
|
+
}
|
|
66
|
+
jsonData = {
|
|
67
|
+
...jsonData,
|
|
68
|
+
jsonOptions: {
|
|
69
|
+
...jsonData.jsonOptions,
|
|
70
|
+
schema
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
}
|
|
58
74
|
try {
|
|
59
75
|
const response = await axios.post(
|
|
60
76
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -276,6 +292,28 @@ var FirecrawlApp = class {
|
|
|
276
292
|
}
|
|
277
293
|
return { success: false, error: "Internal server error." };
|
|
278
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Returns information about crawl errors.
|
|
297
|
+
* @param id - The ID of the crawl operation.
|
|
298
|
+
* @returns Information about crawl errors.
|
|
299
|
+
*/
|
|
300
|
+
async checkCrawlErrors(id) {
|
|
301
|
+
const headers = this.prepareHeaders();
|
|
302
|
+
try {
|
|
303
|
+
const response = await this.deleteRequest(
|
|
304
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
305
|
+
headers
|
|
306
|
+
);
|
|
307
|
+
if (response.status === 200) {
|
|
308
|
+
return response.data;
|
|
309
|
+
} else {
|
|
310
|
+
this.handleError(response, "check crawl errors");
|
|
311
|
+
}
|
|
312
|
+
} catch (error) {
|
|
313
|
+
throw new FirecrawlError(error.message, 500);
|
|
314
|
+
}
|
|
315
|
+
return { success: false, error: "Internal server error." };
|
|
316
|
+
}
|
|
279
317
|
/**
|
|
280
318
|
* Cancels a crawl job using the Firecrawl API.
|
|
281
319
|
* @param id - The ID of the crawl operation.
|
|
@@ -364,6 +402,20 @@ var FirecrawlApp = class {
|
|
|
364
402
|
}
|
|
365
403
|
};
|
|
366
404
|
}
|
|
405
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
406
|
+
let schema = jsonData.jsonOptions.schema;
|
|
407
|
+
try {
|
|
408
|
+
schema = zodToJsonSchema(schema);
|
|
409
|
+
} catch (error) {
|
|
410
|
+
}
|
|
411
|
+
jsonData = {
|
|
412
|
+
...jsonData,
|
|
413
|
+
jsonOptions: {
|
|
414
|
+
...jsonData.jsonOptions,
|
|
415
|
+
schema
|
|
416
|
+
}
|
|
417
|
+
};
|
|
418
|
+
}
|
|
367
419
|
try {
|
|
368
420
|
const response = await this.postRequest(
|
|
369
421
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -494,6 +546,28 @@ var FirecrawlApp = class {
|
|
|
494
546
|
}
|
|
495
547
|
return { success: false, error: "Internal server error." };
|
|
496
548
|
}
|
|
549
|
+
/**
|
|
550
|
+
* Returns information about batch scrape errors.
|
|
551
|
+
* @param id - The ID of the batch scrape operation.
|
|
552
|
+
* @returns Information about batch scrape errors.
|
|
553
|
+
*/
|
|
554
|
+
async checkBatchScrapeErrors(id) {
|
|
555
|
+
const headers = this.prepareHeaders();
|
|
556
|
+
try {
|
|
557
|
+
const response = await this.deleteRequest(
|
|
558
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
559
|
+
headers
|
|
560
|
+
);
|
|
561
|
+
if (response.status === 200) {
|
|
562
|
+
return response.data;
|
|
563
|
+
} else {
|
|
564
|
+
this.handleError(response, "check batch scrape errors");
|
|
565
|
+
}
|
|
566
|
+
} catch (error) {
|
|
567
|
+
throw new FirecrawlError(error.message, 500);
|
|
568
|
+
}
|
|
569
|
+
return { success: false, error: "Internal server error." };
|
|
570
|
+
}
|
|
497
571
|
/**
|
|
498
572
|
* Extracts information from URLs using the Firecrawl API.
|
|
499
573
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -551,7 +625,7 @@ var FirecrawlApp = class {
|
|
|
551
625
|
this.handleError(response, "extract");
|
|
552
626
|
}
|
|
553
627
|
} catch (error) {
|
|
554
|
-
throw new FirecrawlError(error.message, 500);
|
|
628
|
+
throw new FirecrawlError(error.message, 500, error.response?.data?.details);
|
|
555
629
|
}
|
|
556
630
|
return { success: false, error: "Internal server error." };
|
|
557
631
|
}
|
|
@@ -587,7 +661,7 @@ var FirecrawlApp = class {
|
|
|
587
661
|
this.handleError(response, "start extract job");
|
|
588
662
|
}
|
|
589
663
|
} catch (error) {
|
|
590
|
-
throw new FirecrawlError(error.message, 500);
|
|
664
|
+
throw new FirecrawlError(error.message, 500, error.response?.data?.details);
|
|
591
665
|
}
|
|
592
666
|
return { success: false, error: "Internal server error." };
|
|
593
667
|
}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -78,7 +78,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
78
78
|
* Defines the options and configurations available for scraping web content.
|
|
79
79
|
*/
|
|
80
80
|
export interface CrawlScrapeOptions {
|
|
81
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
81
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
|
|
82
82
|
headers?: Record<string, string>;
|
|
83
83
|
includeTags?: string[];
|
|
84
84
|
excludeTags?: string[];
|
|
@@ -127,6 +127,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
127
127
|
schema?: LLMSchema;
|
|
128
128
|
systemPrompt?: string;
|
|
129
129
|
};
|
|
130
|
+
json?:{
|
|
131
|
+
prompt?: string;
|
|
132
|
+
schema?: LLMSchema;
|
|
133
|
+
systemPrompt?: string;
|
|
134
|
+
}
|
|
130
135
|
actions?: ActionsSchema;
|
|
131
136
|
}
|
|
132
137
|
|
|
@@ -279,9 +284,11 @@ export interface ErrorResponse {
|
|
|
279
284
|
*/
|
|
280
285
|
export class FirecrawlError extends Error {
|
|
281
286
|
statusCode: number;
|
|
282
|
-
|
|
287
|
+
details?: any;
|
|
288
|
+
constructor(message: string, statusCode: number, details?: any) {
|
|
283
289
|
super(message);
|
|
284
290
|
this.statusCode = statusCode;
|
|
291
|
+
this.details = details;
|
|
285
292
|
}
|
|
286
293
|
}
|
|
287
294
|
|
|
@@ -312,6 +319,26 @@ export interface SearchResponse {
|
|
|
312
319
|
error?: string;
|
|
313
320
|
}
|
|
314
321
|
|
|
322
|
+
/**
|
|
323
|
+
* Response interface for crawl/batch scrape error monitoring.
|
|
324
|
+
*/
|
|
325
|
+
export interface CrawlErrorsResponse {
|
|
326
|
+
/**
|
|
327
|
+
* Scrapes that errored out + error details
|
|
328
|
+
*/
|
|
329
|
+
errors: {
|
|
330
|
+
id: string,
|
|
331
|
+
timestamp?: string,
|
|
332
|
+
url: string,
|
|
333
|
+
error: string,
|
|
334
|
+
}[];
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* URLs blocked by robots.txt
|
|
338
|
+
*/
|
|
339
|
+
robotsBlocked: string[];
|
|
340
|
+
};
|
|
341
|
+
|
|
315
342
|
/**
|
|
316
343
|
* Main class for interacting with the Firecrawl API.
|
|
317
344
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -371,6 +398,23 @@ export default class FirecrawlApp {
|
|
|
371
398
|
},
|
|
372
399
|
};
|
|
373
400
|
}
|
|
401
|
+
|
|
402
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
403
|
+
let schema = jsonData.jsonOptions.schema;
|
|
404
|
+
// Try parsing the schema as a Zod schema
|
|
405
|
+
try {
|
|
406
|
+
schema = zodToJsonSchema(schema);
|
|
407
|
+
} catch (error) {
|
|
408
|
+
|
|
409
|
+
}
|
|
410
|
+
jsonData = {
|
|
411
|
+
...jsonData,
|
|
412
|
+
jsonOptions: {
|
|
413
|
+
...jsonData.jsonOptions,
|
|
414
|
+
schema: schema,
|
|
415
|
+
},
|
|
416
|
+
};
|
|
417
|
+
}
|
|
374
418
|
try {
|
|
375
419
|
const response: AxiosResponse = await axios.post(
|
|
376
420
|
this.apiUrl + `/v1/scrape`,
|
|
@@ -619,6 +663,29 @@ export default class FirecrawlApp {
|
|
|
619
663
|
return { success: false, error: "Internal server error." };
|
|
620
664
|
}
|
|
621
665
|
|
|
666
|
+
/**
|
|
667
|
+
* Returns information about crawl errors.
|
|
668
|
+
* @param id - The ID of the crawl operation.
|
|
669
|
+
* @returns Information about crawl errors.
|
|
670
|
+
*/
|
|
671
|
+
async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
|
672
|
+
const headers = this.prepareHeaders();
|
|
673
|
+
try {
|
|
674
|
+
const response: AxiosResponse = await this.deleteRequest(
|
|
675
|
+
`${this.apiUrl}/v1/crawl/${id}/errors`,
|
|
676
|
+
headers
|
|
677
|
+
);
|
|
678
|
+
if (response.status === 200) {
|
|
679
|
+
return response.data;
|
|
680
|
+
} else {
|
|
681
|
+
this.handleError(response, "check crawl errors");
|
|
682
|
+
}
|
|
683
|
+
} catch (error: any) {
|
|
684
|
+
throw new FirecrawlError(error.message, 500);
|
|
685
|
+
}
|
|
686
|
+
return { success: false, error: "Internal server error." };
|
|
687
|
+
}
|
|
688
|
+
|
|
622
689
|
/**
|
|
623
690
|
* Cancels a crawl job using the Firecrawl API.
|
|
624
691
|
* @param id - The ID of the crawl operation.
|
|
@@ -727,6 +794,23 @@ export default class FirecrawlApp {
|
|
|
727
794
|
},
|
|
728
795
|
};
|
|
729
796
|
}
|
|
797
|
+
if (jsonData?.jsonOptions?.schema) {
|
|
798
|
+
let schema = jsonData.jsonOptions.schema;
|
|
799
|
+
|
|
800
|
+
// Try parsing the schema as a Zod schema
|
|
801
|
+
try {
|
|
802
|
+
schema = zodToJsonSchema(schema);
|
|
803
|
+
} catch (error) {
|
|
804
|
+
|
|
805
|
+
}
|
|
806
|
+
jsonData = {
|
|
807
|
+
...jsonData,
|
|
808
|
+
jsonOptions: {
|
|
809
|
+
...jsonData.jsonOptions,
|
|
810
|
+
schema: schema,
|
|
811
|
+
},
|
|
812
|
+
};
|
|
813
|
+
}
|
|
730
814
|
try {
|
|
731
815
|
const response: AxiosResponse = await this.postRequest(
|
|
732
816
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -881,6 +965,29 @@ export default class FirecrawlApp {
|
|
|
881
965
|
return { success: false, error: "Internal server error." };
|
|
882
966
|
}
|
|
883
967
|
|
|
968
|
+
/**
|
|
969
|
+
* Returns information about batch scrape errors.
|
|
970
|
+
* @param id - The ID of the batch scrape operation.
|
|
971
|
+
* @returns Information about batch scrape errors.
|
|
972
|
+
*/
|
|
973
|
+
async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
|
|
974
|
+
const headers = this.prepareHeaders();
|
|
975
|
+
try {
|
|
976
|
+
const response: AxiosResponse = await this.deleteRequest(
|
|
977
|
+
`${this.apiUrl}/v1/batch/scrape/${id}/errors`,
|
|
978
|
+
headers
|
|
979
|
+
);
|
|
980
|
+
if (response.status === 200) {
|
|
981
|
+
return response.data;
|
|
982
|
+
} else {
|
|
983
|
+
this.handleError(response, "check batch scrape errors");
|
|
984
|
+
}
|
|
985
|
+
} catch (error: any) {
|
|
986
|
+
throw new FirecrawlError(error.message, 500);
|
|
987
|
+
}
|
|
988
|
+
return { success: false, error: "Internal server error." };
|
|
989
|
+
}
|
|
990
|
+
|
|
884
991
|
/**
|
|
885
992
|
* Extracts information from URLs using the Firecrawl API.
|
|
886
993
|
* Currently in Beta. Expect breaking changes on future minor versions.
|
|
@@ -941,9 +1048,9 @@ export default class FirecrawlApp {
|
|
|
941
1048
|
this.handleError(response, "extract");
|
|
942
1049
|
}
|
|
943
1050
|
} catch (error: any) {
|
|
944
|
-
throw new FirecrawlError(error.message, 500);
|
|
1051
|
+
throw new FirecrawlError(error.message, 500, error.response?.data?.details);
|
|
945
1052
|
}
|
|
946
|
-
return { success: false, error: "Internal server error."
|
|
1053
|
+
return { success: false, error: "Internal server error."};
|
|
947
1054
|
}
|
|
948
1055
|
|
|
949
1056
|
/**
|
|
@@ -985,7 +1092,7 @@ export default class FirecrawlApp {
|
|
|
985
1092
|
this.handleError(response, "start extract job");
|
|
986
1093
|
}
|
|
987
1094
|
} catch (error: any) {
|
|
988
|
-
throw new FirecrawlError(error.message, 500);
|
|
1095
|
+
throw new FirecrawlError(error.message, 500, error.response?.data?.details);
|
|
989
1096
|
}
|
|
990
1097
|
return { success: false, error: "Internal server error." };
|
|
991
1098
|
}
|