firecrawl 1.14.1 → 1.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -93,6 +93,20 @@ var FirecrawlApp = class {
93
93
  }
94
94
  };
95
95
  }
96
+ if (jsonData?.jsonOptions?.schema) {
97
+ let schema = jsonData.jsonOptions.schema;
98
+ try {
99
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
100
+ } catch (error) {
101
+ }
102
+ jsonData = {
103
+ ...jsonData,
104
+ jsonOptions: {
105
+ ...jsonData.jsonOptions,
106
+ schema
107
+ }
108
+ };
109
+ }
96
110
  try {
97
111
  const response = await import_axios.default.post(
98
112
  this.apiUrl + `/v1/scrape`,
@@ -314,6 +328,28 @@ var FirecrawlApp = class {
314
328
  }
315
329
  return { success: false, error: "Internal server error." };
316
330
  }
331
+ /**
332
+ * Returns information about crawl errors.
333
+ * @param id - The ID of the crawl operation.
334
+ * @returns Information about crawl errors.
335
+ */
336
+ async checkCrawlErrors(id) {
337
+ const headers = this.prepareHeaders();
338
+ try {
339
+ const response = await this.deleteRequest(
340
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
341
+ headers
342
+ );
343
+ if (response.status === 200) {
344
+ return response.data;
345
+ } else {
346
+ this.handleError(response, "check crawl errors");
347
+ }
348
+ } catch (error) {
349
+ throw new FirecrawlError(error.message, 500);
350
+ }
351
+ return { success: false, error: "Internal server error." };
352
+ }
317
353
  /**
318
354
  * Cancels a crawl job using the Firecrawl API.
319
355
  * @param id - The ID of the crawl operation.
@@ -402,6 +438,20 @@ var FirecrawlApp = class {
402
438
  }
403
439
  };
404
440
  }
441
+ if (jsonData?.jsonOptions?.schema) {
442
+ let schema = jsonData.jsonOptions.schema;
443
+ try {
444
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
445
+ } catch (error) {
446
+ }
447
+ jsonData = {
448
+ ...jsonData,
449
+ jsonOptions: {
450
+ ...jsonData.jsonOptions,
451
+ schema
452
+ }
453
+ };
454
+ }
405
455
  try {
406
456
  const response = await this.postRequest(
407
457
  this.apiUrl + `/v1/batch/scrape`,
@@ -532,6 +582,28 @@ var FirecrawlApp = class {
532
582
  }
533
583
  return { success: false, error: "Internal server error." };
534
584
  }
585
+ /**
586
+ * Returns information about batch scrape errors.
587
+ * @param id - The ID of the batch scrape operation.
588
+ * @returns Information about batch scrape errors.
589
+ */
590
+ async checkBatchScrapeErrors(id) {
591
+ const headers = this.prepareHeaders();
592
+ try {
593
+ const response = await this.deleteRequest(
594
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
595
+ headers
596
+ );
597
+ if (response.status === 200) {
598
+ return response.data;
599
+ } else {
600
+ this.handleError(response, "check batch scrape errors");
601
+ }
602
+ } catch (error) {
603
+ throw new FirecrawlError(error.message, 500);
604
+ }
605
+ return { success: false, error: "Internal server error." };
606
+ }
535
607
  /**
536
608
  * Extracts information from URLs using the Firecrawl API.
537
609
  * Currently in Beta. Expect breaking changes on future minor versions.
package/dist/index.d.cts CHANGED
@@ -61,6 +61,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
61
61
  rawHtml?: string;
62
62
  links?: string[];
63
63
  extract?: T;
64
+ json?: T;
64
65
  screenshot?: string;
65
66
  metadata?: FirecrawlDocumentMetadata;
66
67
  actions: ActionsSchema;
@@ -72,7 +73,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
72
73
  * Defines the options and configurations available for scraping web content.
73
74
  */
74
75
  interface CrawlScrapeOptions {
75
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
76
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
76
77
  headers?: Record<string, string>;
77
78
  includeTags?: string[];
78
79
  excludeTags?: string[];
@@ -119,6 +120,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
119
120
  schema?: LLMSchema;
120
121
  systemPrompt?: string;
121
122
  };
123
+ jsonOptions?: {
124
+ prompt?: string;
125
+ schema?: LLMSchema;
126
+ systemPrompt?: string;
127
+ };
122
128
  actions?: ActionsSchema;
123
129
  }
124
130
  interface ActionsResult {
@@ -286,6 +292,24 @@ interface SearchResponse {
286
292
  warning?: string;
287
293
  error?: string;
288
294
  }
295
/**
 * Payload describing what went wrong during a crawl or batch scrape.
 */
interface CrawlErrorsResponse {
  /** Scrapes that failed, each with the details of its error. */
  errors: Array<{
    id: string;
    timestamp?: string;
    url: string;
    error: string;
  }>;
  /** URLs that were blocked by robots.txt. */
  robotsBlocked: string[];
}
289
313
  /**
290
314
  * Main class for interacting with the Firecrawl API.
291
315
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -333,6 +357,12 @@ declare class FirecrawlApp {
333
357
  * @returns The response containing the job status.
334
358
  */
335
359
  checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
360
/**
 * Looks up the errors recorded for a crawl.
 * @param id - Identifier of the crawl operation to inspect.
 * @returns A promise resolving to the crawl's error report, or to an error response.
 */
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
336
366
  /**
337
367
  * Cancels a crawl job using the Firecrawl API.
338
368
  * @param id - The ID of the crawl operation.
@@ -383,6 +413,12 @@ declare class FirecrawlApp {
383
413
  * @returns The response containing the job status.
384
414
  */
385
415
  checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
416
/**
 * Looks up the errors recorded for a batch scrape.
 * @param id - Identifier of the batch scrape operation to inspect.
 * @returns A promise resolving to the batch scrape's error report, or to an error response.
 */
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
386
422
  /**
387
423
  * Extracts information from URLs using the Firecrawl API.
388
424
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -470,4 +506,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
470
506
  close(): void;
471
507
  }
472
508
 
473
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
509
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.d.ts CHANGED
@@ -61,6 +61,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
61
61
  rawHtml?: string;
62
62
  links?: string[];
63
63
  extract?: T;
64
+ json?: T;
64
65
  screenshot?: string;
65
66
  metadata?: FirecrawlDocumentMetadata;
66
67
  actions: ActionsSchema;
@@ -72,7 +73,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
72
73
  * Defines the options and configurations available for scraping web content.
73
74
  */
74
75
  interface CrawlScrapeOptions {
75
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
76
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
76
77
  headers?: Record<string, string>;
77
78
  includeTags?: string[];
78
79
  excludeTags?: string[];
@@ -119,6 +120,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
119
120
  schema?: LLMSchema;
120
121
  systemPrompt?: string;
121
122
  };
123
+ jsonOptions?: {
124
+ prompt?: string;
125
+ schema?: LLMSchema;
126
+ systemPrompt?: string;
127
+ };
122
128
  actions?: ActionsSchema;
123
129
  }
124
130
  interface ActionsResult {
@@ -286,6 +292,24 @@ interface SearchResponse {
286
292
  warning?: string;
287
293
  error?: string;
288
294
  }
295
/**
 * Payload describing what went wrong during a crawl or batch scrape.
 */
interface CrawlErrorsResponse {
  /** Scrapes that failed, each with the details of its error. */
  errors: Array<{
    id: string;
    timestamp?: string;
    url: string;
    error: string;
  }>;
  /** URLs that were blocked by robots.txt. */
  robotsBlocked: string[];
}
289
313
  /**
290
314
  * Main class for interacting with the Firecrawl API.
291
315
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -333,6 +357,12 @@ declare class FirecrawlApp {
333
357
  * @returns The response containing the job status.
334
358
  */
335
359
  checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
360
/**
 * Looks up the errors recorded for a crawl.
 * @param id - Identifier of the crawl operation to inspect.
 * @returns A promise resolving to the crawl's error report, or to an error response.
 */
checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
336
366
  /**
337
367
  * Cancels a crawl job using the Firecrawl API.
338
368
  * @param id - The ID of the crawl operation.
@@ -383,6 +413,12 @@ declare class FirecrawlApp {
383
413
  * @returns The response containing the job status.
384
414
  */
385
415
  checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
416
/**
 * Looks up the errors recorded for a batch scrape.
 * @param id - Identifier of the batch scrape operation to inspect.
 * @returns A promise resolving to the batch scrape's error report, or to an error response.
 */
checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
386
422
  /**
387
423
  * Extracts information from URLs using the Firecrawl API.
388
424
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -470,4 +506,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
470
506
  close(): void;
471
507
  }
472
508
 
473
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
509
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.js CHANGED
@@ -57,6 +57,20 @@ var FirecrawlApp = class {
57
57
  }
58
58
  };
59
59
  }
60
+ if (jsonData?.jsonOptions?.schema) {
61
+ let schema = jsonData.jsonOptions.schema;
62
+ try {
63
+ schema = zodToJsonSchema(schema);
64
+ } catch (error) {
65
+ }
66
+ jsonData = {
67
+ ...jsonData,
68
+ jsonOptions: {
69
+ ...jsonData.jsonOptions,
70
+ schema
71
+ }
72
+ };
73
+ }
60
74
  try {
61
75
  const response = await axios.post(
62
76
  this.apiUrl + `/v1/scrape`,
@@ -278,6 +292,28 @@ var FirecrawlApp = class {
278
292
  }
279
293
  return { success: false, error: "Internal server error." };
280
294
  }
295
+ /**
296
+ * Returns information about crawl errors.
297
+ * @param id - The ID of the crawl operation.
298
+ * @returns Information about crawl errors.
299
+ */
300
+ async checkCrawlErrors(id) {
301
+ const headers = this.prepareHeaders();
302
+ try {
303
+ const response = await this.deleteRequest(
304
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
305
+ headers
306
+ );
307
+ if (response.status === 200) {
308
+ return response.data;
309
+ } else {
310
+ this.handleError(response, "check crawl errors");
311
+ }
312
+ } catch (error) {
313
+ throw new FirecrawlError(error.message, 500);
314
+ }
315
+ return { success: false, error: "Internal server error." };
316
+ }
281
317
  /**
282
318
  * Cancels a crawl job using the Firecrawl API.
283
319
  * @param id - The ID of the crawl operation.
@@ -366,6 +402,20 @@ var FirecrawlApp = class {
366
402
  }
367
403
  };
368
404
  }
405
+ if (jsonData?.jsonOptions?.schema) {
406
+ let schema = jsonData.jsonOptions.schema;
407
+ try {
408
+ schema = zodToJsonSchema(schema);
409
+ } catch (error) {
410
+ }
411
+ jsonData = {
412
+ ...jsonData,
413
+ jsonOptions: {
414
+ ...jsonData.jsonOptions,
415
+ schema
416
+ }
417
+ };
418
+ }
369
419
  try {
370
420
  const response = await this.postRequest(
371
421
  this.apiUrl + `/v1/batch/scrape`,
@@ -496,6 +546,28 @@ var FirecrawlApp = class {
496
546
  }
497
547
  return { success: false, error: "Internal server error." };
498
548
  }
549
+ /**
550
+ * Returns information about batch scrape errors.
551
+ * @param id - The ID of the batch scrape operation.
552
+ * @returns Information about batch scrape errors.
553
+ */
554
+ async checkBatchScrapeErrors(id) {
555
+ const headers = this.prepareHeaders();
556
+ try {
557
+ const response = await this.deleteRequest(
558
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
559
+ headers
560
+ );
561
+ if (response.status === 200) {
562
+ return response.data;
563
+ } else {
564
+ this.handleError(response, "check batch scrape errors");
565
+ }
566
+ } catch (error) {
567
+ throw new FirecrawlError(error.message, 500);
568
+ }
569
+ return { success: false, error: "Internal server error." };
570
+ }
499
571
  /**
500
572
  * Extracts information from URLs using the Firecrawl API.
501
573
  * Currently in Beta. Expect breaking changes on future minor versions.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.14.1",
3
+ "version": "1.15.2",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -65,6 +65,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
65
65
  rawHtml?: string;
66
66
  links?: string[];
67
67
  extract?: T;
68
+ json?: T;
68
69
  screenshot?: string;
69
70
  metadata?: FirecrawlDocumentMetadata;
70
71
  actions: ActionsSchema;
@@ -78,7 +79,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
78
79
  * Defines the options and configurations available for scraping web content.
79
80
  */
80
81
  export interface CrawlScrapeOptions {
81
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
82
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
82
83
  headers?: Record<string, string>;
83
84
  includeTags?: string[];
84
85
  excludeTags?: string[];
@@ -127,6 +128,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
127
128
  schema?: LLMSchema;
128
129
  systemPrompt?: string;
129
130
  };
131
+ jsonOptions?:{
132
+ prompt?: string;
133
+ schema?: LLMSchema;
134
+ systemPrompt?: string;
135
+ }
130
136
  actions?: ActionsSchema;
131
137
  }
132
138
 
@@ -314,6 +320,26 @@ export interface SearchResponse {
314
320
  error?: string;
315
321
  }
316
322
 
323
/**
 * Payload describing what went wrong during a crawl or batch scrape.
 */
export interface CrawlErrorsResponse {
  /** Scrapes that failed, each with the details of its error. */
  errors: Array<{
    id: string;
    timestamp?: string;
    url: string;
    error: string;
  }>;

  /** URLs that were blocked by robots.txt. */
  robotsBlocked: string[];
}
342
+
317
343
  /**
318
344
  * Main class for interacting with the Firecrawl API.
319
345
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -373,6 +399,23 @@ export default class FirecrawlApp {
373
399
  },
374
400
  };
375
401
  }
402
+
403
+ if (jsonData?.jsonOptions?.schema) {
404
+ let schema = jsonData.jsonOptions.schema;
405
+ // Try parsing the schema as a Zod schema
406
+ try {
407
+ schema = zodToJsonSchema(schema);
408
+ } catch (error) {
409
+
410
+ }
411
+ jsonData = {
412
+ ...jsonData,
413
+ jsonOptions: {
414
+ ...jsonData.jsonOptions,
415
+ schema: schema,
416
+ },
417
+ };
418
+ }
376
419
  try {
377
420
  const response: AxiosResponse = await axios.post(
378
421
  this.apiUrl + `/v1/scrape`,
@@ -621,6 +664,29 @@ export default class FirecrawlApp {
621
664
  return { success: false, error: "Internal server error." };
622
665
  }
623
666
 
667
/**
 * Fetches the error report for a crawl job.
 *
 * Issues a GET to `/v1/crawl/{id}/errors`. The Firecrawl API reference
 * documents this endpoint as a read-only GET, so the lookup must not go
 * through the DELETE helper.
 *
 * @param id - The ID of the crawl operation.
 * @returns The response body when the server answers 200; otherwise the
 *          generic `{ success: false }` fallback if `handleError` returns.
 * @throws {FirecrawlError} With status 500, wrapping any error raised inside
 *         the `try` block.
 */
async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    // Read-only lookup: use the GET helper rather than deleteRequest.
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/crawl/${id}/errors`,
      headers
    );
    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "check crawl errors");
    }
  } catch (error: any) {
    // Anything thrown above is re-wrapped with a 500 status.
    throw new FirecrawlError(error.message, 500);
  }
  return { success: false, error: "Internal server error." };
}
689
+
624
690
  /**
625
691
  * Cancels a crawl job using the Firecrawl API.
626
692
  * @param id - The ID of the crawl operation.
@@ -729,6 +795,23 @@ export default class FirecrawlApp {
729
795
  },
730
796
  };
731
797
  }
798
+ if (jsonData?.jsonOptions?.schema) {
799
+ let schema = jsonData.jsonOptions.schema;
800
+
801
+ // Try parsing the schema as a Zod schema
802
+ try {
803
+ schema = zodToJsonSchema(schema);
804
+ } catch (error) {
805
+
806
+ }
807
+ jsonData = {
808
+ ...jsonData,
809
+ jsonOptions: {
810
+ ...jsonData.jsonOptions,
811
+ schema: schema,
812
+ },
813
+ };
814
+ }
732
815
  try {
733
816
  const response: AxiosResponse = await this.postRequest(
734
817
  this.apiUrl + `/v1/batch/scrape`,
@@ -883,6 +966,29 @@ export default class FirecrawlApp {
883
966
  return { success: false, error: "Internal server error." };
884
967
  }
885
968
 
969
/**
 * Fetches the error report for a batch scrape job.
 *
 * Issues a GET to `/v1/batch/scrape/{id}/errors`. The Firecrawl API
 * reference documents this endpoint as a read-only GET, so the lookup must
 * not go through the DELETE helper.
 *
 * @param id - The ID of the batch scrape operation.
 * @returns The response body when the server answers 200; otherwise the
 *          generic `{ success: false }` fallback if `handleError` returns.
 * @throws {FirecrawlError} With status 500, wrapping any error raised inside
 *         the `try` block.
 */
async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    // Read-only lookup: use the GET helper rather than deleteRequest.
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
      headers
    );
    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "check batch scrape errors");
    }
  } catch (error: any) {
    // Anything thrown above is re-wrapped with a 500 status.
    throw new FirecrawlError(error.message, 500);
  }
  return { success: false, error: "Internal server error." };
}
991
+
886
992
  /**
887
993
  * Extracts information from URLs using the Firecrawl API.
888
994
  * Currently in Beta. Expect breaking changes on future minor versions.