firecrawl 1.14.1 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -93,6 +93,20 @@ var FirecrawlApp = class {
93
93
  }
94
94
  };
95
95
  }
96
+ if (jsonData?.jsonOptions?.schema) {
97
+ let schema = jsonData.jsonOptions.schema;
98
+ try {
99
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
100
+ } catch (error) {
101
+ }
102
+ jsonData = {
103
+ ...jsonData,
104
+ jsonOptions: {
105
+ ...jsonData.jsonOptions,
106
+ schema
107
+ }
108
+ };
109
+ }
96
110
  try {
97
111
  const response = await import_axios.default.post(
98
112
  this.apiUrl + `/v1/scrape`,
@@ -314,6 +328,28 @@ var FirecrawlApp = class {
314
328
  }
315
329
  return { success: false, error: "Internal server error." };
316
330
  }
331
+ /**
332
+ * Returns information about crawl errors.
333
+ * @param id - The ID of the crawl operation.
334
+ * @returns Information about crawl errors.
335
+ */
336
+ async checkCrawlErrors(id) {
337
+ const headers = this.prepareHeaders();
338
+ try {
339
+ const response = await this.deleteRequest(
340
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
341
+ headers
342
+ );
343
+ if (response.status === 200) {
344
+ return response.data;
345
+ } else {
346
+ this.handleError(response, "check crawl errors");
347
+ }
348
+ } catch (error) {
349
+ throw new FirecrawlError(error.message, 500);
350
+ }
351
+ return { success: false, error: "Internal server error." };
352
+ }
317
353
  /**
318
354
  * Cancels a crawl job using the Firecrawl API.
319
355
  * @param id - The ID of the crawl operation.
@@ -402,6 +438,20 @@ var FirecrawlApp = class {
402
438
  }
403
439
  };
404
440
  }
441
+ if (jsonData?.jsonOptions?.schema) {
442
+ let schema = jsonData.jsonOptions.schema;
443
+ try {
444
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
445
+ } catch (error) {
446
+ }
447
+ jsonData = {
448
+ ...jsonData,
449
+ jsonOptions: {
450
+ ...jsonData.jsonOptions,
451
+ schema
452
+ }
453
+ };
454
+ }
405
455
  try {
406
456
  const response = await this.postRequest(
407
457
  this.apiUrl + `/v1/batch/scrape`,
@@ -532,6 +582,28 @@ var FirecrawlApp = class {
532
582
  }
533
583
  return { success: false, error: "Internal server error." };
534
584
  }
585
+ /**
586
+ * Returns information about batch scrape errors.
587
+ * @param id - The ID of the batch scrape operation.
588
+ * @returns Information about batch scrape errors.
589
+ */
590
+ async checkBatchScrapeErrors(id) {
591
+ const headers = this.prepareHeaders();
592
+ try {
593
+ const response = await this.deleteRequest(
594
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
595
+ headers
596
+ );
597
+ if (response.status === 200) {
598
+ return response.data;
599
+ } else {
600
+ this.handleError(response, "check batch scrape errors");
601
+ }
602
+ } catch (error) {
603
+ throw new FirecrawlError(error.message, 500);
604
+ }
605
+ return { success: false, error: "Internal server error." };
606
+ }
535
607
  /**
536
608
  * Extracts information from URLs using the Firecrawl API.
537
609
  * Currently in Beta. Expect breaking changes on future minor versions.
package/dist/index.d.cts CHANGED
@@ -72,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
72
72
  * Defines the options and configurations available for scraping web content.
73
73
  */
74
74
  interface CrawlScrapeOptions {
75
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
75
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
76
76
  headers?: Record<string, string>;
77
77
  includeTags?: string[];
78
78
  excludeTags?: string[];
@@ -119,6 +119,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
119
119
  schema?: LLMSchema;
120
120
  systemPrompt?: string;
121
121
  };
122
+ json?: {
123
+ prompt?: string;
124
+ schema?: LLMSchema;
125
+ systemPrompt?: string;
126
+ };
122
127
  actions?: ActionsSchema;
123
128
  }
124
129
  interface ActionsResult {
@@ -286,6 +291,24 @@ interface SearchResponse {
286
291
  warning?: string;
287
292
  error?: string;
288
293
  }
294
+ /**
295
+ * Response interface for crawl/batch scrape error monitoring.
296
+ */
297
+ interface CrawlErrorsResponse {
298
+ /**
299
+ * Scrapes that errored out + error details
300
+ */
301
+ errors: {
302
+ id: string;
303
+ timestamp?: string;
304
+ url: string;
305
+ error: string;
306
+ }[];
307
+ /**
308
+ * URLs blocked by robots.txt
309
+ */
310
+ robotsBlocked: string[];
311
+ }
289
312
  /**
290
313
  * Main class for interacting with the Firecrawl API.
291
314
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -333,6 +356,12 @@ declare class FirecrawlApp {
333
356
  * @returns The response containing the job status.
334
357
  */
335
358
  checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
359
+ /**
360
+ * Returns information about crawl errors.
361
+ * @param id - The ID of the crawl operation.
362
+ * @returns Information about crawl errors.
363
+ */
364
+ checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
336
365
  /**
337
366
  * Cancels a crawl job using the Firecrawl API.
338
367
  * @param id - The ID of the crawl operation.
@@ -383,6 +412,12 @@ declare class FirecrawlApp {
383
412
  * @returns The response containing the job status.
384
413
  */
385
414
  checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
415
+ /**
416
+ * Returns information about batch scrape errors.
417
+ * @param id - The ID of the batch scrape operation.
418
+ * @returns Information about batch scrape errors.
419
+ */
420
+ checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
386
421
  /**
387
422
  * Extracts information from URLs using the Firecrawl API.
388
423
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -470,4 +505,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
470
505
  close(): void;
471
506
  }
472
507
 
473
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
508
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.d.ts CHANGED
@@ -72,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
72
72
  * Defines the options and configurations available for scraping web content.
73
73
  */
74
74
  interface CrawlScrapeOptions {
75
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
75
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
76
76
  headers?: Record<string, string>;
77
77
  includeTags?: string[];
78
78
  excludeTags?: string[];
@@ -119,6 +119,11 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
119
119
  schema?: LLMSchema;
120
120
  systemPrompt?: string;
121
121
  };
122
+ json?: {
123
+ prompt?: string;
124
+ schema?: LLMSchema;
125
+ systemPrompt?: string;
126
+ };
122
127
  actions?: ActionsSchema;
123
128
  }
124
129
  interface ActionsResult {
@@ -286,6 +291,24 @@ interface SearchResponse {
286
291
  warning?: string;
287
292
  error?: string;
288
293
  }
294
+ /**
295
+ * Response interface for crawl/batch scrape error monitoring.
296
+ */
297
+ interface CrawlErrorsResponse {
298
+ /**
299
+ * Scrapes that errored out + error details
300
+ */
301
+ errors: {
302
+ id: string;
303
+ timestamp?: string;
304
+ url: string;
305
+ error: string;
306
+ }[];
307
+ /**
308
+ * URLs blocked by robots.txt
309
+ */
310
+ robotsBlocked: string[];
311
+ }
289
312
  /**
290
313
  * Main class for interacting with the Firecrawl API.
291
314
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -333,6 +356,12 @@ declare class FirecrawlApp {
333
356
  * @returns The response containing the job status.
334
357
  */
335
358
  checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
359
+ /**
360
+ * Returns information about crawl errors.
361
+ * @param id - The ID of the crawl operation.
362
+ * @returns Information about crawl errors.
363
+ */
364
+ checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
336
365
  /**
337
366
  * Cancels a crawl job using the Firecrawl API.
338
367
  * @param id - The ID of the crawl operation.
@@ -383,6 +412,12 @@ declare class FirecrawlApp {
383
412
  * @returns The response containing the job status.
384
413
  */
385
414
  checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
415
+ /**
416
+ * Returns information about batch scrape errors.
417
+ * @param id - The ID of the batch scrape operation.
418
+ * @returns Information about batch scrape errors.
419
+ */
420
+ checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
386
421
  /**
387
422
  * Extracts information from URLs using the Firecrawl API.
388
423
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -470,4 +505,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
470
505
  close(): void;
471
506
  }
472
507
 
473
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
508
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.js CHANGED
@@ -57,6 +57,20 @@ var FirecrawlApp = class {
57
57
  }
58
58
  };
59
59
  }
60
+ if (jsonData?.jsonOptions?.schema) {
61
+ let schema = jsonData.jsonOptions.schema;
62
+ try {
63
+ schema = zodToJsonSchema(schema);
64
+ } catch (error) {
65
+ }
66
+ jsonData = {
67
+ ...jsonData,
68
+ jsonOptions: {
69
+ ...jsonData.jsonOptions,
70
+ schema
71
+ }
72
+ };
73
+ }
60
74
  try {
61
75
  const response = await axios.post(
62
76
  this.apiUrl + `/v1/scrape`,
@@ -278,6 +292,28 @@ var FirecrawlApp = class {
278
292
  }
279
293
  return { success: false, error: "Internal server error." };
280
294
  }
295
+ /**
296
+ * Returns information about crawl errors.
297
+ * @param id - The ID of the crawl operation.
298
+ * @returns Information about crawl errors.
299
+ */
300
+ async checkCrawlErrors(id) {
301
+ const headers = this.prepareHeaders();
302
+ try {
303
+ const response = await this.deleteRequest(
304
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
305
+ headers
306
+ );
307
+ if (response.status === 200) {
308
+ return response.data;
309
+ } else {
310
+ this.handleError(response, "check crawl errors");
311
+ }
312
+ } catch (error) {
313
+ throw new FirecrawlError(error.message, 500);
314
+ }
315
+ return { success: false, error: "Internal server error." };
316
+ }
281
317
  /**
282
318
  * Cancels a crawl job using the Firecrawl API.
283
319
  * @param id - The ID of the crawl operation.
@@ -366,6 +402,20 @@ var FirecrawlApp = class {
366
402
  }
367
403
  };
368
404
  }
405
+ if (jsonData?.jsonOptions?.schema) {
406
+ let schema = jsonData.jsonOptions.schema;
407
+ try {
408
+ schema = zodToJsonSchema(schema);
409
+ } catch (error) {
410
+ }
411
+ jsonData = {
412
+ ...jsonData,
413
+ jsonOptions: {
414
+ ...jsonData.jsonOptions,
415
+ schema
416
+ }
417
+ };
418
+ }
369
419
  try {
370
420
  const response = await this.postRequest(
371
421
  this.apiUrl + `/v1/batch/scrape`,
@@ -496,6 +546,28 @@ var FirecrawlApp = class {
496
546
  }
497
547
  return { success: false, error: "Internal server error." };
498
548
  }
549
+ /**
550
+ * Returns information about batch scrape errors.
551
+ * @param id - The ID of the batch scrape operation.
552
+ * @returns Information about batch scrape errors.
553
+ */
554
+ async checkBatchScrapeErrors(id) {
555
+ const headers = this.prepareHeaders();
556
+ try {
557
+ const response = await this.deleteRequest(
558
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
559
+ headers
560
+ );
561
+ if (response.status === 200) {
562
+ return response.data;
563
+ } else {
564
+ this.handleError(response, "check batch scrape errors");
565
+ }
566
+ } catch (error) {
567
+ throw new FirecrawlError(error.message, 500);
568
+ }
569
+ return { success: false, error: "Internal server error." };
570
+ }
499
571
  /**
500
572
  * Extracts information from URLs using the Firecrawl API.
501
573
  * Currently in Beta. Expect breaking changes on future minor versions.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.14.1",
3
+ "version": "1.15.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -78,7 +78,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
78
78
  * Defines the options and configurations available for scraping web content.
79
79
  */
80
80
  export interface CrawlScrapeOptions {
81
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
81
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
82
82
  headers?: Record<string, string>;
83
83
  includeTags?: string[];
84
84
  excludeTags?: string[];
@@ -127,6 +127,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
127
127
  schema?: LLMSchema;
128
128
  systemPrompt?: string;
129
129
  };
130
+ json?:{
131
+ prompt?: string;
132
+ schema?: LLMSchema;
133
+ systemPrompt?: string;
134
+ }
130
135
  actions?: ActionsSchema;
131
136
  }
132
137
 
@@ -314,6 +319,26 @@ export interface SearchResponse {
314
319
  error?: string;
315
320
  }
316
321
 
322
+ /**
323
+ * Response interface for crawl/batch scrape error monitoring.
324
+ */
325
+ export interface CrawlErrorsResponse {
326
+ /**
327
+ * Scrapes that errored out + error details
328
+ */
329
+ errors: {
330
+ id: string,
331
+ timestamp?: string,
332
+ url: string,
333
+ error: string,
334
+ }[];
335
+
336
+ /**
337
+ * URLs blocked by robots.txt
338
+ */
339
+ robotsBlocked: string[];
340
+ };
341
+
317
342
  /**
318
343
  * Main class for interacting with the Firecrawl API.
319
344
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -373,6 +398,23 @@ export default class FirecrawlApp {
373
398
  },
374
399
  };
375
400
  }
401
+
402
+ if (jsonData?.jsonOptions?.schema) {
403
+ let schema = jsonData.jsonOptions.schema;
404
+ // Try parsing the schema as a Zod schema
405
+ try {
406
+ schema = zodToJsonSchema(schema);
407
+ } catch (error) {
408
+
409
+ }
410
+ jsonData = {
411
+ ...jsonData,
412
+ jsonOptions: {
413
+ ...jsonData.jsonOptions,
414
+ schema: schema,
415
+ },
416
+ };
417
+ }
376
418
  try {
377
419
  const response: AxiosResponse = await axios.post(
378
420
  this.apiUrl + `/v1/scrape`,
@@ -621,6 +663,29 @@ export default class FirecrawlApp {
621
663
  return { success: false, error: "Internal server error." };
622
664
  }
623
665
 
666
+ /**
667
+ * Returns information about crawl errors.
668
+ * @param id - The ID of the crawl operation.
669
+ * @returns Information about crawl errors.
670
+ */
671
+ async checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
672
+ const headers = this.prepareHeaders();
673
+ try {
674
+ const response: AxiosResponse = await this.deleteRequest(
675
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
676
+ headers
677
+ );
678
+ if (response.status === 200) {
679
+ return response.data;
680
+ } else {
681
+ this.handleError(response, "check crawl errors");
682
+ }
683
+ } catch (error: any) {
684
+ throw new FirecrawlError(error.message, 500);
685
+ }
686
+ return { success: false, error: "Internal server error." };
687
+ }
688
+
624
689
  /**
625
690
  * Cancels a crawl job using the Firecrawl API.
626
691
  * @param id - The ID of the crawl operation.
@@ -729,6 +794,23 @@ export default class FirecrawlApp {
729
794
  },
730
795
  };
731
796
  }
797
+ if (jsonData?.jsonOptions?.schema) {
798
+ let schema = jsonData.jsonOptions.schema;
799
+
800
+ // Try parsing the schema as a Zod schema
801
+ try {
802
+ schema = zodToJsonSchema(schema);
803
+ } catch (error) {
804
+
805
+ }
806
+ jsonData = {
807
+ ...jsonData,
808
+ jsonOptions: {
809
+ ...jsonData.jsonOptions,
810
+ schema: schema,
811
+ },
812
+ };
813
+ }
732
814
  try {
733
815
  const response: AxiosResponse = await this.postRequest(
734
816
  this.apiUrl + `/v1/batch/scrape`,
@@ -883,6 +965,29 @@ export default class FirecrawlApp {
883
965
  return { success: false, error: "Internal server error." };
884
966
  }
885
967
 
968
+ /**
969
+ * Returns information about batch scrape errors.
970
+ * @param id - The ID of the batch scrape operation.
971
+ * @returns Information about batch scrape errors.
972
+ */
973
+ async checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse> {
974
+ const headers = this.prepareHeaders();
975
+ try {
976
+ const response: AxiosResponse = await this.deleteRequest(
977
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
978
+ headers
979
+ );
980
+ if (response.status === 200) {
981
+ return response.data;
982
+ } else {
983
+ this.handleError(response, "check batch scrape errors");
984
+ }
985
+ } catch (error: any) {
986
+ throw new FirecrawlError(error.message, 500);
987
+ }
988
+ return { success: false, error: "Internal server error." };
989
+ }
990
+
886
991
  /**
887
992
  * Extracts information from URLs using the Firecrawl API.
888
993
  * Currently in Beta. Expect breaking changes on future minor versions.