firecrawl 1.11.1 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -36,6 +36,7 @@ __export(src_exports, {
36
36
  });
37
37
  module.exports = __toCommonJS(src_exports);
38
38
  var import_axios = __toESM(require("axios"), 1);
39
+ var zt = __toESM(require("zod"), 1);
39
40
  var import_zod_to_json_schema = require("zod-to-json-schema");
40
41
  var import_isows = require("isows");
41
42
  var import_typescript_event_target = require("typescript-event-target");
@@ -263,23 +264,35 @@ var FirecrawlApp = class {
263
264
  if ("data" in statusData) {
264
265
  let data = statusData.data;
265
266
  while (typeof statusData === "object" && "next" in statusData) {
267
+ if (data.length === 0) {
268
+ break;
269
+ }
266
270
  statusData = (await this.getRequest(statusData.next, headers)).data;
267
271
  data = data.concat(statusData.data);
268
272
  }
269
273
  allData = data;
270
274
  }
271
275
  }
272
- return {
276
+ let resp = {
273
277
  success: response.data.success,
274
278
  status: response.data.status,
275
279
  total: response.data.total,
276
280
  completed: response.data.completed,
277
281
  creditsUsed: response.data.creditsUsed,
278
282
  expiresAt: new Date(response.data.expiresAt),
279
- next: response.data.next,
280
- data: allData,
281
- error: response.data.error
283
+ data: allData
282
284
  };
285
+ if (!response.data.success && response.data.error) {
286
+ resp = {
287
+ ...resp,
288
+ success: false,
289
+ error: response.data.error
290
+ };
291
+ }
292
+ if (response.data.next) {
293
+ resp.next = response.data.next;
294
+ }
295
+ return resp;
283
296
  } else {
284
297
  this.handleError(response, "check crawl status");
285
298
  }
@@ -458,23 +471,35 @@ var FirecrawlApp = class {
458
471
  if ("data" in statusData) {
459
472
  let data = statusData.data;
460
473
  while (typeof statusData === "object" && "next" in statusData) {
474
+ if (data.length === 0) {
475
+ break;
476
+ }
461
477
  statusData = (await this.getRequest(statusData.next, headers)).data;
462
478
  data = data.concat(statusData.data);
463
479
  }
464
480
  allData = data;
465
481
  }
466
482
  }
467
- return {
483
+ let resp = {
468
484
  success: response.data.success,
469
485
  status: response.data.status,
470
486
  total: response.data.total,
471
487
  completed: response.data.completed,
472
488
  creditsUsed: response.data.creditsUsed,
473
489
  expiresAt: new Date(response.data.expiresAt),
474
- next: response.data.next,
475
- data: allData,
476
- error: response.data.error
490
+ data: allData
477
491
  };
492
+ if (!response.data.success && response.data.error) {
493
+ resp = {
494
+ ...resp,
495
+ success: false,
496
+ error: response.data.error
497
+ };
498
+ }
499
+ if (response.data.next) {
500
+ resp.next = response.data.next;
501
+ }
502
+ return resp;
478
503
  } else {
479
504
  this.handleError(response, "check batch scrape status");
480
505
  }
@@ -495,28 +520,47 @@ var FirecrawlApp = class {
495
520
  let jsonData = { urls, ...params };
496
521
  let jsonSchema;
497
522
  try {
498
- jsonSchema = params?.schema ? (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema) : void 0;
523
+ if (!params?.schema) {
524
+ jsonSchema = void 0;
525
+ } else if (params.schema instanceof zt.ZodType) {
526
+ jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
527
+ } else {
528
+ jsonSchema = params.schema;
529
+ }
499
530
  } catch (error) {
500
- throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
531
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
501
532
  }
502
533
  try {
503
534
  const response = await this.postRequest(
504
535
  this.apiUrl + `/v1/extract`,
505
- { ...jsonData, schema: jsonSchema },
536
+ { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
506
537
  headers
507
538
  );
508
539
  if (response.status === 200) {
509
- const responseData = response.data;
510
- if (responseData.success) {
511
- return {
512
- success: true,
513
- data: responseData.data,
514
- warning: responseData.warning,
515
- error: responseData.error
516
- };
517
- } else {
518
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
519
- }
540
+ const jobId = response.data.id;
541
+ let extractStatus;
542
+ do {
543
+ const statusResponse = await this.getRequest(
544
+ `${this.apiUrl}/v1/extract/${jobId}`,
545
+ headers
546
+ );
547
+ extractStatus = statusResponse.data;
548
+ if (extractStatus.status === "completed") {
549
+ if (extractStatus.success) {
550
+ return {
551
+ success: true,
552
+ data: extractStatus.data,
553
+ warning: extractStatus.warning,
554
+ error: extractStatus.error
555
+ };
556
+ } else {
557
+ throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
558
+ }
559
+ } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
560
+ throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
561
+ }
562
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
563
+ } while (extractStatus.status !== "completed");
520
564
  } else {
521
565
  this.handleError(response, "extract");
522
566
  }
@@ -525,6 +569,62 @@ var FirecrawlApp = class {
525
569
  }
526
570
  return { success: false, error: "Internal server error." };
527
571
  }
572
+ /**
573
+ * Initiates an asynchronous extract job for a URL using the Firecrawl API.
574
+ * @param url - The URL to extract data from.
575
+ * @param params - Additional parameters for the extract request.
576
+ * @param idempotencyKey - Optional idempotency key for the request.
577
+ * @returns The response from the extract operation.
578
+ */
579
+ async asyncExtract(url, params, idempotencyKey) {
580
+ const headers = this.prepareHeaders(idempotencyKey);
581
+ let jsonData = { url, ...params };
582
+ let jsonSchema;
583
+ try {
584
+ if (params?.schema instanceof zt.ZodType) {
585
+ jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
586
+ } else {
587
+ jsonSchema = params?.schema;
588
+ }
589
+ } catch (error) {
590
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
591
+ }
592
+ try {
593
+ const response = await this.postRequest(
594
+ this.apiUrl + `/v1/extract`,
595
+ { ...jsonData, schema: jsonSchema },
596
+ headers
597
+ );
598
+ if (response.status === 200) {
599
+ return response.data;
600
+ } else {
601
+ this.handleError(response, "start extract job");
602
+ }
603
+ } catch (error) {
604
+ throw new FirecrawlError(error.message, 500);
605
+ }
606
+ return { success: false, error: "Internal server error." };
607
+ }
608
+ /**
609
+ * Retrieves the status of an extract job.
610
+ * @param jobId - The ID of the extract job.
611
+ * @returns The status of the extract job.
612
+ */
613
+ async getExtractStatus(jobId) {
614
+ try {
615
+ const response = await this.getRequest(
616
+ `${this.apiUrl}/v1/extract/${jobId}`,
617
+ this.prepareHeaders()
618
+ );
619
+ if (response.status === 200) {
620
+ return response.data;
621
+ } else {
622
+ this.handleError(response, "get extract status");
623
+ }
624
+ } catch (error) {
625
+ throw new FirecrawlError(error.message, 500);
626
+ }
627
+ }
528
628
  /**
529
629
  * Prepares the headers for an API request.
530
630
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -602,6 +702,9 @@ var FirecrawlApp = class {
602
702
  if ("data" in statusData) {
603
703
  let data = statusData.data;
604
704
  while (typeof statusData === "object" && "next" in statusData) {
705
+ if (data.length === 0) {
706
+ break;
707
+ }
605
708
  statusResponse = await this.getRequest(statusData.next, headers);
606
709
  statusData = statusResponse.data;
607
710
  data = data.concat(statusData.data);
package/dist/index.d.cts CHANGED
@@ -229,7 +229,7 @@ interface MapResponse {
229
229
  */
230
230
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
231
231
  prompt?: string;
232
- schema?: LLMSchema;
232
+ schema?: LLMSchema | object;
233
233
  systemPrompt?: string;
234
234
  allowExternalLinks?: boolean;
235
235
  includeSubdomains?: boolean;
@@ -384,6 +384,20 @@ declare class FirecrawlApp {
384
384
  * @returns The response from the extract operation.
385
385
  */
386
386
  extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
387
+ /**
388
+ * Initiates an asynchronous extract job for a URL using the Firecrawl API.
389
+ * @param url - The URL to extract data from.
390
+ * @param params - Additional parameters for the extract request.
391
+ * @param idempotencyKey - Optional idempotency key for the request.
392
+ * @returns The response from the extract operation.
393
+ */
394
+ asyncExtract(url: string, params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
395
+ /**
396
+ * Retrieves the status of an extract job.
397
+ * @param jobId - The ID of the extract job.
398
+ * @returns The status of the extract job.
399
+ */
400
+ getExtractStatus(jobId: string): Promise<any>;
387
401
  /**
388
402
  * Prepares the headers for an API request.
389
403
  * @param idempotencyKey - Optional key to ensure idempotency.
package/dist/index.d.ts CHANGED
@@ -229,7 +229,7 @@ interface MapResponse {
229
229
  */
230
230
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
231
231
  prompt?: string;
232
- schema?: LLMSchema;
232
+ schema?: LLMSchema | object;
233
233
  systemPrompt?: string;
234
234
  allowExternalLinks?: boolean;
235
235
  includeSubdomains?: boolean;
@@ -384,6 +384,20 @@ declare class FirecrawlApp {
384
384
  * @returns The response from the extract operation.
385
385
  */
386
386
  extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
387
+ /**
388
+ * Initiates an asynchronous extract job for a URL using the Firecrawl API.
389
+ * @param url - The URL to extract data from.
390
+ * @param params - Additional parameters for the extract request.
391
+ * @param idempotencyKey - Optional idempotency key for the request.
392
+ * @returns The response from the extract operation.
393
+ */
394
+ asyncExtract(url: string, params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
395
+ /**
396
+ * Retrieves the status of an extract job.
397
+ * @param jobId - The ID of the extract job.
398
+ * @returns The status of the extract job.
399
+ */
400
+ getExtractStatus(jobId: string): Promise<any>;
387
401
  /**
388
402
  * Prepares the headers for an API request.
389
403
  * @param idempotencyKey - Optional key to ensure idempotency.
package/dist/index.js CHANGED
@@ -1,5 +1,6 @@
1
1
  // src/index.ts
2
2
  import axios, { AxiosError } from "axios";
3
+ import * as zt from "zod";
3
4
  import { zodToJsonSchema } from "zod-to-json-schema";
4
5
  import { WebSocket } from "isows";
5
6
  import { TypedEventTarget } from "typescript-event-target";
@@ -227,23 +228,35 @@ var FirecrawlApp = class {
227
228
  if ("data" in statusData) {
228
229
  let data = statusData.data;
229
230
  while (typeof statusData === "object" && "next" in statusData) {
231
+ if (data.length === 0) {
232
+ break;
233
+ }
230
234
  statusData = (await this.getRequest(statusData.next, headers)).data;
231
235
  data = data.concat(statusData.data);
232
236
  }
233
237
  allData = data;
234
238
  }
235
239
  }
236
- return {
240
+ let resp = {
237
241
  success: response.data.success,
238
242
  status: response.data.status,
239
243
  total: response.data.total,
240
244
  completed: response.data.completed,
241
245
  creditsUsed: response.data.creditsUsed,
242
246
  expiresAt: new Date(response.data.expiresAt),
243
- next: response.data.next,
244
- data: allData,
245
- error: response.data.error
247
+ data: allData
246
248
  };
249
+ if (!response.data.success && response.data.error) {
250
+ resp = {
251
+ ...resp,
252
+ success: false,
253
+ error: response.data.error
254
+ };
255
+ }
256
+ if (response.data.next) {
257
+ resp.next = response.data.next;
258
+ }
259
+ return resp;
247
260
  } else {
248
261
  this.handleError(response, "check crawl status");
249
262
  }
@@ -422,23 +435,35 @@ var FirecrawlApp = class {
422
435
  if ("data" in statusData) {
423
436
  let data = statusData.data;
424
437
  while (typeof statusData === "object" && "next" in statusData) {
438
+ if (data.length === 0) {
439
+ break;
440
+ }
425
441
  statusData = (await this.getRequest(statusData.next, headers)).data;
426
442
  data = data.concat(statusData.data);
427
443
  }
428
444
  allData = data;
429
445
  }
430
446
  }
431
- return {
447
+ let resp = {
432
448
  success: response.data.success,
433
449
  status: response.data.status,
434
450
  total: response.data.total,
435
451
  completed: response.data.completed,
436
452
  creditsUsed: response.data.creditsUsed,
437
453
  expiresAt: new Date(response.data.expiresAt),
438
- next: response.data.next,
439
- data: allData,
440
- error: response.data.error
454
+ data: allData
441
455
  };
456
+ if (!response.data.success && response.data.error) {
457
+ resp = {
458
+ ...resp,
459
+ success: false,
460
+ error: response.data.error
461
+ };
462
+ }
463
+ if (response.data.next) {
464
+ resp.next = response.data.next;
465
+ }
466
+ return resp;
442
467
  } else {
443
468
  this.handleError(response, "check batch scrape status");
444
469
  }
@@ -459,28 +484,47 @@ var FirecrawlApp = class {
459
484
  let jsonData = { urls, ...params };
460
485
  let jsonSchema;
461
486
  try {
462
- jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : void 0;
487
+ if (!params?.schema) {
488
+ jsonSchema = void 0;
489
+ } else if (params.schema instanceof zt.ZodType) {
490
+ jsonSchema = zodToJsonSchema(params.schema);
491
+ } else {
492
+ jsonSchema = params.schema;
493
+ }
463
494
  } catch (error) {
464
- throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
495
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
465
496
  }
466
497
  try {
467
498
  const response = await this.postRequest(
468
499
  this.apiUrl + `/v1/extract`,
469
- { ...jsonData, schema: jsonSchema },
500
+ { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
470
501
  headers
471
502
  );
472
503
  if (response.status === 200) {
473
- const responseData = response.data;
474
- if (responseData.success) {
475
- return {
476
- success: true,
477
- data: responseData.data,
478
- warning: responseData.warning,
479
- error: responseData.error
480
- };
481
- } else {
482
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
483
- }
504
+ const jobId = response.data.id;
505
+ let extractStatus;
506
+ do {
507
+ const statusResponse = await this.getRequest(
508
+ `${this.apiUrl}/v1/extract/${jobId}`,
509
+ headers
510
+ );
511
+ extractStatus = statusResponse.data;
512
+ if (extractStatus.status === "completed") {
513
+ if (extractStatus.success) {
514
+ return {
515
+ success: true,
516
+ data: extractStatus.data,
517
+ warning: extractStatus.warning,
518
+ error: extractStatus.error
519
+ };
520
+ } else {
521
+ throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
522
+ }
523
+ } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
524
+ throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
525
+ }
526
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
527
+ } while (extractStatus.status !== "completed");
484
528
  } else {
485
529
  this.handleError(response, "extract");
486
530
  }
@@ -489,6 +533,62 @@ var FirecrawlApp = class {
489
533
  }
490
534
  return { success: false, error: "Internal server error." };
491
535
  }
536
+ /**
537
+ * Initiates an asynchronous extract job for a URL using the Firecrawl API.
538
+ * @param url - The URL to extract data from.
539
+ * @param params - Additional parameters for the extract request.
540
+ * @param idempotencyKey - Optional idempotency key for the request.
541
+ * @returns The response from the extract operation.
542
+ */
543
+ async asyncExtract(url, params, idempotencyKey) {
544
+ const headers = this.prepareHeaders(idempotencyKey);
545
+ let jsonData = { url, ...params };
546
+ let jsonSchema;
547
+ try {
548
+ if (params?.schema instanceof zt.ZodType) {
549
+ jsonSchema = zodToJsonSchema(params.schema);
550
+ } else {
551
+ jsonSchema = params?.schema;
552
+ }
553
+ } catch (error) {
554
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
555
+ }
556
+ try {
557
+ const response = await this.postRequest(
558
+ this.apiUrl + `/v1/extract`,
559
+ { ...jsonData, schema: jsonSchema },
560
+ headers
561
+ );
562
+ if (response.status === 200) {
563
+ return response.data;
564
+ } else {
565
+ this.handleError(response, "start extract job");
566
+ }
567
+ } catch (error) {
568
+ throw new FirecrawlError(error.message, 500);
569
+ }
570
+ return { success: false, error: "Internal server error." };
571
+ }
572
+ /**
573
+ * Retrieves the status of an extract job.
574
+ * @param jobId - The ID of the extract job.
575
+ * @returns The status of the extract job.
576
+ */
577
+ async getExtractStatus(jobId) {
578
+ try {
579
+ const response = await this.getRequest(
580
+ `${this.apiUrl}/v1/extract/${jobId}`,
581
+ this.prepareHeaders()
582
+ );
583
+ if (response.status === 200) {
584
+ return response.data;
585
+ } else {
586
+ this.handleError(response, "get extract status");
587
+ }
588
+ } catch (error) {
589
+ throw new FirecrawlError(error.message, 500);
590
+ }
591
+ }
492
592
  /**
493
593
  * Prepares the headers for an API request.
494
594
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -566,6 +666,9 @@ var FirecrawlApp = class {
566
666
  if ("data" in statusData) {
567
667
  let data = statusData.data;
568
668
  while (typeof statusData === "object" && "next" in statusData) {
669
+ if (data.length === 0) {
670
+ break;
671
+ }
569
672
  statusResponse = await this.getRequest(statusData.next, headers);
570
673
  statusData = statusResponse.data;
571
674
  data = data.concat(statusData.data);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.11.1",
3
+ "version": "1.12.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",