firecrawl 1.18.1 → 1.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6,9 +6,11 @@ import { WebSocket } from "isows";
6
6
  import { TypedEventTarget } from "typescript-event-target";
/**
 * Error type thrown by the Firecrawl SDK.
 * Carries the status code associated with the failure and, when supplied,
 * additional details describing it.
 */
var FirecrawlError = class extends Error {
  /** Status code passed to the constructor. */
  statusCode;
  /** Optional extra details passed to the constructor. */
  details;
  /**
   * @param message - Human-readable error message.
   * @param statusCode - Status code to associate with the error.
   * @param details - Optional additional details about the failure.
   */
  constructor(message, statusCode, details) {
    super(message);
    // Without this, instances report the inherited name "Error", which makes
    // SDK failures indistinguishable from generic errors in logs.
    this.name = "FirecrawlError";
    this.statusCode = statusCode;
    this.details = details;
  }
};
14
16
  var FirecrawlApp = class {
@@ -55,6 +57,20 @@ var FirecrawlApp = class {
55
57
  }
56
58
  };
57
59
  }
60
+ if (jsonData?.jsonOptions?.schema) {
61
+ let schema = jsonData.jsonOptions.schema;
62
+ try {
63
+ schema = zodToJsonSchema(schema);
64
+ } catch (error) {
65
+ }
66
+ jsonData = {
67
+ ...jsonData,
68
+ jsonOptions: {
69
+ ...jsonData.jsonOptions,
70
+ schema
71
+ }
72
+ };
73
+ }
58
74
  try {
59
75
  const response = await axios.post(
60
76
  this.apiUrl + `/v1/scrape`,
@@ -209,16 +225,26 @@ var FirecrawlApp = class {
209
225
  * Checks the status of a crawl job using the Firecrawl API.
210
226
  * @param id - The ID of the crawl operation.
211
227
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
228
+ * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
229
+ * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
230
+ * @param limit - How many entries to return. Only used when `getAllData = false`.
212
231
  * @returns The response containing the job status.
213
232
  */
214
- async checkCrawlStatus(id, getAllData = false) {
233
+ async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
215
234
  if (!id) {
216
235
  throw new FirecrawlError("No crawl ID provided", 400);
217
236
  }
218
237
  const headers = this.prepareHeaders();
238
+ const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
239
+ if (skip !== void 0) {
240
+ targetURL.searchParams.set("skip", skip.toString());
241
+ }
242
+ if (limit !== void 0) {
243
+ targetURL.searchParams.set("limit", limit.toString());
244
+ }
219
245
  try {
220
246
  const response = await this.getRequest(
221
- `${this.apiUrl}/v1/crawl/${id}`,
247
+ targetURL.href,
222
248
  headers
223
249
  );
224
250
  if (response.status === 200) {
@@ -243,6 +269,7 @@ var FirecrawlApp = class {
243
269
  total: response.data.total,
244
270
  completed: response.data.completed,
245
271
  creditsUsed: response.data.creditsUsed,
272
+ next: getAllData ? void 0 : response.data.next,
246
273
  expiresAt: new Date(response.data.expiresAt),
247
274
  data: allData
248
275
  };
@@ -265,6 +292,28 @@ var FirecrawlApp = class {
265
292
  }
266
293
  return { success: false, error: "Internal server error." };
267
294
  }
295
+ /**
296
+ * Returns information about crawl errors.
297
+ * @param id - The ID of the crawl operation.
298
+ * @returns Information about crawl errors.
299
+ */
300
+ async checkCrawlErrors(id) {
301
+ const headers = this.prepareHeaders();
302
+ try {
303
+ const response = await this.deleteRequest(
304
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
305
+ headers
306
+ );
307
+ if (response.status === 200) {
308
+ return response.data;
309
+ } else {
310
+ this.handleError(response, "check crawl errors");
311
+ }
312
+ } catch (error) {
313
+ throw new FirecrawlError(error.message, 500);
314
+ }
315
+ return { success: false, error: "Internal server error." };
316
+ }
268
317
  /**
269
318
  * Cancels a crawl job using the Firecrawl API.
270
319
  * @param id - The ID of the crawl operation.
@@ -353,6 +402,20 @@ var FirecrawlApp = class {
353
402
  }
354
403
  };
355
404
  }
405
+ if (jsonData?.jsonOptions?.schema) {
406
+ let schema = jsonData.jsonOptions.schema;
407
+ try {
408
+ schema = zodToJsonSchema(schema);
409
+ } catch (error) {
410
+ }
411
+ jsonData = {
412
+ ...jsonData,
413
+ jsonOptions: {
414
+ ...jsonData.jsonOptions,
415
+ schema
416
+ }
417
+ };
418
+ }
356
419
  try {
357
420
  const response = await this.postRequest(
358
421
  this.apiUrl + `/v1/batch/scrape`,
@@ -416,16 +479,26 @@ var FirecrawlApp = class {
416
479
  * Checks the status of a batch scrape job using the Firecrawl API.
417
480
  * @param id - The ID of the batch scrape operation.
418
481
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
482
+ * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
483
+ * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
484
+ * @param limit - How many entries to return. Only used when `getAllData = false`.
419
485
  * @returns The response containing the job status.
420
486
  */
421
- async checkBatchScrapeStatus(id, getAllData = false) {
487
+ async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
422
488
  if (!id) {
423
489
  throw new FirecrawlError("No batch scrape ID provided", 400);
424
490
  }
425
491
  const headers = this.prepareHeaders();
492
+ const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
493
+ if (skip !== void 0) {
494
+ targetURL.searchParams.set("skip", skip.toString());
495
+ }
496
+ if (limit !== void 0) {
497
+ targetURL.searchParams.set("limit", limit.toString());
498
+ }
426
499
  try {
427
500
  const response = await this.getRequest(
428
- `${this.apiUrl}/v1/batch/scrape/${id}`,
501
+ targetURL.href,
429
502
  headers
430
503
  );
431
504
  if (response.status === 200) {
@@ -450,6 +523,7 @@ var FirecrawlApp = class {
450
523
  total: response.data.total,
451
524
  completed: response.data.completed,
452
525
  creditsUsed: response.data.creditsUsed,
526
+ next: getAllData ? void 0 : response.data.next,
453
527
  expiresAt: new Date(response.data.expiresAt),
454
528
  data: allData
455
529
  };
@@ -472,6 +546,28 @@ var FirecrawlApp = class {
472
546
  }
473
547
  return { success: false, error: "Internal server error." };
474
548
  }
549
+ /**
550
+ * Returns information about batch scrape errors.
551
+ * @param id - The ID of the batch scrape operation.
552
+ * @returns Information about batch scrape errors.
553
+ */
554
+ async checkBatchScrapeErrors(id) {
555
+ const headers = this.prepareHeaders();
556
+ try {
557
+ const response = await this.deleteRequest(
558
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
559
+ headers
560
+ );
561
+ if (response.status === 200) {
562
+ return response.data;
563
+ } else {
564
+ this.handleError(response, "check batch scrape errors");
565
+ }
566
+ } catch (error) {
567
+ throw new FirecrawlError(error.message, 500);
568
+ }
569
+ return { success: false, error: "Internal server error." };
570
+ }
475
571
  /**
476
572
  * Extracts information from URLs using the Firecrawl API.
477
573
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -497,29 +593,99 @@ var FirecrawlApp = class {
497
593
  try {
498
594
  const response = await this.postRequest(
499
595
  this.apiUrl + `/v1/extract`,
500
- { ...jsonData, schema: jsonSchema },
596
+ { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
501
597
  headers
502
598
  );
503
599
  if (response.status === 200) {
504
- const responseData = response.data;
505
- if (responseData.success) {
506
- return {
507
- success: true,
508
- data: responseData.data,
509
- warning: responseData.warning,
510
- error: responseData.error
511
- };
512
- } else {
513
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
514
- }
600
+ const jobId = response.data.id;
601
+ let extractStatus;
602
+ do {
603
+ const statusResponse = await this.getRequest(
604
+ `${this.apiUrl}/v1/extract/${jobId}`,
605
+ headers
606
+ );
607
+ extractStatus = statusResponse.data;
608
+ if (extractStatus.status === "completed") {
609
+ if (extractStatus.success) {
610
+ return {
611
+ success: true,
612
+ data: extractStatus.data,
613
+ warning: extractStatus.warning,
614
+ error: extractStatus.error,
615
+ sources: extractStatus?.sources || void 0
616
+ };
617
+ } else {
618
+ throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
619
+ }
620
+ } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
621
+ throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
622
+ }
623
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
624
+ } while (extractStatus.status !== "completed");
515
625
  } else {
516
626
  this.handleError(response, "extract");
517
627
  }
518
628
  } catch (error) {
519
- throw new FirecrawlError(error.message, 500);
629
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
630
+ }
631
+ return { success: false, error: "Internal server error." };
632
+ }
633
+ /**
634
+ * Initiates an asynchronous extract job for a URL using the Firecrawl API.
635
+ * @param url - The URL to extract data from.
636
+ * @param params - Additional parameters for the extract request.
637
+ * @param idempotencyKey - Optional idempotency key for the request.
638
+ * @returns The response from the extract operation.
639
+ */
640
+ async asyncExtract(urls, params, idempotencyKey) {
641
+ const headers = this.prepareHeaders(idempotencyKey);
642
+ let jsonData = { urls, ...params };
643
+ let jsonSchema;
644
+ try {
645
+ if (params?.schema instanceof zt.ZodType) {
646
+ jsonSchema = zodToJsonSchema(params.schema);
647
+ } else {
648
+ jsonSchema = params?.schema;
649
+ }
650
+ } catch (error) {
651
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
652
+ }
653
+ try {
654
+ const response = await this.postRequest(
655
+ this.apiUrl + `/v1/extract`,
656
+ { ...jsonData, schema: jsonSchema },
657
+ headers
658
+ );
659
+ if (response.status === 200) {
660
+ return response.data;
661
+ } else {
662
+ this.handleError(response, "start extract job");
663
+ }
664
+ } catch (error) {
665
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
520
666
  }
521
667
  return { success: false, error: "Internal server error." };
522
668
  }
669
+ /**
670
+ * Retrieves the status of an extract job.
671
+ * @param jobId - The ID of the extract job.
672
+ * @returns The status of the extract job.
673
+ */
674
+ async getExtractStatus(jobId) {
675
+ try {
676
+ const response = await this.getRequest(
677
+ `${this.apiUrl}/v1/extract/${jobId}`,
678
+ this.prepareHeaders()
679
+ );
680
+ if (response.status === 200) {
681
+ return response.data;
682
+ } else {
683
+ this.handleError(response, "get extract status");
684
+ }
685
+ } catch (error) {
686
+ throw new FirecrawlError(error.message, 500);
687
+ }
688
+ }
523
689
  /**
524
690
  * Prepares the headers for an API request.
525
691
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -634,11 +800,13 @@ var FirecrawlApp = class {
634
800
  * @param {string} action - The action being performed when the error occurred.
635
801
  */
636
802
  handleError(response, action) {
637
- if ([402, 408, 409, 500].includes(response.status)) {
803
+ if ([400, 402, 408, 409, 500].includes(response.status)) {
638
804
  const errorMessage = response.data.error || "Unknown error occurred";
805
+ const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
639
806
  throw new FirecrawlError(
640
- `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
641
- response.status
807
+ `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`,
808
+ response.status,
809
+ response?.data?.details
642
810
  );
643
811
  } else {
644
812
  throw new FirecrawlError(
@@ -647,6 +815,198 @@ var FirecrawlApp = class {
647
815
  );
648
816
  }
649
817
  }
818
+ /**
819
+ * Initiates a deep research operation on a given topic and polls until completion.
820
+ * @param params - Parameters for the deep research operation.
821
+ * @returns The final research results.
822
+ */
823
+ async __deepResearch(topic, params) {
824
+ try {
825
+ const response = await this.__asyncDeepResearch(topic, params);
826
+ if (!response.success || "error" in response) {
827
+ return { success: false, error: "error" in response ? response.error : "Unknown error" };
828
+ }
829
+ if (!response.id) {
830
+ throw new FirecrawlError(`Failed to start research. No job ID returned.`, 500);
831
+ }
832
+ const jobId = response.id;
833
+ let researchStatus;
834
+ while (true) {
835
+ researchStatus = await this.__checkDeepResearchStatus(jobId);
836
+ if ("error" in researchStatus && !researchStatus.success) {
837
+ return researchStatus;
838
+ }
839
+ if (researchStatus.status === "completed") {
840
+ return researchStatus;
841
+ }
842
+ if (researchStatus.status === "failed") {
843
+ throw new FirecrawlError(
844
+ `Research job ${researchStatus.status}. Error: ${researchStatus.error}`,
845
+ 500
846
+ );
847
+ }
848
+ if (researchStatus.status !== "processing") {
849
+ break;
850
+ }
851
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
852
+ }
853
+ return { success: false, error: "Research job terminated unexpectedly" };
854
+ } catch (error) {
855
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
856
+ }
857
+ }
858
+ /**
859
+ * Initiates a deep research operation on a given topic without polling.
860
+ * @param params - Parameters for the deep research operation.
861
+ * @returns The response containing the research job ID.
862
+ */
863
+ async __asyncDeepResearch(topic, params) {
864
+ const headers = this.prepareHeaders();
865
+ try {
866
+ const response = await this.postRequest(
867
+ `${this.apiUrl}/v1/deep-research`,
868
+ { topic, ...params },
869
+ headers
870
+ );
871
+ if (response.status === 200) {
872
+ return response.data;
873
+ } else {
874
+ this.handleError(response, "start deep research");
875
+ }
876
+ } catch (error) {
877
+ if (error.response?.data?.error) {
878
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
879
+ } else {
880
+ throw new FirecrawlError(error.message, 500);
881
+ }
882
+ }
883
+ return { success: false, error: "Internal server error." };
884
+ }
885
+ /**
886
+ * Checks the status of a deep research operation.
887
+ * @param id - The ID of the deep research operation.
888
+ * @returns The current status and results of the research operation.
889
+ */
890
+ async __checkDeepResearchStatus(id) {
891
+ const headers = this.prepareHeaders();
892
+ try {
893
+ const response = await this.getRequest(
894
+ `${this.apiUrl}/v1/deep-research/${id}`,
895
+ headers
896
+ );
897
+ if (response.status === 200) {
898
+ return response.data;
899
+ } else if (response.status === 404) {
900
+ throw new FirecrawlError("Deep research job not found", 404);
901
+ } else {
902
+ this.handleError(response, "check deep research status");
903
+ }
904
+ } catch (error) {
905
+ if (error.response?.data?.error) {
906
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
907
+ } else {
908
+ throw new FirecrawlError(error.message, 500);
909
+ }
910
+ }
911
+ return { success: false, error: "Internal server error." };
912
+ }
913
+ /**
914
+ * Generates LLMs.txt for a given URL and polls until completion.
915
+ * @param url - The URL to generate LLMs.txt from.
916
+ * @param params - Parameters for the LLMs.txt generation operation.
917
+ * @returns The final generation results.
918
+ */
919
+ async generateLLMsText(url, params) {
920
+ try {
921
+ const response = await this.asyncGenerateLLMsText(url, params);
922
+ if (!response.success || "error" in response) {
923
+ return { success: false, error: "error" in response ? response.error : "Unknown error" };
924
+ }
925
+ if (!response.id) {
926
+ throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
927
+ }
928
+ const jobId = response.id;
929
+ let generationStatus;
930
+ while (true) {
931
+ generationStatus = await this.checkGenerateLLMsTextStatus(jobId);
932
+ if ("error" in generationStatus && !generationStatus.success) {
933
+ return generationStatus;
934
+ }
935
+ if (generationStatus.status === "completed") {
936
+ return generationStatus;
937
+ }
938
+ if (generationStatus.status === "failed") {
939
+ throw new FirecrawlError(
940
+ `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
941
+ 500
942
+ );
943
+ }
944
+ if (generationStatus.status !== "processing") {
945
+ break;
946
+ }
947
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
948
+ }
949
+ return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
950
+ } catch (error) {
951
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
952
+ }
953
+ }
954
+ /**
955
+ * Initiates a LLMs.txt generation operation without polling.
956
+ * @param url - The URL to generate LLMs.txt from.
957
+ * @param params - Parameters for the LLMs.txt generation operation.
958
+ * @returns The response containing the generation job ID.
959
+ */
960
+ async asyncGenerateLLMsText(url, params) {
961
+ const headers = this.prepareHeaders();
962
+ try {
963
+ const response = await this.postRequest(
964
+ `${this.apiUrl}/v1/llmstxt`,
965
+ { url, ...params },
966
+ headers
967
+ );
968
+ if (response.status === 200) {
969
+ return response.data;
970
+ } else {
971
+ this.handleError(response, "start LLMs.txt generation");
972
+ }
973
+ } catch (error) {
974
+ if (error.response?.data?.error) {
975
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
976
+ } else {
977
+ throw new FirecrawlError(error.message, 500);
978
+ }
979
+ }
980
+ return { success: false, error: "Internal server error." };
981
+ }
982
+ /**
983
+ * Checks the status of a LLMs.txt generation operation.
984
+ * @param id - The ID of the LLMs.txt generation operation.
985
+ * @returns The current status and results of the generation operation.
986
+ */
987
+ async checkGenerateLLMsTextStatus(id) {
988
+ const headers = this.prepareHeaders();
989
+ try {
990
+ const response = await this.getRequest(
991
+ `${this.apiUrl}/v1/llmstxt/${id}`,
992
+ headers
993
+ );
994
+ if (response.status === 200) {
995
+ return response.data;
996
+ } else if (response.status === 404) {
997
+ throw new FirecrawlError("LLMs.txt generation job not found", 404);
998
+ } else {
999
+ this.handleError(response, "check LLMs.txt generation status");
1000
+ }
1001
+ } catch (error) {
1002
+ if (error.response?.data?.error) {
1003
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
1004
+ } else {
1005
+ throw new FirecrawlError(error.message, 500);
1006
+ }
1007
+ }
1008
+ return { success: false, error: "Internal server error." };
1009
+ }
650
1010
  };
651
1011
  var CrawlWatcher = class extends TypedEventTarget {
652
1012
  ws;
@@ -656,7 +1016,8 @@ var CrawlWatcher = class extends TypedEventTarget {
656
1016
  constructor(id, app) {
657
1017
  super();
658
1018
  this.id = id;
659
- this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
1019
+ const wsUrl = app.apiUrl.replace(/^http/, "ws");
1020
+ this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
660
1021
  this.status = "scraping";
661
1022
  this.data = [];
662
1023
  const messageHandler = (msg) => {
package/dump.rdb ADDED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.18.1",
3
+ "version": "1.18.2",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -413,6 +413,48 @@ export interface DeepResearchStatusResponse {
413
413
  summaries: string[];
414
414
  }
415
415
 
/**
 * Options accepted when requesting LLMs.txt generation.
 * Every field is optional.
 */
export interface GenerateLLMsTextParams {
  /**
   * Upper limit on how many URLs are processed (1-100).
   * @default 10
   */
  maxUrls?: number;
  /**
   * When true, the response also includes the full LLMs-full.txt content.
   * @default false
   */
  showFullText?: boolean;
  /**
   * Experimental streaming flag.
   */
  __experimental_stream?: boolean;
}
435
+
/**
 * Shape of the reply returned when an LLMs.txt generation job is started.
 */
export interface GenerateLLMsTextResponse {
  /** Whether the request succeeded. */
  success: boolean;
  /** Identifier of the generation job, used to query its status. */
  id: string;
}
443
+
/**
 * Shape of the reply returned when querying an LLMs.txt generation job.
 */
export interface GenerateLLMsTextStatusResponse {
  /** Whether the request succeeded. */
  success: boolean;
  /** Generated output. */
  data: {
    /** The generated LLMs.txt content. */
    llmstxt: string;
    /** The generated LLMs-full.txt content, when present. */
    llmsfulltxt?: string;
  };
  /** Current state of the job. */
  status: "processing" | "completed" | "failed";
  /** Error message, when present. */
  error?: string;
  /** Expiry timestamp, as a string. */
  expiresAt: string;
}
457
+
416
458
  /**
417
459
  * Main class for interacting with the Firecrawl API.
418
460
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -1459,6 +1501,118 @@ export default class FirecrawlApp {
1459
1501
  }
1460
1502
  return { success: false, error: "Internal server error." };
1461
1503
  }
1504
+
/**
 * Generates LLMs.txt for a given URL and polls until completion.
 * The status is polled every 2 seconds while the job reports "processing";
 * there is no upper bound on the number of polls.
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The final status payload, or an error response when the job could
 * not be started, reported an error, or left the "processing" state without
 * completing or failing.
 * @throws FirecrawlError when no job ID is returned, the job fails, or an
 * underlying request fails.
 */
async generateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  try {
    const response = await this.asyncGenerateLLMsText(url, params);

    if (!response.success || 'error' in response) {
      return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
    }

    if (!response.id) {
      throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
    }

    const jobId = response.id;
    let generationStatus;

    while (true) {
      generationStatus = await this.checkGenerateLLMsTextStatus(jobId);

      if ('error' in generationStatus && !generationStatus.success) {
        return generationStatus;
      }

      if (generationStatus.status === "completed") {
        return generationStatus;
      }

      if (generationStatus.status === "failed") {
        throw new FirecrawlError(
          `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
          500
        );
      }

      // Any state other than "processing" is unexpected: stop polling.
      if (generationStatus.status !== "processing") {
        break;
      }

      await new Promise(resolve => setTimeout(resolve, 2000));
    }

    return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
  } catch (error: any) {
    // Errors already raised as FirecrawlError carry their own status code and
    // details; re-wrapping them would flatten every failure to 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    throw new FirecrawlError(error.message, 500, error.response?.data?.details);
  }
}
1556
+
/**
 * Initiates an LLMs.txt generation operation without polling.
 * @param url - The URL to generate LLMs.txt from.
 * @param params - Parameters for the LLMs.txt generation operation.
 * @returns The response body (containing the generation job ID) when the API
 * answers with status 200.
 * @throws FirecrawlError carrying the status code of the failed request when
 * one is available, otherwise status 500.
 */
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.postRequest(
      `${this.apiUrl}/v1/llmstxt`,
      { url, ...params },
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else {
      this.handleError(response, "start LLMs.txt generation");
    }
  } catch (error: any) {
    // handleError throws a FirecrawlError inside this try block; pass it
    // through untouched so its status code and details are not replaced by 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(
        `Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`,
        error.response.status,
        // Expose the structured details too, not only their serialized form.
        error.response.data.details
      );
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  return { success: false, error: "Internal server error." };
}
1586
+
/**
 * Checks the status of an LLMs.txt generation operation.
 * @param id - The ID of the LLMs.txt generation operation.
 * @returns The response body (current status and results) when the API
 * answers with status 200.
 * @throws FirecrawlError with status 404 when the job is not found, with the
 * status code of the failed request when one is available, otherwise 500.
 */
async checkGenerateLLMsTextStatus(id: string): Promise<GenerateLLMsTextStatusResponse | ErrorResponse> {
  const headers = this.prepareHeaders();
  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/llmstxt/${id}`,
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else if (response.status === 404) {
      throw new FirecrawlError("LLMs.txt generation job not found", 404);
    } else {
      this.handleError(response, "check LLMs.txt generation status");
    }
  } catch (error: any) {
    // The 404 above and handleError both throw FirecrawlError inside this try
    // block; pass them through so the status code is not replaced by 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    if (error.response?.data?.error) {
      throw new FirecrawlError(
        `Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`,
        error.response.status,
        // Expose the structured details too, not only their serialized form.
        error.response.data.details
      );
    } else {
      throw new FirecrawlError(error.message, 500);
    }
  }
  return { success: false, error: "Internal server error." };
}
1462
1616
  }
1463
1617
 
1464
1618
  interface CrawlWatcherEvents {