firecrawl 1.10.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -49,20 +49,16 @@ var FirecrawlError = class extends Error {
49
49
  var FirecrawlApp = class {
50
50
  apiKey;
51
51
  apiUrl;
52
- isCloudService(url) {
53
- return url.includes("api.firecrawl.dev");
54
- }
55
52
  /**
56
53
  * Initializes a new instance of the FirecrawlApp class.
57
54
  * @param config - Configuration options for the FirecrawlApp instance.
58
55
  */
59
56
  constructor({ apiKey = null, apiUrl = null }) {
60
- const baseUrl = apiUrl || "https://api.firecrawl.dev";
61
- if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
57
+ if (typeof apiKey !== "string") {
62
58
  throw new FirecrawlError("No API key provided", 401);
63
59
  }
64
- this.apiKey = apiKey || "";
65
- this.apiUrl = baseUrl;
60
+ this.apiKey = apiKey;
61
+ this.apiUrl = apiUrl || "https://api.firecrawl.dev";
66
62
  }
67
63
  /**
68
64
  * Scrapes a URL using the Firecrawl API.
@@ -202,7 +198,7 @@ var FirecrawlApp = class {
202
198
  let statusData = response.data;
203
199
  if ("data" in statusData) {
204
200
  let data = statusData.data;
205
- while ("next" in statusData) {
201
+ while (typeof statusData === "object" && "next" in statusData) {
206
202
  statusData = (await this.getRequest(statusData.next, headers)).data;
207
203
  data = data.concat(statusData.data);
208
204
  }
@@ -299,9 +295,9 @@ var FirecrawlApp = class {
299
295
  * @param webhook - Optional webhook for the batch scrape.
300
296
  * @returns The response from the crawl operation.
301
297
  */
302
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
298
+ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
303
299
  const headers = this.prepareHeaders(idempotencyKey);
304
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
300
+ let jsonData = { urls, ...params };
305
301
  if (jsonData?.extract?.schema) {
306
302
  let schema = jsonData.extract.schema;
307
303
  try {
@@ -337,9 +333,9 @@ var FirecrawlApp = class {
337
333
  }
338
334
  return { success: false, error: "Internal server error." };
339
335
  }
340
- async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
336
+ async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
341
337
  const headers = this.prepareHeaders(idempotencyKey);
342
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
338
+ let jsonData = { urls, ...params ?? {} };
343
339
  try {
344
340
  const response = await this.postRequest(
345
341
  this.apiUrl + `/v1/batch/scrape`,
@@ -367,8 +363,8 @@ var FirecrawlApp = class {
367
363
  * @param idempotencyKey - Optional idempotency key for the request.
368
364
  * @returns A CrawlWatcher instance to monitor the crawl job.
369
365
  */
370
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
371
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
366
+ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
367
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
372
368
  if (crawl.success && crawl.id) {
373
369
  const id = crawl.id;
374
370
  return new CrawlWatcher(id, this);
@@ -397,7 +393,7 @@ var FirecrawlApp = class {
397
393
  let statusData = response.data;
398
394
  if ("data" in statusData) {
399
395
  let data = statusData.data;
400
- while ("next" in statusData) {
396
+ while (typeof statusData === "object" && "next" in statusData) {
401
397
  statusData = (await this.getRequest(statusData.next, headers)).data;
402
398
  data = data.concat(statusData.data);
403
399
  }
@@ -533,40 +529,44 @@ var FirecrawlApp = class {
533
529
  * @returns The final job status or data.
534
530
  */
535
531
  async monitorJobStatus(id, headers, checkInterval) {
536
- while (true) {
537
- let statusResponse = await this.getRequest(
538
- `${this.apiUrl}/v1/crawl/${id}`,
539
- headers
540
- );
541
- if (statusResponse.status === 200) {
542
- let statusData = statusResponse.data;
543
- if (statusData.status === "completed") {
544
- if ("data" in statusData) {
545
- let data = statusData.data;
546
- while ("next" in statusData) {
547
- statusResponse = await this.getRequest(statusData.next, headers);
548
- statusData = statusResponse.data;
549
- data = data.concat(statusData.data);
532
+ try {
533
+ while (true) {
534
+ let statusResponse = await this.getRequest(
535
+ `${this.apiUrl}/v1/crawl/${id}`,
536
+ headers
537
+ );
538
+ if (statusResponse.status === 200) {
539
+ let statusData = statusResponse.data;
540
+ if (statusData.status === "completed") {
541
+ if ("data" in statusData) {
542
+ let data = statusData.data;
543
+ while (typeof statusData === "object" && "next" in statusData) {
544
+ statusResponse = await this.getRequest(statusData.next, headers);
545
+ statusData = statusResponse.data;
546
+ data = data.concat(statusData.data);
547
+ }
548
+ statusData.data = data;
549
+ return statusData;
550
+ } else {
551
+ throw new FirecrawlError("Crawl job completed but no data was returned", 500);
550
552
  }
551
- statusData.data = data;
552
- return statusData;
553
+ } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
554
+ checkInterval = Math.max(checkInterval, 2);
555
+ await new Promise(
556
+ (resolve) => setTimeout(resolve, checkInterval * 1e3)
557
+ );
553
558
  } else {
554
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
559
+ throw new FirecrawlError(
560
+ `Crawl job failed or was stopped. Status: ${statusData.status}`,
561
+ 500
562
+ );
555
563
  }
556
- } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
557
- checkInterval = Math.max(checkInterval, 2);
558
- await new Promise(
559
- (resolve) => setTimeout(resolve, checkInterval * 1e3)
560
- );
561
564
  } else {
562
- throw new FirecrawlError(
563
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
564
- 500
565
- );
565
+ this.handleError(statusResponse, "check crawl status");
566
566
  }
567
- } else {
568
- this.handleError(statusResponse, "check crawl status");
569
567
  }
568
+ } catch (error) {
569
+ throw new FirecrawlError(error, 500);
570
570
  }
571
571
  }
572
572
  /**
@@ -593,10 +593,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
593
593
  ws;
594
594
  data;
595
595
  status;
596
- id;
597
596
  constructor(id, app) {
598
597
  super();
599
- this.id = id;
600
598
  this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
601
599
  this.status = "scraping";
602
600
  this.data = [];
@@ -606,8 +604,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
606
604
  this.dispatchTypedEvent("done", new CustomEvent("done", {
607
605
  detail: {
608
606
  status: this.status,
609
- data: this.data,
610
- id: this.id
607
+ data: this.data
611
608
  }
612
609
  }));
613
610
  } else if (msg.type === "error") {
@@ -616,8 +613,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
616
613
  detail: {
617
614
  status: this.status,
618
615
  data: this.data,
619
- error: msg.error,
620
- id: this.id
616
+ error: msg.error
621
617
  }
622
618
  }));
623
619
  } else if (msg.type === "catchup") {
@@ -625,18 +621,12 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
625
621
  this.data.push(...msg.data.data ?? []);
626
622
  for (const doc of this.data) {
627
623
  this.dispatchTypedEvent("document", new CustomEvent("document", {
628
- detail: {
629
- ...doc,
630
- id: this.id
631
- }
624
+ detail: doc
632
625
  }));
633
626
  }
634
627
  } else if (msg.type === "document") {
635
628
  this.dispatchTypedEvent("document", new CustomEvent("document", {
636
- detail: {
637
- ...msg.data,
638
- id: this.id
639
- }
629
+ detail: msg.data
640
630
  }));
641
631
  }
642
632
  };
@@ -645,20 +635,12 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
645
635
  this.ws.close();
646
636
  return;
647
637
  }
648
- try {
649
- const msg = JSON.parse(ev.data);
650
- messageHandler(msg);
651
- } catch (error) {
652
- console.error("Error on message", error);
653
- }
638
+ const msg = JSON.parse(ev.data);
639
+ messageHandler(msg);
654
640
  }).bind(this);
655
641
  this.ws.onclose = ((ev) => {
656
- try {
657
- const msg = JSON.parse(ev.reason);
658
- messageHandler(msg);
659
- } catch (error) {
660
- console.error("Error on close", error);
661
- }
642
+ const msg = JSON.parse(ev.reason);
643
+ messageHandler(msg);
662
644
  }).bind(this);
663
645
  this.ws.onerror = ((_) => {
664
646
  this.status = "failed";
@@ -666,8 +648,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
666
648
  detail: {
667
649
  status: this.status,
668
650
  data: this.data,
669
- error: "WebSocket error",
670
- id: this.id
651
+ error: "WebSocket error"
671
652
  }
672
653
  }));
673
654
  }).bind(this);
package/dist/index.d.cts CHANGED
@@ -171,7 +171,6 @@ interface BatchScrapeResponse {
171
171
  url?: string;
172
172
  success: true;
173
173
  error?: string;
174
- invalidURLs?: string[];
175
174
  }
176
175
  /**
177
176
  * Response interface for job status checks.
@@ -226,11 +225,10 @@ interface MapResponse {
226
225
  * Defines options for extracting information from URLs.
227
226
  */
228
227
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
229
- prompt?: string;
228
+ prompt: string;
230
229
  schema?: LLMSchema;
231
230
  systemPrompt?: string;
232
231
  allowExternalLinks?: boolean;
233
- includeSubdomains?: boolean;
234
232
  }
235
233
  /**
236
234
  * Response interface for extracting information from URLs.
@@ -265,7 +263,6 @@ declare class FirecrawlError extends Error {
265
263
  declare class FirecrawlApp {
266
264
  apiKey: string;
267
265
  apiUrl: string;
268
- private isCloudService;
269
266
  /**
270
267
  * Initializes a new instance of the FirecrawlApp class.
271
268
  * @param config - Configuration options for the FirecrawlApp instance.
@@ -332,8 +329,8 @@ declare class FirecrawlApp {
332
329
  * @param webhook - Optional webhook for the batch scrape.
333
330
  * @returns The response from the crawl operation.
334
331
  */
335
- batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
336
- asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
332
+ batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
333
+ asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
337
334
  /**
338
335
  * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
339
336
  * @param urls - The URL to scrape.
@@ -341,7 +338,7 @@ declare class FirecrawlApp {
341
338
  * @param idempotencyKey - Optional idempotency key for the request.
342
339
  * @returns A CrawlWatcher instance to monitor the crawl job.
343
340
  */
344
- batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
341
+ batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
345
342
  /**
346
343
  * Checks the status of a batch scrape job using the Firecrawl API.
347
344
  * @param id - The ID of the batch scrape operation.
@@ -417,7 +414,6 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
417
414
  private ws;
418
415
  data: FirecrawlDocument<undefined>[];
419
416
  status: CrawlStatusResponse["status"];
420
- id: string;
421
417
  constructor(id: string, app: FirecrawlApp);
422
418
  close(): void;
423
419
  }
package/dist/index.d.ts CHANGED
@@ -171,7 +171,6 @@ interface BatchScrapeResponse {
171
171
  url?: string;
172
172
  success: true;
173
173
  error?: string;
174
- invalidURLs?: string[];
175
174
  }
176
175
  /**
177
176
  * Response interface for job status checks.
@@ -226,11 +225,10 @@ interface MapResponse {
226
225
  * Defines options for extracting information from URLs.
227
226
  */
228
227
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
229
- prompt?: string;
228
+ prompt: string;
230
229
  schema?: LLMSchema;
231
230
  systemPrompt?: string;
232
231
  allowExternalLinks?: boolean;
233
- includeSubdomains?: boolean;
234
232
  }
235
233
  /**
236
234
  * Response interface for extracting information from URLs.
@@ -265,7 +263,6 @@ declare class FirecrawlError extends Error {
265
263
  declare class FirecrawlApp {
266
264
  apiKey: string;
267
265
  apiUrl: string;
268
- private isCloudService;
269
266
  /**
270
267
  * Initializes a new instance of the FirecrawlApp class.
271
268
  * @param config - Configuration options for the FirecrawlApp instance.
@@ -332,8 +329,8 @@ declare class FirecrawlApp {
332
329
  * @param webhook - Optional webhook for the batch scrape.
333
330
  * @returns The response from the crawl operation.
334
331
  */
335
- batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
336
- asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
332
+ batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
333
+ asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
337
334
  /**
338
335
  * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
339
336
  * @param urls - The URL to scrape.
@@ -341,7 +338,7 @@ declare class FirecrawlApp {
341
338
  * @param idempotencyKey - Optional idempotency key for the request.
342
339
  * @returns A CrawlWatcher instance to monitor the crawl job.
343
340
  */
344
- batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
341
+ batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
345
342
  /**
346
343
  * Checks the status of a batch scrape job using the Firecrawl API.
347
344
  * @param id - The ID of the batch scrape operation.
@@ -417,7 +414,6 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
417
414
  private ws;
418
415
  data: FirecrawlDocument<undefined>[];
419
416
  status: CrawlStatusResponse["status"];
420
- id: string;
421
417
  constructor(id: string, app: FirecrawlApp);
422
418
  close(): void;
423
419
  }
package/dist/index.js CHANGED
@@ -13,20 +13,16 @@ var FirecrawlError = class extends Error {
13
13
  var FirecrawlApp = class {
14
14
  apiKey;
15
15
  apiUrl;
16
- isCloudService(url) {
17
- return url.includes("api.firecrawl.dev");
18
- }
19
16
  /**
20
17
  * Initializes a new instance of the FirecrawlApp class.
21
18
  * @param config - Configuration options for the FirecrawlApp instance.
22
19
  */
23
20
  constructor({ apiKey = null, apiUrl = null }) {
24
- const baseUrl = apiUrl || "https://api.firecrawl.dev";
25
- if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
21
+ if (typeof apiKey !== "string") {
26
22
  throw new FirecrawlError("No API key provided", 401);
27
23
  }
28
- this.apiKey = apiKey || "";
29
- this.apiUrl = baseUrl;
24
+ this.apiKey = apiKey;
25
+ this.apiUrl = apiUrl || "https://api.firecrawl.dev";
30
26
  }
31
27
  /**
32
28
  * Scrapes a URL using the Firecrawl API.
@@ -166,7 +162,7 @@ var FirecrawlApp = class {
166
162
  let statusData = response.data;
167
163
  if ("data" in statusData) {
168
164
  let data = statusData.data;
169
- while ("next" in statusData) {
165
+ while (typeof statusData === "object" && "next" in statusData) {
170
166
  statusData = (await this.getRequest(statusData.next, headers)).data;
171
167
  data = data.concat(statusData.data);
172
168
  }
@@ -263,9 +259,9 @@ var FirecrawlApp = class {
263
259
  * @param webhook - Optional webhook for the batch scrape.
264
260
  * @returns The response from the crawl operation.
265
261
  */
266
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
262
+ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
267
263
  const headers = this.prepareHeaders(idempotencyKey);
268
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
264
+ let jsonData = { urls, ...params };
269
265
  if (jsonData?.extract?.schema) {
270
266
  let schema = jsonData.extract.schema;
271
267
  try {
@@ -301,9 +297,9 @@ var FirecrawlApp = class {
301
297
  }
302
298
  return { success: false, error: "Internal server error." };
303
299
  }
304
- async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
300
+ async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
305
301
  const headers = this.prepareHeaders(idempotencyKey);
306
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
302
+ let jsonData = { urls, ...params ?? {} };
307
303
  try {
308
304
  const response = await this.postRequest(
309
305
  this.apiUrl + `/v1/batch/scrape`,
@@ -331,8 +327,8 @@ var FirecrawlApp = class {
331
327
  * @param idempotencyKey - Optional idempotency key for the request.
332
328
  * @returns A CrawlWatcher instance to monitor the crawl job.
333
329
  */
334
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
335
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
330
+ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
331
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
336
332
  if (crawl.success && crawl.id) {
337
333
  const id = crawl.id;
338
334
  return new CrawlWatcher(id, this);
@@ -361,7 +357,7 @@ var FirecrawlApp = class {
361
357
  let statusData = response.data;
362
358
  if ("data" in statusData) {
363
359
  let data = statusData.data;
364
- while ("next" in statusData) {
360
+ while (typeof statusData === "object" && "next" in statusData) {
365
361
  statusData = (await this.getRequest(statusData.next, headers)).data;
366
362
  data = data.concat(statusData.data);
367
363
  }
@@ -497,40 +493,44 @@ var FirecrawlApp = class {
497
493
  * @returns The final job status or data.
498
494
  */
499
495
  async monitorJobStatus(id, headers, checkInterval) {
500
- while (true) {
501
- let statusResponse = await this.getRequest(
502
- `${this.apiUrl}/v1/crawl/${id}`,
503
- headers
504
- );
505
- if (statusResponse.status === 200) {
506
- let statusData = statusResponse.data;
507
- if (statusData.status === "completed") {
508
- if ("data" in statusData) {
509
- let data = statusData.data;
510
- while ("next" in statusData) {
511
- statusResponse = await this.getRequest(statusData.next, headers);
512
- statusData = statusResponse.data;
513
- data = data.concat(statusData.data);
496
+ try {
497
+ while (true) {
498
+ let statusResponse = await this.getRequest(
499
+ `${this.apiUrl}/v1/crawl/${id}`,
500
+ headers
501
+ );
502
+ if (statusResponse.status === 200) {
503
+ let statusData = statusResponse.data;
504
+ if (statusData.status === "completed") {
505
+ if ("data" in statusData) {
506
+ let data = statusData.data;
507
+ while (typeof statusData === "object" && "next" in statusData) {
508
+ statusResponse = await this.getRequest(statusData.next, headers);
509
+ statusData = statusResponse.data;
510
+ data = data.concat(statusData.data);
511
+ }
512
+ statusData.data = data;
513
+ return statusData;
514
+ } else {
515
+ throw new FirecrawlError("Crawl job completed but no data was returned", 500);
514
516
  }
515
- statusData.data = data;
516
- return statusData;
517
+ } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
518
+ checkInterval = Math.max(checkInterval, 2);
519
+ await new Promise(
520
+ (resolve) => setTimeout(resolve, checkInterval * 1e3)
521
+ );
517
522
  } else {
518
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
523
+ throw new FirecrawlError(
524
+ `Crawl job failed or was stopped. Status: ${statusData.status}`,
525
+ 500
526
+ );
519
527
  }
520
- } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
521
- checkInterval = Math.max(checkInterval, 2);
522
- await new Promise(
523
- (resolve) => setTimeout(resolve, checkInterval * 1e3)
524
- );
525
528
  } else {
526
- throw new FirecrawlError(
527
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
528
- 500
529
- );
529
+ this.handleError(statusResponse, "check crawl status");
530
530
  }
531
- } else {
532
- this.handleError(statusResponse, "check crawl status");
533
531
  }
532
+ } catch (error) {
533
+ throw new FirecrawlError(error, 500);
534
534
  }
535
535
  }
536
536
  /**
@@ -557,10 +557,8 @@ var CrawlWatcher = class extends TypedEventTarget {
557
557
  ws;
558
558
  data;
559
559
  status;
560
- id;
561
560
  constructor(id, app) {
562
561
  super();
563
- this.id = id;
564
562
  this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
565
563
  this.status = "scraping";
566
564
  this.data = [];
@@ -570,8 +568,7 @@ var CrawlWatcher = class extends TypedEventTarget {
570
568
  this.dispatchTypedEvent("done", new CustomEvent("done", {
571
569
  detail: {
572
570
  status: this.status,
573
- data: this.data,
574
- id: this.id
571
+ data: this.data
575
572
  }
576
573
  }));
577
574
  } else if (msg.type === "error") {
@@ -580,8 +577,7 @@ var CrawlWatcher = class extends TypedEventTarget {
580
577
  detail: {
581
578
  status: this.status,
582
579
  data: this.data,
583
- error: msg.error,
584
- id: this.id
580
+ error: msg.error
585
581
  }
586
582
  }));
587
583
  } else if (msg.type === "catchup") {
@@ -589,18 +585,12 @@ var CrawlWatcher = class extends TypedEventTarget {
589
585
  this.data.push(...msg.data.data ?? []);
590
586
  for (const doc of this.data) {
591
587
  this.dispatchTypedEvent("document", new CustomEvent("document", {
592
- detail: {
593
- ...doc,
594
- id: this.id
595
- }
588
+ detail: doc
596
589
  }));
597
590
  }
598
591
  } else if (msg.type === "document") {
599
592
  this.dispatchTypedEvent("document", new CustomEvent("document", {
600
- detail: {
601
- ...msg.data,
602
- id: this.id
603
- }
593
+ detail: msg.data
604
594
  }));
605
595
  }
606
596
  };
@@ -609,20 +599,12 @@ var CrawlWatcher = class extends TypedEventTarget {
609
599
  this.ws.close();
610
600
  return;
611
601
  }
612
- try {
613
- const msg = JSON.parse(ev.data);
614
- messageHandler(msg);
615
- } catch (error) {
616
- console.error("Error on message", error);
617
- }
602
+ const msg = JSON.parse(ev.data);
603
+ messageHandler(msg);
618
604
  }).bind(this);
619
605
  this.ws.onclose = ((ev) => {
620
- try {
621
- const msg = JSON.parse(ev.reason);
622
- messageHandler(msg);
623
- } catch (error) {
624
- console.error("Error on close", error);
625
- }
606
+ const msg = JSON.parse(ev.reason);
607
+ messageHandler(msg);
626
608
  }).bind(this);
627
609
  this.ws.onerror = ((_) => {
628
610
  this.status = "failed";
@@ -630,8 +612,7 @@ var CrawlWatcher = class extends TypedEventTarget {
630
612
  detail: {
631
613
  status: this.status,
632
614
  data: this.data,
633
- error: "WebSocket error",
634
- id: this.id
615
+ error: "WebSocket error"
635
616
  }
636
617
  }));
637
618
  }).bind(this);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.10.0",
3
+ "version": "1.10.1",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -1,9 +1,9 @@
1
- import { describe, expect, jest, test } from '@jest/globals';
2
-
3
- import FirecrawlApp from '../index';
1
+ import { describe, test, expect, jest } from '@jest/globals';
4
2
  import axios from 'axios';
5
- import { join } from 'path';
3
+ import FirecrawlApp from '../index';
4
+
6
5
  import { readFile } from 'fs/promises';
6
+ import { join } from 'path';
7
7
 
8
8
  // Mock jest and set the type
9
9
  jest.mock('axios');
@@ -14,22 +14,13 @@ async function loadFixture(name: string): Promise<string> {
14
14
  return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
15
15
  }
16
16
 
17
- const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
18
-
19
17
  describe('the firecrawl JS SDK', () => {
20
18
 
21
- test('Should require an API key only for cloud service', async () => {
22
- if (API_URL.includes('api.firecrawl.dev')) {
23
- // Should throw for cloud service
24
- expect(() => {
25
- new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
26
- }).toThrow('No API key provided');
27
- } else {
28
- // Should not throw for self-hosted
29
- expect(() => {
30
- new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
31
- }).not.toThrow();
32
- }
19
+ test('Should require an API key to instantiate FirecrawlApp', async () => {
20
+ const fn = () => {
21
+ new FirecrawlApp({ apiKey: undefined });
22
+ };
23
+ expect(fn).toThrow('No API key provided');
33
24
  });
34
25
 
35
26
  test('Should return scraped data from a /scrape API call', async () => {
@@ -9,28 +9,15 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
9
9
  const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
10
10
 
11
11
  describe('FirecrawlApp E2E Tests', () => {
12
- test.concurrent('should throw error for no API key only for cloud service', async () => {
13
- if (API_URL.includes('api.firecrawl.dev')) {
14
- // Should throw for cloud service
15
- expect(() => {
16
- new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
17
- }).toThrow("No API key provided");
18
- } else {
19
- // Should not throw for self-hosted
20
- expect(() => {
21
- new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
22
- }).not.toThrow();
23
- }
12
+ test.concurrent('should throw error for no API key', async () => {
13
+ expect(() => {
14
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
15
+ }).toThrow("No API key provided");
24
16
  });
25
17
 
26
18
  test.concurrent('should throw error for invalid API key on scrape', async () => {
27
- if (API_URL.includes('api.firecrawl.dev')) {
28
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
29
- await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
30
- } else {
31
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
32
- await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
33
- }
19
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
20
+ await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
34
21
  });
35
22
 
36
23
  test.concurrent('should throw error for blocklisted URL on scrape', async () => {
@@ -168,13 +155,14 @@ describe('FirecrawlApp E2E Tests', () => {
168
155
  }, 30000); // 30 seconds timeout
169
156
 
170
157
  test.concurrent('should throw error for invalid API key on crawl', async () => {
171
- if (API_URL.includes('api.firecrawl.dev')) {
172
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
173
- await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
174
- } else {
175
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
176
- await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
177
- }
158
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
159
+ await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
160
+ });
161
+
162
+ test.concurrent('should throw error for blocklisted URL on crawl', async () => {
163
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
164
+ const blocklistedUrl = "https://twitter.com/fake-test";
165
+ await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
178
166
  });
179
167
 
180
168
  test.concurrent('should return successful response for crawl and wait for completion', async () => {
@@ -349,13 +337,8 @@ describe('FirecrawlApp E2E Tests', () => {
349
337
  }, 60000); // 60 seconds timeout
350
338
 
351
339
  test.concurrent('should throw error for invalid API key on map', async () => {
352
- if (API_URL.includes('api.firecrawl.dev')) {
353
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
354
- await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
355
- } else {
356
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
357
- await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
358
- }
340
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
341
+ await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
359
342
  });
360
343
 
361
344
  test.concurrent('should throw error for blocklisted URL on map', async () => {
@@ -372,7 +355,8 @@ describe('FirecrawlApp E2E Tests', () => {
372
355
  }, 30000); // 30 seconds timeout
373
356
 
374
357
  test.concurrent('should return successful response for valid map', async () => {
375
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
358
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
359
+ const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
376
360
  expect(response).not.toBeNull();
377
361
 
378
362
  expect(response.links?.length).toBeGreaterThan(0);
package/src/index.ts CHANGED
@@ -183,7 +183,6 @@ export interface BatchScrapeResponse {
183
183
  url?: string;
184
184
  success: true;
185
185
  error?: string;
186
- invalidURLs?: string[];
187
186
  }
188
187
 
189
188
  /**
@@ -243,11 +242,10 @@ export interface MapResponse {
243
242
  * Defines options for extracting information from URLs.
244
243
  */
245
244
  export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
246
- prompt?: string;
245
+ prompt: string;
247
246
  schema?: LLMSchema;
248
247
  systemPrompt?: string;
249
248
  allowExternalLinks?: boolean;
250
- includeSubdomains?: boolean;
251
249
  }
252
250
 
253
251
  /**
@@ -290,23 +288,17 @@ export default class FirecrawlApp {
290
288
  public apiKey: string;
291
289
  public apiUrl: string;
292
290
 
293
- private isCloudService(url: string): boolean {
294
- return url.includes('api.firecrawl.dev');
295
- }
296
-
297
291
  /**
298
292
  * Initializes a new instance of the FirecrawlApp class.
299
293
  * @param config - Configuration options for the FirecrawlApp instance.
300
294
  */
301
295
  constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
302
- const baseUrl = apiUrl || "https://api.firecrawl.dev";
303
-
304
- if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
296
+ if (typeof apiKey !== "string") {
305
297
  throw new FirecrawlError("No API key provided", 401);
306
298
  }
307
299
 
308
- this.apiKey = apiKey || '';
309
- this.apiUrl = baseUrl;
300
+ this.apiKey = apiKey;
301
+ this.apiUrl = apiUrl || "https://api.firecrawl.dev";
310
302
  }
311
303
 
312
304
  /**
@@ -470,7 +462,7 @@ export default class FirecrawlApp {
470
462
  let statusData = response.data
471
463
  if ("data" in statusData) {
472
464
  let data = statusData.data;
473
- while ('next' in statusData) {
465
+ while (typeof statusData === 'object' && 'next' in statusData) {
474
466
  statusData = (await this.getRequest(statusData.next, headers)).data;
475
467
  data = data.concat(statusData.data);
476
468
  }
@@ -584,10 +576,9 @@ export default class FirecrawlApp {
584
576
  pollInterval: number = 2,
585
577
  idempotencyKey?: string,
586
578
  webhook?: CrawlParams["webhook"],
587
- ignoreInvalidURLs?: boolean,
588
579
  ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
589
580
  const headers = this.prepareHeaders(idempotencyKey);
590
- let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
581
+ let jsonData: any = { urls, ...params };
591
582
  if (jsonData?.extract?.schema) {
592
583
  let schema = jsonData.extract.schema;
593
584
 
@@ -630,12 +621,10 @@ export default class FirecrawlApp {
630
621
  async asyncBatchScrapeUrls(
631
622
  urls: string[],
632
623
  params?: ScrapeParams,
633
- idempotencyKey?: string,
634
- webhook?: CrawlParams["webhook"],
635
- ignoreInvalidURLs?: boolean,
624
+ idempotencyKey?: string
636
625
  ): Promise<BatchScrapeResponse | ErrorResponse> {
637
626
  const headers = this.prepareHeaders(idempotencyKey);
638
- let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
627
+ let jsonData: any = { urls, ...(params ?? {}) };
639
628
  try {
640
629
  const response: AxiosResponse = await this.postRequest(
641
630
  this.apiUrl + `/v1/batch/scrape`,
@@ -668,10 +657,8 @@ export default class FirecrawlApp {
668
657
  urls: string[],
669
658
  params?: ScrapeParams,
670
659
  idempotencyKey?: string,
671
- webhook?: CrawlParams["webhook"],
672
- ignoreInvalidURLs?: boolean,
673
660
  ) {
674
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
661
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
675
662
 
676
663
  if (crawl.success && crawl.id) {
677
664
  const id = crawl.id;
@@ -704,7 +691,7 @@ export default class FirecrawlApp {
704
691
  let statusData = response.data
705
692
  if ("data" in statusData) {
706
693
  let data = statusData.data;
707
- while ('next' in statusData) {
694
+ while (typeof statusData === 'object' && 'next' in statusData) {
708
695
  statusData = (await this.getRequest(statusData.next, headers)).data;
709
696
  data = data.concat(statusData.data);
710
697
  }
@@ -863,42 +850,46 @@ export default class FirecrawlApp {
863
850
  headers: AxiosRequestHeaders,
864
851
  checkInterval: number
865
852
  ): Promise<CrawlStatusResponse | ErrorResponse> {
866
- while (true) {
867
- let statusResponse: AxiosResponse = await this.getRequest(
868
- `${this.apiUrl}/v1/crawl/${id}`,
869
- headers
870
- );
871
- if (statusResponse.status === 200) {
872
- let statusData = statusResponse.data;
873
- if (statusData.status === "completed") {
874
- if ("data" in statusData) {
875
- let data = statusData.data;
876
- while ('next' in statusData) {
877
- statusResponse = await this.getRequest(statusData.next, headers);
878
- statusData = statusResponse.data;
879
- data = data.concat(statusData.data);
853
+ try {
854
+ while (true) {
855
+ let statusResponse: AxiosResponse = await this.getRequest(
856
+ `${this.apiUrl}/v1/crawl/${id}`,
857
+ headers
858
+ );
859
+ if (statusResponse.status === 200) {
860
+ let statusData = statusResponse.data;
861
+ if (statusData.status === "completed") {
862
+ if ("data" in statusData) {
863
+ let data = statusData.data;
864
+ while (typeof statusData === 'object' && 'next' in statusData) {
865
+ statusResponse = await this.getRequest(statusData.next, headers);
866
+ statusData = statusResponse.data;
867
+ data = data.concat(statusData.data);
868
+ }
869
+ statusData.data = data;
870
+ return statusData;
871
+ } else {
872
+ throw new FirecrawlError("Crawl job completed but no data was returned", 500);
880
873
  }
881
- statusData.data = data;
882
- return statusData;
883
- } else {
884
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
885
- }
886
- } else if (
887
- ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
888
- ) {
889
- checkInterval = Math.max(checkInterval, 2);
890
- await new Promise((resolve) =>
891
- setTimeout(resolve, checkInterval * 1000)
892
- );
874
+ } else if (
875
+ ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
876
+ ) {
877
+ checkInterval = Math.max(checkInterval, 2);
878
+ await new Promise((resolve) =>
879
+ setTimeout(resolve, checkInterval * 1000)
880
+ );
881
+ } else {
882
+ throw new FirecrawlError(
883
+ `Crawl job failed or was stopped. Status: ${statusData.status}`,
884
+ 500
885
+ );
886
+ }
893
887
  } else {
894
- throw new FirecrawlError(
895
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
896
- 500
897
- );
888
+ this.handleError(statusResponse, "check crawl status");
898
889
  }
899
- } else {
900
- this.handleError(statusResponse, "check crawl status");
901
890
  }
891
+ } catch (error: any) {
892
+ throw new FirecrawlError(error, 500);
902
893
  }
903
894
  }
904
895
 
@@ -941,11 +932,9 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
941
932
  private ws: WebSocket;
942
933
  public data: FirecrawlDocument<undefined>[];
943
934
  public status: CrawlStatusResponse["status"];
944
- public id: string;
945
935
 
946
936
  constructor(id: string, app: FirecrawlApp) {
947
937
  super();
948
- this.id = id;
949
938
  this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
950
939
  this.status = "scraping";
951
940
  this.data = [];
@@ -976,7 +965,6 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
976
965
  detail: {
977
966
  status: this.status,
978
967
  data: this.data,
979
- id: this.id,
980
968
  },
981
969
  }));
982
970
  } else if (msg.type === "error") {
@@ -986,7 +974,6 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
986
974
  status: this.status,
987
975
  data: this.data,
988
976
  error: msg.error,
989
- id: this.id,
990
977
  },
991
978
  }));
992
979
  } else if (msg.type === "catchup") {
@@ -994,18 +981,12 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
994
981
  this.data.push(...(msg.data.data ?? []));
995
982
  for (const doc of this.data) {
996
983
  this.dispatchTypedEvent("document", new CustomEvent("document", {
997
- detail: {
998
- ...doc,
999
- id: this.id,
1000
- },
984
+ detail: doc,
1001
985
  }));
1002
986
  }
1003
987
  } else if (msg.type === "document") {
1004
988
  this.dispatchTypedEvent("document", new CustomEvent("document", {
1005
- detail: {
1006
- ...msg.data,
1007
- id: this.id,
1008
- },
989
+ detail: msg.data,
1009
990
  }));
1010
991
  }
1011
992
  }
@@ -1015,21 +996,14 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
1015
996
  this.ws.close();
1016
997
  return;
1017
998
  }
1018
- try {
1019
- const msg = JSON.parse(ev.data) as Message;
1020
- messageHandler(msg);
1021
- } catch (error) {
1022
- console.error("Error on message", error);
1023
- }
999
+
1000
+ const msg = JSON.parse(ev.data) as Message;
1001
+ messageHandler(msg);
1024
1002
  }).bind(this);
1025
1003
 
1026
1004
  this.ws.onclose = ((ev: CloseEvent) => {
1027
- try {
1028
- const msg = JSON.parse(ev.reason) as Message;
1029
- messageHandler(msg);
1030
- } catch (error) {
1031
- console.error("Error on close", error);
1032
- }
1005
+ const msg = JSON.parse(ev.reason) as Message;
1006
+ messageHandler(msg);
1033
1007
  }).bind(this);
1034
1008
 
1035
1009
  this.ws.onerror = ((_: Event) => {
@@ -1039,7 +1013,6 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
1039
1013
  status: this.status,
1040
1014
  data: this.data,
1041
1015
  error: "WebSocket error",
1042
- id: this.id,
1043
1016
  },
1044
1017
  }));
1045
1018
  }).bind(this);