firecrawl 1.9.7 → 1.10.1

This diff compares the publicly released contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -198,7 +198,7 @@ var FirecrawlApp = class {
  let statusData = response.data;
  if ("data" in statusData) {
  let data = statusData.data;
- while ("next" in statusData) {
+ while (typeof statusData === "object" && "next" in statusData) {
  statusData = (await this.getRequest(statusData.next, headers)).data;
  data = data.concat(statusData.data);
  }
@@ -295,9 +295,9 @@ var FirecrawlApp = class {
  * @param webhook - Optional webhook for the batch scrape.
  * @returns The response from the crawl operation.
  */
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
+ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
  const headers = this.prepareHeaders(idempotencyKey);
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
+ let jsonData = { urls, ...params };
  if (jsonData?.extract?.schema) {
  let schema = jsonData.extract.schema;
  try {
@@ -333,9 +333,9 @@ var FirecrawlApp = class {
  }
  return { success: false, error: "Internal server error." };
  }
- async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
+ async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
  const headers = this.prepareHeaders(idempotencyKey);
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
+ let jsonData = { urls, ...params ?? {} };
  try {
  const response = await this.postRequest(
  this.apiUrl + `/v1/batch/scrape`,
@@ -363,8 +363,8 @@ var FirecrawlApp = class {
  * @param idempotencyKey - Optional idempotency key for the request.
  * @returns A CrawlWatcher instance to monitor the crawl job.
  */
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
+ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
  if (crawl.success && crawl.id) {
  const id = crawl.id;
  return new CrawlWatcher(id, this);
@@ -393,7 +393,7 @@ var FirecrawlApp = class {
  let statusData = response.data;
  if ("data" in statusData) {
  let data = statusData.data;
- while ("next" in statusData) {
+ while (typeof statusData === "object" && "next" in statusData) {
  statusData = (await this.getRequest(statusData.next, headers)).data;
  data = data.concat(statusData.data);
  }
@@ -529,40 +529,44 @@ var FirecrawlApp = class {
  * @returns The final job status or data.
  */
  async monitorJobStatus(id, headers, checkInterval) {
- while (true) {
- let statusResponse = await this.getRequest(
- `${this.apiUrl}/v1/crawl/${id}`,
- headers
- );
- if (statusResponse.status === 200) {
- let statusData = statusResponse.data;
- if (statusData.status === "completed") {
- if ("data" in statusData) {
- let data = statusData.data;
- while ("next" in statusData) {
- statusResponse = await this.getRequest(statusData.next, headers);
- statusData = statusResponse.data;
- data = data.concat(statusData.data);
+ try {
+ while (true) {
+ let statusResponse = await this.getRequest(
+ `${this.apiUrl}/v1/crawl/${id}`,
+ headers
+ );
+ if (statusResponse.status === 200) {
+ let statusData = statusResponse.data;
+ if (statusData.status === "completed") {
+ if ("data" in statusData) {
+ let data = statusData.data;
+ while (typeof statusData === "object" && "next" in statusData) {
+ statusResponse = await this.getRequest(statusData.next, headers);
+ statusData = statusResponse.data;
+ data = data.concat(statusData.data);
+ }
+ statusData.data = data;
+ return statusData;
+ } else {
+ throw new FirecrawlError("Crawl job completed but no data was returned", 500);
  }
- statusData.data = data;
- return statusData;
+ } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
+ checkInterval = Math.max(checkInterval, 2);
+ await new Promise(
+ (resolve) => setTimeout(resolve, checkInterval * 1e3)
+ );
  } else {
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
+ throw new FirecrawlError(
+ `Crawl job failed or was stopped. Status: ${statusData.status}`,
+ 500
+ );
  }
- } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
- checkInterval = Math.max(checkInterval, 2);
- await new Promise(
- (resolve) => setTimeout(resolve, checkInterval * 1e3)
- );
  } else {
- throw new FirecrawlError(
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
- 500
- );
+ this.handleError(statusResponse, "check crawl status");
  }
- } else {
- this.handleError(statusResponse, "check crawl status");
  }
+ } catch (error) {
+ throw new FirecrawlError(error, 500);
  }
  }
  /**
@@ -589,10 +593,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
  ws;
  data;
  status;
- id;
  constructor(id, app) {
  super();
- this.id = id;
  this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
  this.status = "scraping";
  this.data = [];
@@ -602,8 +604,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
  this.dispatchTypedEvent("done", new CustomEvent("done", {
  detail: {
  status: this.status,
- data: this.data,
- id: this.id
+ data: this.data
  }
  }));
  } else if (msg.type === "error") {
@@ -612,8 +613,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
  detail: {
  status: this.status,
  data: this.data,
- error: msg.error,
- id: this.id
+ error: msg.error
  }
  }));
  } else if (msg.type === "catchup") {
@@ -621,18 +621,12 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
  this.data.push(...msg.data.data ?? []);
  for (const doc of this.data) {
  this.dispatchTypedEvent("document", new CustomEvent("document", {
- detail: {
- ...doc,
- id: this.id
- }
+ detail: doc
  }));
  }
  } else if (msg.type === "document") {
  this.dispatchTypedEvent("document", new CustomEvent("document", {
- detail: {
- ...msg.data,
- id: this.id
- }
+ detail: msg.data
  }));
  }
  };
@@ -654,8 +648,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
  detail: {
  status: this.status,
  data: this.data,
- error: "WebSocket error",
- id: this.id
+ error: "WebSocket error"
  }
  }));
  }).bind(this);
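
The recurring change in this bundle is the extra `typeof statusData === "object"` check in the pagination loops. The sketch below illustrates the failure mode the guard addresses; the names (Paginated, fetchPage, collectAllPages) are illustrative helpers, not SDK exports.

interface Paginated<T> {
  data: T[];
  next?: string; // URL of the next page; absent on the last page
}

async function collectAllPages<T>(
  first: Paginated<T>,
  fetchPage: (url: string) => Promise<Paginated<T>>
): Promise<T[]> {
  let page = first;
  let data = page.data;
  // The `in` operator throws a TypeError when its right operand is not an
  // object (for example a string or undefined body from a failed request),
  // so the typeof check must come first -- the same ordering the SDK now
  // uses in its while conditions.
  while (typeof page === "object" && "next" in page && page.next !== undefined) {
    page = await fetchPage(page.next);
    data = data.concat(page.data);
  }
  return data;
}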
package/dist/index.d.cts CHANGED
@@ -171,7 +171,6 @@ interface BatchScrapeResponse {
  url?: string;
  success: true;
  error?: string;
- invalidURLs?: string[];
  }
  /**
  * Response interface for job status checks.
@@ -226,11 +225,10 @@ interface MapResponse {
  * Defines options for extracting information from URLs.
  */
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
- prompt?: string;
+ prompt: string;
  schema?: LLMSchema;
  systemPrompt?: string;
  allowExternalLinks?: boolean;
- includeSubdomains?: boolean;
  }
  /**
  * Response interface for extracting information from URLs.
@@ -331,8 +329,8 @@ declare class FirecrawlApp {
  * @param webhook - Optional webhook for the batch scrape.
  * @returns The response from the crawl operation.
  */
- batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
- asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
+ batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
+ asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
  /**
  * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
  * @param urls - The URL to scrape.
@@ -340,7 +338,7 @@ declare class FirecrawlApp {
  * @param idempotencyKey - Optional idempotency key for the request.
  * @returns A CrawlWatcher instance to monitor the crawl job.
  */
- batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
+ batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
  /**
  * Checks the status of a batch scrape job using the Firecrawl API.
  * @param id - The ID of the batch scrape operation.
@@ -416,7 +414,6 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private ws;
  data: FirecrawlDocument<undefined>[];
  status: CrawlStatusResponse["status"];
- id: string;
  constructor(id: string, app: FirecrawlApp);
  close(): void;
  }
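
The typings above make `prompt` required on ExtractParams and drop `includeSubdomains`, so 1.9.7 call sites that passed only a schema no longer type-check. A hedged migration sketch, assuming the SDK's extract method consumes ExtractParams as in other releases of this package and using a Zod schema; the URL and prompt are illustrative:

import FirecrawlApp from "firecrawl";
import { z } from "zod";

const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

async function extractTitle(url: string) {
  // 1.9.7 allowed: app.extract([url], { schema, includeSubdomains: true });
  // 1.10.1 requires a prompt, and includeSubdomains is no longer accepted.
  return app.extract([url], {
    prompt: "Extract the page title",
    schema: z.object({ title: z.string() }),
  });
}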
package/dist/index.d.ts CHANGED
@@ -171,7 +171,6 @@ interface BatchScrapeResponse {
  url?: string;
  success: true;
  error?: string;
- invalidURLs?: string[];
  }
  /**
  * Response interface for job status checks.
@@ -226,11 +225,10 @@ interface MapResponse {
  * Defines options for extracting information from URLs.
  */
  interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
- prompt?: string;
+ prompt: string;
  schema?: LLMSchema;
  systemPrompt?: string;
  allowExternalLinks?: boolean;
- includeSubdomains?: boolean;
  }
  /**
  * Response interface for extracting information from URLs.
@@ -331,8 +329,8 @@ declare class FirecrawlApp {
  * @param webhook - Optional webhook for the batch scrape.
  * @returns The response from the crawl operation.
  */
- batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
- asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<BatchScrapeResponse | ErrorResponse>;
+ batchScrapeUrls(urls: string[], params?: ScrapeParams, pollInterval?: number, idempotencyKey?: string, webhook?: CrawlParams["webhook"]): Promise<BatchScrapeStatusResponse | ErrorResponse>;
+ asyncBatchScrapeUrls(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<BatchScrapeResponse | ErrorResponse>;
  /**
  * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
  * @param urls - The URL to scrape.
@@ -340,7 +338,7 @@ declare class FirecrawlApp {
  * @param idempotencyKey - Optional idempotency key for the request.
  * @returns A CrawlWatcher instance to monitor the crawl job.
  */
- batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string, webhook?: CrawlParams["webhook"], ignoreInvalidURLs?: boolean): Promise<CrawlWatcher>;
+ batchScrapeUrlsAndWatch(urls: string[], params?: ScrapeParams, idempotencyKey?: string): Promise<CrawlWatcher>;
  /**
  * Checks the status of a batch scrape job using the Firecrawl API.
  * @param id - The ID of the batch scrape operation.
@@ -416,7 +414,6 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private ws;
  data: FirecrawlDocument<undefined>[];
  status: CrawlStatusResponse["status"];
- id: string;
  constructor(id: string, app: FirecrawlApp);
  close(): void;
  }
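
The same declaration change appears in both the .d.cts and .d.ts typings: the trailing webhook and ignoreInvalidURLs parameters are gone from asyncBatchScrapeUrls and batchScrapeUrlsAndWatch, and the request body is now just `{ urls, ...params }`. A sketch of the call-site impact, with an illustrative webhook URL:

import FirecrawlApp from "firecrawl";

const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

async function startBatch(urls: string[]) {
  // 1.9.7 accepted trailing arguments after idempotencyKey:
  // app.asyncBatchScrapeUrls(urls, undefined, undefined,
  //   "https://hooks.example.com/firecrawl", true);

  // 1.10.1 stops at idempotencyKey, so those extra options are neither
  // accepted nor forwarded to /v1/batch/scrape.
  return app.asyncBatchScrapeUrls(urls, { formats: ["markdown"] });
}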
package/dist/index.js CHANGED
@@ -162,7 +162,7 @@ var FirecrawlApp = class {
  let statusData = response.data;
  if ("data" in statusData) {
  let data = statusData.data;
- while ("next" in statusData) {
+ while (typeof statusData === "object" && "next" in statusData) {
  statusData = (await this.getRequest(statusData.next, headers)).data;
  data = data.concat(statusData.data);
  }
@@ -259,9 +259,9 @@ var FirecrawlApp = class {
  * @param webhook - Optional webhook for the batch scrape.
  * @returns The response from the crawl operation.
  */
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
+ async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook) {
  const headers = this.prepareHeaders(idempotencyKey);
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
+ let jsonData = { urls, ...params };
  if (jsonData?.extract?.schema) {
  let schema = jsonData.extract.schema;
  try {
@@ -297,9 +297,9 @@ var FirecrawlApp = class {
  }
  return { success: false, error: "Internal server error." };
  }
- async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
+ async asyncBatchScrapeUrls(urls, params, idempotencyKey) {
  const headers = this.prepareHeaders(idempotencyKey);
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
+ let jsonData = { urls, ...params ?? {} };
  try {
  const response = await this.postRequest(
  this.apiUrl + `/v1/batch/scrape`,
@@ -327,8 +327,8 @@ var FirecrawlApp = class {
  * @param idempotencyKey - Optional idempotency key for the request.
  * @returns A CrawlWatcher instance to monitor the crawl job.
  */
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
+ async batchScrapeUrlsAndWatch(urls, params, idempotencyKey) {
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
  if (crawl.success && crawl.id) {
  const id = crawl.id;
  return new CrawlWatcher(id, this);
@@ -357,7 +357,7 @@ var FirecrawlApp = class {
  let statusData = response.data;
  if ("data" in statusData) {
  let data = statusData.data;
- while ("next" in statusData) {
+ while (typeof statusData === "object" && "next" in statusData) {
  statusData = (await this.getRequest(statusData.next, headers)).data;
  data = data.concat(statusData.data);
  }
@@ -493,40 +493,44 @@ var FirecrawlApp = class {
  * @returns The final job status or data.
  */
  async monitorJobStatus(id, headers, checkInterval) {
- while (true) {
- let statusResponse = await this.getRequest(
- `${this.apiUrl}/v1/crawl/${id}`,
- headers
- );
- if (statusResponse.status === 200) {
- let statusData = statusResponse.data;
- if (statusData.status === "completed") {
- if ("data" in statusData) {
- let data = statusData.data;
- while ("next" in statusData) {
- statusResponse = await this.getRequest(statusData.next, headers);
- statusData = statusResponse.data;
- data = data.concat(statusData.data);
+ try {
+ while (true) {
+ let statusResponse = await this.getRequest(
+ `${this.apiUrl}/v1/crawl/${id}`,
+ headers
+ );
+ if (statusResponse.status === 200) {
+ let statusData = statusResponse.data;
+ if (statusData.status === "completed") {
+ if ("data" in statusData) {
+ let data = statusData.data;
+ while (typeof statusData === "object" && "next" in statusData) {
+ statusResponse = await this.getRequest(statusData.next, headers);
+ statusData = statusResponse.data;
+ data = data.concat(statusData.data);
+ }
+ statusData.data = data;
+ return statusData;
+ } else {
+ throw new FirecrawlError("Crawl job completed but no data was returned", 500);
  }
- statusData.data = data;
- return statusData;
+ } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
+ checkInterval = Math.max(checkInterval, 2);
+ await new Promise(
+ (resolve) => setTimeout(resolve, checkInterval * 1e3)
+ );
  } else {
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
+ throw new FirecrawlError(
+ `Crawl job failed or was stopped. Status: ${statusData.status}`,
+ 500
+ );
  }
- } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
- checkInterval = Math.max(checkInterval, 2);
- await new Promise(
- (resolve) => setTimeout(resolve, checkInterval * 1e3)
- );
  } else {
- throw new FirecrawlError(
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
- 500
- );
+ this.handleError(statusResponse, "check crawl status");
  }
- } else {
- this.handleError(statusResponse, "check crawl status");
  }
+ } catch (error) {
+ throw new FirecrawlError(error, 500);
  }
  }
  /**
@@ -553,10 +557,8 @@ var CrawlWatcher = class extends TypedEventTarget {
  ws;
  data;
  status;
- id;
  constructor(id, app) {
  super();
- this.id = id;
  this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
  this.status = "scraping";
  this.data = [];
@@ -566,8 +568,7 @@ var CrawlWatcher = class extends TypedEventTarget {
  this.dispatchTypedEvent("done", new CustomEvent("done", {
  detail: {
  status: this.status,
- data: this.data,
- id: this.id
+ data: this.data
  }
  }));
  } else if (msg.type === "error") {
@@ -576,8 +577,7 @@ var CrawlWatcher = class extends TypedEventTarget {
  detail: {
  status: this.status,
  data: this.data,
- error: msg.error,
- id: this.id
+ error: msg.error
  }
  }));
  } else if (msg.type === "catchup") {
@@ -585,18 +585,12 @@ var CrawlWatcher = class extends TypedEventTarget {
  this.data.push(...msg.data.data ?? []);
  for (const doc of this.data) {
  this.dispatchTypedEvent("document", new CustomEvent("document", {
- detail: {
- ...doc,
- id: this.id
- }
+ detail: doc
  }));
  }
  } else if (msg.type === "document") {
  this.dispatchTypedEvent("document", new CustomEvent("document", {
- detail: {
- ...msg.data,
- id: this.id
- }
+ detail: msg.data
  }));
  }
  };
@@ -618,8 +612,7 @@ var CrawlWatcher = class extends TypedEventTarget {
  detail: {
  status: this.status,
  data: this.data,
- error: "WebSocket error",
- id: this.id
+ error: "WebSocket error"
  }
  }));
  }).bind(this);
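
Besides mirroring the CJS bundle, the rewritten monitorJobStatus above wraps the whole polling loop in try/catch and rethrows everything as FirecrawlError. A sketch of what that means for callers of the polling methods, assuming the crawlUrl method and the exported FirecrawlError class from this SDK:

import FirecrawlApp, { FirecrawlError } from "firecrawl";

const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

async function crawlSafely(url: string) {
  try {
    // crawlUrl polls via monitorJobStatus until the job finishes, so
    // network errors, non-200 responses, and failed jobs now all surface
    // here as a single FirecrawlError rather than as raw transport errors.
    return await app.crawlUrl(url, { limit: 10 });
  } catch (err) {
    if (err instanceof FirecrawlError) {
      console.error(`Crawl of ${url} failed:`, err.message);
      return null;
    }
    throw err;
  }
}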
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "firecrawl",
- "version": "1.9.7",
+ "version": "1.10.1",
  "description": "JavaScript SDK for Firecrawl API",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -183,7 +183,6 @@ export interface BatchScrapeResponse {
  url?: string;
  success: true;
  error?: string;
- invalidURLs?: string[];
  }
 
  /**
@@ -243,11 +242,10 @@ export interface MapResponse {
  * Defines options for extracting information from URLs.
  */
  export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
- prompt?: string;
+ prompt: string;
  schema?: LLMSchema;
  systemPrompt?: string;
  allowExternalLinks?: boolean;
- includeSubdomains?: boolean;
  }
 
  /**
@@ -464,7 +462,7 @@ export default class FirecrawlApp {
  let statusData = response.data
  if ("data" in statusData) {
  let data = statusData.data;
- while ('next' in statusData) {
+ while (typeof statusData === 'object' && 'next' in statusData) {
  statusData = (await this.getRequest(statusData.next, headers)).data;
  data = data.concat(statusData.data);
  }
@@ -578,10 +576,9 @@ export default class FirecrawlApp {
  pollInterval: number = 2,
  idempotencyKey?: string,
  webhook?: CrawlParams["webhook"],
- ignoreInvalidURLs?: boolean,
  ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);
- let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
+ let jsonData: any = { urls, ...params };
  if (jsonData?.extract?.schema) {
  let schema = jsonData.extract.schema;
 
@@ -624,12 +621,10 @@ export default class FirecrawlApp {
  async asyncBatchScrapeUrls(
  urls: string[],
  params?: ScrapeParams,
- idempotencyKey?: string,
- webhook?: CrawlParams["webhook"],
- ignoreInvalidURLs?: boolean,
+ idempotencyKey?: string
  ): Promise<BatchScrapeResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);
- let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...(params ?? {}) };
+ let jsonData: any = { urls, ...(params ?? {}) };
  try {
  const response: AxiosResponse = await this.postRequest(
  this.apiUrl + `/v1/batch/scrape`,
@@ -662,10 +657,8 @@ export default class FirecrawlApp {
  urls: string[],
  params?: ScrapeParams,
  idempotencyKey?: string,
- webhook?: CrawlParams["webhook"],
- ignoreInvalidURLs?: boolean,
  ) {
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
+ const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey);
 
  if (crawl.success && crawl.id) {
  const id = crawl.id;
@@ -698,7 +691,7 @@ export default class FirecrawlApp {
  let statusData = response.data
  if ("data" in statusData) {
  let data = statusData.data;
- while ('next' in statusData) {
+ while (typeof statusData === 'object' && 'next' in statusData) {
  statusData = (await this.getRequest(statusData.next, headers)).data;
  data = data.concat(statusData.data);
  }
@@ -857,42 +850,46 @@ export default class FirecrawlApp {
  headers: AxiosRequestHeaders,
  checkInterval: number
  ): Promise<CrawlStatusResponse | ErrorResponse> {
- while (true) {
- let statusResponse: AxiosResponse = await this.getRequest(
- `${this.apiUrl}/v1/crawl/${id}`,
- headers
- );
- if (statusResponse.status === 200) {
- let statusData = statusResponse.data;
- if (statusData.status === "completed") {
- if ("data" in statusData) {
- let data = statusData.data;
- while ('next' in statusData) {
- statusResponse = await this.getRequest(statusData.next, headers);
- statusData = statusResponse.data;
- data = data.concat(statusData.data);
+ try {
+ while (true) {
+ let statusResponse: AxiosResponse = await this.getRequest(
+ `${this.apiUrl}/v1/crawl/${id}`,
+ headers
+ );
+ if (statusResponse.status === 200) {
+ let statusData = statusResponse.data;
+ if (statusData.status === "completed") {
+ if ("data" in statusData) {
+ let data = statusData.data;
+ while (typeof statusData === 'object' && 'next' in statusData) {
+ statusResponse = await this.getRequest(statusData.next, headers);
+ statusData = statusResponse.data;
+ data = data.concat(statusData.data);
+ }
+ statusData.data = data;
+ return statusData;
+ } else {
+ throw new FirecrawlError("Crawl job completed but no data was returned", 500);
  }
- statusData.data = data;
- return statusData;
- } else {
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
- }
- } else if (
- ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
- ) {
- checkInterval = Math.max(checkInterval, 2);
- await new Promise((resolve) =>
- setTimeout(resolve, checkInterval * 1000)
- );
+ } else if (
+ ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
+ ) {
+ checkInterval = Math.max(checkInterval, 2);
+ await new Promise((resolve) =>
+ setTimeout(resolve, checkInterval * 1000)
+ );
+ } else {
+ throw new FirecrawlError(
+ `Crawl job failed or was stopped. Status: ${statusData.status}`,
+ 500
+ );
+ }
  } else {
- throw new FirecrawlError(
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
- 500
- );
+ this.handleError(statusResponse, "check crawl status");
  }
- } else {
- this.handleError(statusResponse, "check crawl status");
  }
+ } catch (error: any) {
+ throw new FirecrawlError(error, 500);
  }
  }
 
@@ -935,11 +932,9 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private ws: WebSocket;
  public data: FirecrawlDocument<undefined>[];
  public status: CrawlStatusResponse["status"];
- public id: string;
 
  constructor(id: string, app: FirecrawlApp) {
  super();
- this.id = id;
  this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
  this.status = "scraping";
  this.data = [];
@@ -970,7 +965,6 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  detail: {
  status: this.status,
  data: this.data,
- id: this.id,
  },
  }));
  } else if (msg.type === "error") {
@@ -980,7 +974,6 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  status: this.status,
  data: this.data,
  error: msg.error,
- id: this.id,
  },
  }));
  } else if (msg.type === "catchup") {
@@ -988,18 +981,12 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  this.data.push(...(msg.data.data ?? []));
  for (const doc of this.data) {
  this.dispatchTypedEvent("document", new CustomEvent("document", {
- detail: {
- ...doc,
- id: this.id,
- },
+ detail: doc,
  }));
  }
  } else if (msg.type === "document") {
  this.dispatchTypedEvent("document", new CustomEvent("document", {
- detail: {
- ...msg.data,
- id: this.id,
- },
+ detail: msg.data,
  }));
  }
  }
@@ -1026,7 +1013,6 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  status: this.status,
  data: this.data,
  error: "WebSocket error",
- id: this.id,
  },
  }));
  }).bind(this);
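
Finally, the source confirms what the bundles showed: CrawlWatcher drops its public `id` field, and event payloads no longer mix the job id into `detail`. Listeners that read `event.detail.id` in 1.9.7 must keep the id themselves. A sketch against the batchScrapeUrlsAndWatch signature from this diff; the scrape options are illustrative:

import FirecrawlApp from "firecrawl";

const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

async function watchBatch(urls: string[]) {
  const watcher = await app.batchScrapeUrlsAndWatch(urls, { formats: ["markdown"] });

  watcher.addEventListener("document", (event) => {
    // 1.9.7 delivered { ...document, id: <job id> } and also exposed
    // watcher.id; 1.10.1 delivers the document alone, so retain the job id
    // yourself (e.g. from asyncBatchScrapeUrls) if you still need it.
    console.log("scraped:", event.detail);
  });

  watcher.addEventListener("done", (event) => {
    // The "done" detail is now just { status, data }, per this diff.
    console.log(`status: ${event.detail.status}, docs: ${event.detail.data.length}`);
  });
}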