firecrawl 4.9.3 → 4.10.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
@@ -8,7 +8,7 @@ var require_package = __commonJS({
  "package.json"(exports, module) {
  module.exports = {
  name: "@mendable/firecrawl-js",
- version: "4.9.3",
+ version: "4.10.0",
  description: "JavaScript SDK for Firecrawl API",
  main: "dist/index.js",
  types: "dist/index.d.ts",
package/dist/index.cjs CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
  "package.json"(exports2, module2) {
  module2.exports = {
  name: "@mendable/firecrawl-js",
- version: "4.9.3",
+ version: "4.10.0",
  description: "JavaScript SDK for Firecrawl API",
  main: "dist/index.js",
  types: "dist/index.d.ts",
@@ -115,6 +115,7 @@ __export(index_exports, {
  Firecrawl: () => Firecrawl,
  FirecrawlAppV1: () => FirecrawlApp,
  FirecrawlClient: () => FirecrawlClient,
+ JobTimeoutError: () => JobTimeoutError,
  SdkError: () => SdkError,
  default: () => index_default
  });
@@ -222,12 +223,29 @@ var SdkError = class extends Error {
  status;
  code;
  details;
- constructor(message, status, code, details) {
+ jobId;
+ constructor(message, status, code, details, jobId) {
  super(message);
  this.name = "FirecrawlSdkError";
  this.status = status;
  this.code = code;
  this.details = details;
+ this.jobId = jobId;
+ }
+ };
+ var JobTimeoutError = class extends SdkError {
+ timeoutSeconds;
+ constructor(jobId, timeoutSeconds, jobType = "batch") {
+ const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl";
+ super(
+ `${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
+ void 0,
+ "JOB_TIMEOUT",
+ void 0,
+ jobId
+ );
+ this.name = "JobTimeoutError";
+ this.timeoutSeconds = timeoutSeconds;
  }
  };

@@ -303,6 +321,27 @@ function normalizeAxiosError(err, action) {
  const code = body?.code || err.code;
  throw new SdkError(message, status, code, body?.details ?? body);
  }
+ function isRetryableError(err) {
+ if (err instanceof JobTimeoutError) {
+ return false;
+ }
+ if (err instanceof SdkError || err && typeof err === "object" && "status" in err) {
+ const status = err.status;
+ if (status && status >= 400 && status < 500) {
+ return false;
+ }
+ if (status && status >= 500) {
+ return true;
+ }
+ }
+ if (err?.isAxiosError && !err.response) {
+ return true;
+ }
+ if (err?.code === "ECONNABORTED" || err?.message?.includes("timeout")) {
+ return true;
+ }
+ return true;
+ }

  // src/v2/methods/scrape.ts
  async function scrape(http, url, options) {
@@ -535,10 +574,28 @@ async function cancelCrawl(http, jobId) {
  async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
  const start = Date.now();
  while (true) {
- const status = await getCrawlStatus(http, jobId);
- if (["completed", "failed", "cancelled"].includes(status.status)) return status;
+ try {
+ const status = await getCrawlStatus(http, jobId);
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
+ return status;
+ }
+ } catch (err) {
+ if (!isRetryableError(err)) {
+ if (err instanceof SdkError) {
+ const errorWithJobId = new SdkError(
+ err.message,
+ err.status,
+ err.code,
+ err.details,
+ jobId
+ );
+ throw errorWithJobId;
+ }
+ throw err;
+ }
+ }
  if (timeout != null && Date.now() - start > timeout * 1e3) {
- throw new Error(`Crawl job ${jobId} did not complete within ${timeout} seconds`);
+ throw new JobTimeoutError(jobId, timeout, "crawl");
  }
  await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
  }
@@ -677,10 +734,28 @@ async function getBatchScrapeErrors(http, jobId) {
  async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
  const start = Date.now();
  while (true) {
- const status = await getBatchScrapeStatus(http, jobId);
- if (["completed", "failed", "cancelled"].includes(status.status)) return status;
+ try {
+ const status = await getBatchScrapeStatus(http, jobId);
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
+ return status;
+ }
+ } catch (err) {
+ if (!isRetryableError(err)) {
+ if (err instanceof SdkError) {
+ const errorWithJobId = new SdkError(
+ err.message,
+ err.status,
+ err.code,
+ err.details,
+ jobId
+ );
+ throw errorWithJobId;
+ }
+ throw err;
+ }
+ }
  if (timeout != null && Date.now() - start > timeout * 1e3) {
- throw new Error(`Batch scrape job ${jobId} did not complete within ${timeout} seconds`);
+ throw new JobTimeoutError(jobId, timeout, "batch");
  }
  await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
  }
@@ -2810,5 +2885,6 @@ var index_default = Firecrawl;
  Firecrawl,
  FirecrawlAppV1,
  FirecrawlClient,
+ JobTimeoutError,
  SdkError
  });
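For context on the new error surface added above: JobTimeoutError extends SdkError, sets code to "JOB_TIMEOUT", and carries the job id and timeout. A minimal sketch of what an instance looks like, assuming you import from the published package entry point ("firecrawl"); the constructor arguments here are illustrative only:

import { JobTimeoutError, SdkError } from "firecrawl";

// Constructed directly for illustration; in practice the SDK's waitFor* helpers
// throw this when a crawl or batch scrape job exceeds the caller's `timeout`.
const err = new JobTimeoutError("job-123", 60, "crawl");

console.log(err instanceof SdkError);   // true (JobTimeoutError extends SdkError)
console.log(err.name);                  // "JobTimeoutError"
console.log(err.code);                  // "JOB_TIMEOUT"
console.log(err.jobId);                 // "job-123"
console.log(err.timeoutSeconds);        // 60
console.log(err.message);               // "Crawl job job-123 did not complete within 60 seconds"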
package/dist/index.d.cts CHANGED
@@ -531,7 +531,12 @@ declare class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;
- constructor(message: string, status?: number, code?: string, details?: unknown);
+ jobId?: string;
+ constructor(message: string, status?: number, code?: string, details?: unknown, jobId?: string);
+ }
+ declare class JobTimeoutError extends SdkError {
+ timeoutSeconds: number;
+ constructor(jobId: string, timeoutSeconds: number, jobType?: 'batch' | 'crawl');
  }
  interface QueueStatusResponse$1 {
  success: boolean;
@@ -1721,4 +1726,4 @@ declare class Firecrawl extends FirecrawlClient {
  get v1(): FirecrawlApp;
  }

- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.d.ts CHANGED
@@ -531,7 +531,12 @@ declare class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;
- constructor(message: string, status?: number, code?: string, details?: unknown);
+ jobId?: string;
+ constructor(message: string, status?: number, code?: string, details?: unknown, jobId?: string);
+ }
+ declare class JobTimeoutError extends SdkError {
+ timeoutSeconds: number;
+ constructor(jobId: string, timeoutSeconds: number, jobType?: 'batch' | 'crawl');
  }
  interface QueueStatusResponse$1 {
  success: boolean;
@@ -1721,4 +1726,4 @@ declare class Firecrawl extends FirecrawlClient {
  get v1(): FirecrawlApp;
  }

- export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+ export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
  import {
  require_package
- } from "./chunk-46HSLP23.js";
+ } from "./chunk-GY35KXDS.js";

  // src/v2/utils/httpClient.ts
  import axios from "axios";
@@ -104,12 +104,29 @@ var SdkError = class extends Error {
  status;
  code;
  details;
- constructor(message, status, code, details) {
+ jobId;
+ constructor(message, status, code, details, jobId) {
  super(message);
  this.name = "FirecrawlSdkError";
  this.status = status;
  this.code = code;
  this.details = details;
+ this.jobId = jobId;
+ }
+ };
+ var JobTimeoutError = class extends SdkError {
+ timeoutSeconds;
+ constructor(jobId, timeoutSeconds, jobType = "batch") {
+ const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl";
+ super(
+ `${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
+ void 0,
+ "JOB_TIMEOUT",
+ void 0,
+ jobId
+ );
+ this.name = "JobTimeoutError";
+ this.timeoutSeconds = timeoutSeconds;
  }
  };

@@ -185,6 +202,27 @@ function normalizeAxiosError(err, action) {
  const code = body?.code || err.code;
  throw new SdkError(message, status, code, body?.details ?? body);
  }
+ function isRetryableError(err) {
+ if (err instanceof JobTimeoutError) {
+ return false;
+ }
+ if (err instanceof SdkError || err && typeof err === "object" && "status" in err) {
+ const status = err.status;
+ if (status && status >= 400 && status < 500) {
+ return false;
+ }
+ if (status && status >= 500) {
+ return true;
+ }
+ }
+ if (err?.isAxiosError && !err.response) {
+ return true;
+ }
+ if (err?.code === "ECONNABORTED" || err?.message?.includes("timeout")) {
+ return true;
+ }
+ return true;
+ }

  // src/v2/methods/scrape.ts
  async function scrape(http, url, options) {
@@ -417,10 +455,28 @@ async function cancelCrawl(http, jobId) {
  async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
  const start = Date.now();
  while (true) {
- const status = await getCrawlStatus(http, jobId);
- if (["completed", "failed", "cancelled"].includes(status.status)) return status;
+ try {
+ const status = await getCrawlStatus(http, jobId);
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
+ return status;
+ }
+ } catch (err) {
+ if (!isRetryableError(err)) {
+ if (err instanceof SdkError) {
+ const errorWithJobId = new SdkError(
+ err.message,
+ err.status,
+ err.code,
+ err.details,
+ jobId
+ );
+ throw errorWithJobId;
+ }
+ throw err;
+ }
+ }
  if (timeout != null && Date.now() - start > timeout * 1e3) {
- throw new Error(`Crawl job ${jobId} did not complete within ${timeout} seconds`);
+ throw new JobTimeoutError(jobId, timeout, "crawl");
  }
  await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
  }
@@ -559,10 +615,28 @@ async function getBatchScrapeErrors(http, jobId) {
  async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
  const start = Date.now();
  while (true) {
- const status = await getBatchScrapeStatus(http, jobId);
- if (["completed", "failed", "cancelled"].includes(status.status)) return status;
+ try {
+ const status = await getBatchScrapeStatus(http, jobId);
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
+ return status;
+ }
+ } catch (err) {
+ if (!isRetryableError(err)) {
+ if (err instanceof SdkError) {
+ const errorWithJobId = new SdkError(
+ err.message,
+ err.status,
+ err.code,
+ err.details,
+ jobId
+ );
+ throw errorWithJobId;
+ }
+ throw err;
+ }
+ }
  if (timeout != null && Date.now() - start > timeout * 1e3) {
- throw new Error(`Batch scrape job ${jobId} did not complete within ${timeout} seconds`);
+ throw new JobTimeoutError(jobId, timeout, "batch");
  }
  await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
  }
@@ -1259,7 +1333,7 @@ var FirecrawlApp = class {
  if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
  return process.env.npm_package_version;
  }
- const packageJson = await import("./package-ZXN6LAWY.js");
+ const packageJson = await import("./package-THA2MQX4.js");
  return packageJson.default.version;
  } catch (error) {
  const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -2691,6 +2765,7 @@ export {
  Firecrawl,
  FirecrawlApp as FirecrawlAppV1,
  FirecrawlClient,
+ JobTimeoutError,
  SdkError,
  index_default as default
  };
@@ -1,4 +1,4 @@
  import {
  require_package
- } from "./chunk-46HSLP23.js";
+ } from "./chunk-GY35KXDS.js";
  export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "firecrawl",
- "version": "4.9.3",
+ "version": "4.10.0",
  "description": "JavaScript SDK for Firecrawl API",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -5,11 +5,13 @@ import {
  type Document,
  type BatchScrapeOptions,
  type PaginationConfig,
+ JobTimeoutError,
+ SdkError,
  } from "../types";
  import { HttpClient } from "../utils/httpClient";
  import { ensureValidScrapeOptions } from "../utils/validation";
  import { fetchAllPages } from "../utils/pagination";
- import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
+ import { normalizeAxiosError, throwForBadResponse, isRetryableError } from "../utils/errorHandler";

  export async function startBatchScrape(
  http: HttpClient,
@@ -115,12 +117,37 @@ export async function getBatchScrapeErrors(http: HttpClient, jobId: string): Pro

  export async function waitForBatchCompletion(http: HttpClient, jobId: string, pollInterval = 2, timeout?: number): Promise<BatchScrapeJob> {
  const start = Date.now();
+
  while (true) {
- const status = await getBatchScrapeStatus(http, jobId);
- if (["completed", "failed", "cancelled"].includes(status.status)) return status;
+ try {
+ const status = await getBatchScrapeStatus(http, jobId);
+
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
+ return status;
+ }
+ } catch (err: any) {
+ // Don't retry on permanent errors (4xx) - re-throw immediately with jobId context
+ if (!isRetryableError(err)) {
+ // Create new error with jobId for better debugging (non-retryable errors like 404)
+ if (err instanceof SdkError) {
+ const errorWithJobId = new SdkError(
+ err.message,
+ err.status,
+ err.code,
+ err.details,
+ jobId
+ );
+ throw errorWithJobId;
+ }
+ throw err;
+ }
+ // Otherwise, retry after delay - error might be transient (network issue, timeout, 5xx, etc.)
+ }
+
  if (timeout != null && Date.now() - start > timeout * 1000) {
- throw new Error(`Batch scrape job ${jobId} did not complete within ${timeout} seconds`);
+ throw new JobTimeoutError(jobId, timeout, 'batch');
  }
+
  await new Promise((r) => setTimeout(r, Math.max(1000, pollInterval * 1000)));
  }
  }
@@ -6,10 +6,12 @@ import {
  type Document,
  type CrawlOptions,
  type PaginationConfig,
+ JobTimeoutError,
+ SdkError,
  } from "../types";
  import { HttpClient } from "../utils/httpClient";
  import { ensureValidScrapeOptions } from "../utils/validation";
- import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
+ import { normalizeAxiosError, throwForBadResponse, isRetryableError } from "../utils/errorHandler";
  import type { HttpClient as _Http } from "../utils/httpClient";
  import { fetchAllPages } from "../utils/pagination";

@@ -114,12 +116,37 @@ export async function cancelCrawl(http: HttpClient, jobId: string): Promise<bool

  export async function waitForCrawlCompletion(http: HttpClient, jobId: string, pollInterval = 2, timeout?: number): Promise<CrawlJob> {
  const start = Date.now();
+
  while (true) {
- const status = await getCrawlStatus(http, jobId);
- if (["completed", "failed", "cancelled"].includes(status.status)) return status;
+ try {
+ const status = await getCrawlStatus(http, jobId);
+
+ if (["completed", "failed", "cancelled"].includes(status.status)) {
+ return status;
+ }
+ } catch (err: any) {
+ // Don't retry on permanent errors (4xx) - re-throw immediately with jobId context
+ if (!isRetryableError(err)) {
+ // Create new error with jobId for better debugging (non-retryable errors like 404)
+ if (err instanceof SdkError) {
+ const errorWithJobId = new SdkError(
+ err.message,
+ err.status,
+ err.code,
+ err.details,
+ jobId
+ );
+ throw errorWithJobId;
+ }
+ throw err;
+ }
+ // Otherwise, retry after delay - error might be transient (network issue, timeout, 5xx, etc.)
+ }
+
  if (timeout != null && Date.now() - start > timeout * 1000) {
- throw new Error(`Crawl job ${jobId} did not complete within ${timeout} seconds`);
+ throw new JobTimeoutError(jobId, timeout, 'crawl');
  }
+
  await new Promise((r) => setTimeout(r, Math.max(1000, pollInterval * 1000)));
  }
  }
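Taken together, the two waiters above now retry transient failures, attach the job id to non-retryable SdkErrors, and throw a typed JobTimeoutError instead of a plain Error when the overall deadline passes. A hedged consumer-side sketch of handling that; `startAndWait` stands in for whichever SDK call your code uses to wait on a crawl or batch scrape job and is a placeholder, not a documented method name:

import { JobTimeoutError, SdkError } from "firecrawl";

// `startAndWait` is a placeholder for the SDK call that polls a job to completion.
async function runWithHandling<T>(startAndWait: () => Promise<T>): Promise<T | undefined> {
  try {
    return await startAndWait();
  } catch (err) {
    if (err instanceof JobTimeoutError) {
      // New in 4.10.0: the overall wait deadline is a typed error; the job itself
      // may still be running server-side, and err.jobId identifies it.
      console.error(`Timed out after ${err.timeoutSeconds}s; job ${err.jobId} may still be running`);
      return undefined;
    }
    if (err instanceof SdkError && err.jobId) {
      // Non-retryable API errors surfaced by the waiters now carry the job id.
      console.error(`Job ${err.jobId} failed: ${err.message} (status ${err.status ?? "n/a"})`);
    }
    throw err;
  }
}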
package/src/v2/types.ts CHANGED
@@ -631,17 +631,36 @@ export class SdkError extends Error {
  status?: number;
  code?: string;
  details?: unknown;
+ jobId?: string;
  constructor(
  message: string,
  status?: number,
  code?: string,
- details?: unknown
+ details?: unknown,
+ jobId?: string
  ) {
  super(message);
  this.name = 'FirecrawlSdkError';
  this.status = status;
  this.code = code;
  this.details = details;
+ this.jobId = jobId;
+ }
+ }
+
+ export class JobTimeoutError extends SdkError {
+ timeoutSeconds: number;
+ constructor(jobId: string, timeoutSeconds: number, jobType: 'batch' | 'crawl' = 'batch') {
+ const jobTypeLabel = jobType === 'batch' ? 'batch scrape' : 'crawl';
+ super(
+ `${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
+ undefined,
+ 'JOB_TIMEOUT',
+ undefined,
+ jobId
+ );
+ this.name = 'JobTimeoutError';
+ this.timeoutSeconds = timeoutSeconds;
  }
  }

@@ -1,5 +1,5 @@
  import { type AxiosError, type AxiosResponse } from "axios";
- import { SdkError } from "../types";
+ import { SdkError, JobTimeoutError } from "../types";

  export function throwForBadResponse(resp: AxiosResponse, action: string): never {
  const status = resp.status;
@@ -16,3 +16,36 @@ export function normalizeAxiosError(err: AxiosError, action: string): never {
  throw new SdkError(message, status, code, body?.details ?? body);
  }

+ export function isRetryableError(err: any): boolean {
+ // JobTimeoutError should never be retried - it's the overall timeout
+ if (err instanceof JobTimeoutError) {
+ return false;
+ }
+
+ // If it's an SdkError with a status code, check if it's retryable
+ if (err instanceof SdkError || (err && typeof err === 'object' && 'status' in err)) {
+ const status = err.status;
+ // 4xx errors are client errors and shouldn't be retried
+ if (status && status >= 400 && status < 500) {
+ return false; // Don't retry client errors (401, 404, etc.)
+ }
+ // 5xx errors are server errors and can be retried
+ if (status && status >= 500) {
+ return true;
+ }
+ }
+
+ // Network errors (no response) are retryable
+ if (err?.isAxiosError && !err.response) {
+ return true;
+ }
+
+ // HTTP timeout errors are retryable (different from JobTimeoutError)
+ if (err?.code === 'ECONNABORTED' || err?.message?.includes('timeout')) {
+ return true;
+ }
+
+ // Default: retry on unknown errors (safer than not retrying)
+ return true;
+ }
+
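To summarize the classification above: the polling loops keep retrying on transient failures and only surface permanent ones. isRetryableError itself is internal to the SDK's error handler and is not part of the package's public exports, so the sketch below only restates the rules from the diff using the exported error classes; the specific status codes, error codes, and messages are illustrative:

import { SdkError, JobTimeoutError } from "firecrawl";

// Per the rules above, the waiters treat errors roughly as follows:
//   4xx SdkError                        -> not retried; re-thrown with jobId attached
//   5xx SdkError                        -> retried on the next poll
//   axios error with no response        -> retried (network failure)
//   ECONNABORTED / "timeout" in message -> retried (per-request HTTP timeout)
//   JobTimeoutError                     -> never retried (it is the overall deadline)
//   anything else                       -> retried by default

const notFound = new SdkError("Job not found", 404, "NOT_FOUND");   // surfaced immediately
const serverErr = new SdkError("Internal server error", 500);       // polling continues
const deadline = new JobTimeoutError("job-123", 300, "batch");      // terminal

console.log(notFound.status, serverErr.status, deadline.code);      // 404 500 JOB_TIMEOUT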