firecrawl 4.9.2 → 4.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-NR7V4IEX.js → chunk-GY35KXDS.js} +1 -1
- package/dist/index.cjs +85 -8
- package/dist/index.d.cts +8 -2
- package/dist/index.d.ts +8 -2
- package/dist/index.js +85 -9
- package/dist/{package-647PIJ6T.js → package-THA2MQX4.js} +1 -1
- package/package.json +1 -1
- package/src/v2/methods/agent.ts +2 -0
- package/src/v2/methods/batch.ts +31 -4
- package/src/v2/methods/crawl.ts +31 -4
- package/src/v2/types.ts +20 -1
- package/src/v2/utils/errorHandler.ts +34 -1
|
@@ -8,7 +8,7 @@ var require_package = __commonJS({
|
|
|
8
8
|
"package.json"(exports, module) {
|
|
9
9
|
module.exports = {
|
|
10
10
|
name: "@mendable/firecrawl-js",
|
|
11
|
-
version: "4.9.2",
|
|
11
|
+
version: "4.10.0",
|
|
12
12
|
description: "JavaScript SDK for Firecrawl API",
|
|
13
13
|
main: "dist/index.js",
|
|
14
14
|
types: "dist/index.d.ts",
|
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "4.9.2",
|
|
38
|
+
version: "4.10.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -115,6 +115,7 @@ __export(index_exports, {
|
|
|
115
115
|
Firecrawl: () => Firecrawl,
|
|
116
116
|
FirecrawlAppV1: () => FirecrawlApp,
|
|
117
117
|
FirecrawlClient: () => FirecrawlClient,
|
|
118
|
+
JobTimeoutError: () => JobTimeoutError,
|
|
118
119
|
SdkError: () => SdkError,
|
|
119
120
|
default: () => index_default
|
|
120
121
|
});
|
|
@@ -222,12 +223,29 @@ var SdkError = class extends Error {
|
|
|
222
223
|
status;
|
|
223
224
|
code;
|
|
224
225
|
details;
|
|
225
|
-
|
|
226
|
+
jobId;
|
|
227
|
+
constructor(message, status, code, details, jobId) {
|
|
226
228
|
super(message);
|
|
227
229
|
this.name = "FirecrawlSdkError";
|
|
228
230
|
this.status = status;
|
|
229
231
|
this.code = code;
|
|
230
232
|
this.details = details;
|
|
233
|
+
this.jobId = jobId;
|
|
234
|
+
}
|
|
235
|
+
};
|
|
236
|
+
var JobTimeoutError = class extends SdkError {
|
|
237
|
+
timeoutSeconds;
|
|
238
|
+
constructor(jobId, timeoutSeconds, jobType = "batch") {
|
|
239
|
+
const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl";
|
|
240
|
+
super(
|
|
241
|
+
`${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
|
|
242
|
+
void 0,
|
|
243
|
+
"JOB_TIMEOUT",
|
|
244
|
+
void 0,
|
|
245
|
+
jobId
|
|
246
|
+
);
|
|
247
|
+
this.name = "JobTimeoutError";
|
|
248
|
+
this.timeoutSeconds = timeoutSeconds;
|
|
231
249
|
}
|
|
232
250
|
};
|
|
233
251
|
|
|
@@ -303,6 +321,27 @@ function normalizeAxiosError(err, action) {
|
|
|
303
321
|
const code = body?.code || err.code;
|
|
304
322
|
throw new SdkError(message, status, code, body?.details ?? body);
|
|
305
323
|
}
|
|
324
|
+
function isRetryableError(err) {
|
|
325
|
+
if (err instanceof JobTimeoutError) {
|
|
326
|
+
return false;
|
|
327
|
+
}
|
|
328
|
+
if (err instanceof SdkError || err && typeof err === "object" && "status" in err) {
|
|
329
|
+
const status = err.status;
|
|
330
|
+
if (status && status >= 400 && status < 500) {
|
|
331
|
+
return false;
|
|
332
|
+
}
|
|
333
|
+
if (status && status >= 500) {
|
|
334
|
+
return true;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
if (err?.isAxiosError && !err.response) {
|
|
338
|
+
return true;
|
|
339
|
+
}
|
|
340
|
+
if (err?.code === "ECONNABORTED" || err?.message?.includes("timeout")) {
|
|
341
|
+
return true;
|
|
342
|
+
}
|
|
343
|
+
return true;
|
|
344
|
+
}
|
|
306
345
|
|
|
307
346
|
// src/v2/methods/scrape.ts
|
|
308
347
|
async function scrape(http, url, options) {
|
|
@@ -535,10 +574,28 @@ async function cancelCrawl(http, jobId) {
|
|
|
535
574
|
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
|
|
536
575
|
const start = Date.now();
|
|
537
576
|
while (true) {
|
|
538
|
-
|
|
539
|
-
|
|
577
|
+
try {
|
|
578
|
+
const status = await getCrawlStatus(http, jobId);
|
|
579
|
+
if (["completed", "failed", "cancelled"].includes(status.status)) {
|
|
580
|
+
return status;
|
|
581
|
+
}
|
|
582
|
+
} catch (err) {
|
|
583
|
+
if (!isRetryableError(err)) {
|
|
584
|
+
if (err instanceof SdkError) {
|
|
585
|
+
const errorWithJobId = new SdkError(
|
|
586
|
+
err.message,
|
|
587
|
+
err.status,
|
|
588
|
+
err.code,
|
|
589
|
+
err.details,
|
|
590
|
+
jobId
|
|
591
|
+
);
|
|
592
|
+
throw errorWithJobId;
|
|
593
|
+
}
|
|
594
|
+
throw err;
|
|
595
|
+
}
|
|
596
|
+
}
|
|
540
597
|
if (timeout != null && Date.now() - start > timeout * 1e3) {
|
|
541
|
-
throw new
|
|
598
|
+
throw new JobTimeoutError(jobId, timeout, "crawl");
|
|
542
599
|
}
|
|
543
600
|
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
|
|
544
601
|
}
|
|
@@ -677,10 +734,28 @@ async function getBatchScrapeErrors(http, jobId) {
|
|
|
677
734
|
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
|
|
678
735
|
const start = Date.now();
|
|
679
736
|
while (true) {
|
|
680
|
-
|
|
681
|
-
|
|
737
|
+
try {
|
|
738
|
+
const status = await getBatchScrapeStatus(http, jobId);
|
|
739
|
+
if (["completed", "failed", "cancelled"].includes(status.status)) {
|
|
740
|
+
return status;
|
|
741
|
+
}
|
|
742
|
+
} catch (err) {
|
|
743
|
+
if (!isRetryableError(err)) {
|
|
744
|
+
if (err instanceof SdkError) {
|
|
745
|
+
const errorWithJobId = new SdkError(
|
|
746
|
+
err.message,
|
|
747
|
+
err.status,
|
|
748
|
+
err.code,
|
|
749
|
+
err.details,
|
|
750
|
+
jobId
|
|
751
|
+
);
|
|
752
|
+
throw errorWithJobId;
|
|
753
|
+
}
|
|
754
|
+
throw err;
|
|
755
|
+
}
|
|
756
|
+
}
|
|
682
757
|
if (timeout != null && Date.now() - start > timeout * 1e3) {
|
|
683
|
-
throw new
|
|
758
|
+
throw new JobTimeoutError(jobId, timeout, "batch");
|
|
684
759
|
}
|
|
685
760
|
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
|
|
686
761
|
}
|
|
@@ -764,6 +839,7 @@ function prepareAgentPayload(args) {
|
|
|
764
839
|
}
|
|
765
840
|
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
|
|
766
841
|
if (args.maxCredits !== null && args.maxCredits !== void 0) body.maxCredits = args.maxCredits;
|
|
842
|
+
if (args.strictConstrainToURLs !== null && args.strictConstrainToURLs !== void 0) body.strictConstrainToURLs = args.strictConstrainToURLs;
|
|
767
843
|
return body;
|
|
768
844
|
}
|
|
769
845
|
async function startAgent(http, args) {
|
|
@@ -2809,5 +2885,6 @@ var index_default = Firecrawl;
|
|
|
2809
2885
|
Firecrawl,
|
|
2810
2886
|
FirecrawlAppV1,
|
|
2811
2887
|
FirecrawlClient,
|
|
2888
|
+
JobTimeoutError,
|
|
2812
2889
|
SdkError
|
|
2813
2890
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -531,7 +531,12 @@ declare class SdkError extends Error {
|
|
|
531
531
|
status?: number;
|
|
532
532
|
code?: string;
|
|
533
533
|
details?: unknown;
|
|
534
|
-
|
|
534
|
+
jobId?: string;
|
|
535
|
+
constructor(message: string, status?: number, code?: string, details?: unknown, jobId?: string);
|
|
536
|
+
}
|
|
537
|
+
declare class JobTimeoutError extends SdkError {
|
|
538
|
+
timeoutSeconds: number;
|
|
539
|
+
constructor(jobId: string, timeoutSeconds: number, jobType?: 'batch' | 'crawl');
|
|
535
540
|
}
|
|
536
541
|
interface QueueStatusResponse$1 {
|
|
537
542
|
success: boolean;
|
|
@@ -587,6 +592,7 @@ declare function prepareAgentPayload(args: {
|
|
|
587
592
|
schema?: Record<string, unknown> | ZodTypeAny;
|
|
588
593
|
integration?: string;
|
|
589
594
|
maxCredits?: number;
|
|
595
|
+
strictConstrainToURLs?: boolean;
|
|
590
596
|
}): Record<string, unknown>;
|
|
591
597
|
declare function startAgent(http: HttpClient, args: Parameters<typeof prepareAgentPayload>[0]): Promise<AgentResponse>;
|
|
592
598
|
|
|
@@ -1720,4 +1726,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
1720
1726
|
get v1(): FirecrawlApp;
|
|
1721
1727
|
}
|
|
1722
1728
|
|
|
1723
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
1729
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.d.ts
CHANGED
|
@@ -531,7 +531,12 @@ declare class SdkError extends Error {
|
|
|
531
531
|
status?: number;
|
|
532
532
|
code?: string;
|
|
533
533
|
details?: unknown;
|
|
534
|
-
|
|
534
|
+
jobId?: string;
|
|
535
|
+
constructor(message: string, status?: number, code?: string, details?: unknown, jobId?: string);
|
|
536
|
+
}
|
|
537
|
+
declare class JobTimeoutError extends SdkError {
|
|
538
|
+
timeoutSeconds: number;
|
|
539
|
+
constructor(jobId: string, timeoutSeconds: number, jobType?: 'batch' | 'crawl');
|
|
535
540
|
}
|
|
536
541
|
interface QueueStatusResponse$1 {
|
|
537
542
|
success: boolean;
|
|
@@ -587,6 +592,7 @@ declare function prepareAgentPayload(args: {
|
|
|
587
592
|
schema?: Record<string, unknown> | ZodTypeAny;
|
|
588
593
|
integration?: string;
|
|
589
594
|
maxCredits?: number;
|
|
595
|
+
strictConstrainToURLs?: boolean;
|
|
590
596
|
}): Record<string, unknown>;
|
|
591
597
|
declare function startAgent(http: HttpClient, args: Parameters<typeof prepareAgentPayload>[0]): Promise<AgentResponse>;
|
|
592
598
|
|
|
@@ -1720,4 +1726,4 @@ declare class Firecrawl extends FirecrawlClient {
|
|
|
1720
1726
|
get v1(): FirecrawlApp;
|
|
1721
1727
|
}
|
|
1722
1728
|
|
|
1723
|
-
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
|
1729
|
+
export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AgentOptions$1 as AgentOptions, type AgentResponse, type AgentStatusResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type BrandingProfile, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type CreditUsageHistoricalPeriod, type CreditUsageHistoricalResponse, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, JobTimeoutError, type JsonFormat, type LocationConfig$1 as LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type QueueStatusResponse$1 as QueueStatusResponse, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type TokenUsageHistoricalPeriod, type TokenUsageHistoricalResponse, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
require_package
|
|
3
|
-
} from "./chunk-NR7V4IEX.js";
|
|
3
|
+
} from "./chunk-GY35KXDS.js";
|
|
4
4
|
|
|
5
5
|
// src/v2/utils/httpClient.ts
|
|
6
6
|
import axios from "axios";
|
|
@@ -104,12 +104,29 @@ var SdkError = class extends Error {
|
|
|
104
104
|
status;
|
|
105
105
|
code;
|
|
106
106
|
details;
|
|
107
|
-
|
|
107
|
+
jobId;
|
|
108
|
+
constructor(message, status, code, details, jobId) {
|
|
108
109
|
super(message);
|
|
109
110
|
this.name = "FirecrawlSdkError";
|
|
110
111
|
this.status = status;
|
|
111
112
|
this.code = code;
|
|
112
113
|
this.details = details;
|
|
114
|
+
this.jobId = jobId;
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
var JobTimeoutError = class extends SdkError {
|
|
118
|
+
timeoutSeconds;
|
|
119
|
+
constructor(jobId, timeoutSeconds, jobType = "batch") {
|
|
120
|
+
const jobTypeLabel = jobType === "batch" ? "batch scrape" : "crawl";
|
|
121
|
+
super(
|
|
122
|
+
`${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
|
|
123
|
+
void 0,
|
|
124
|
+
"JOB_TIMEOUT",
|
|
125
|
+
void 0,
|
|
126
|
+
jobId
|
|
127
|
+
);
|
|
128
|
+
this.name = "JobTimeoutError";
|
|
129
|
+
this.timeoutSeconds = timeoutSeconds;
|
|
113
130
|
}
|
|
114
131
|
};
|
|
115
132
|
|
|
@@ -185,6 +202,27 @@ function normalizeAxiosError(err, action) {
|
|
|
185
202
|
const code = body?.code || err.code;
|
|
186
203
|
throw new SdkError(message, status, code, body?.details ?? body);
|
|
187
204
|
}
|
|
205
|
+
function isRetryableError(err) {
|
|
206
|
+
if (err instanceof JobTimeoutError) {
|
|
207
|
+
return false;
|
|
208
|
+
}
|
|
209
|
+
if (err instanceof SdkError || err && typeof err === "object" && "status" in err) {
|
|
210
|
+
const status = err.status;
|
|
211
|
+
if (status && status >= 400 && status < 500) {
|
|
212
|
+
return false;
|
|
213
|
+
}
|
|
214
|
+
if (status && status >= 500) {
|
|
215
|
+
return true;
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
if (err?.isAxiosError && !err.response) {
|
|
219
|
+
return true;
|
|
220
|
+
}
|
|
221
|
+
if (err?.code === "ECONNABORTED" || err?.message?.includes("timeout")) {
|
|
222
|
+
return true;
|
|
223
|
+
}
|
|
224
|
+
return true;
|
|
225
|
+
}
|
|
188
226
|
|
|
189
227
|
// src/v2/methods/scrape.ts
|
|
190
228
|
async function scrape(http, url, options) {
|
|
@@ -417,10 +455,28 @@ async function cancelCrawl(http, jobId) {
|
|
|
417
455
|
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
|
|
418
456
|
const start = Date.now();
|
|
419
457
|
while (true) {
|
|
420
|
-
|
|
421
|
-
|
|
458
|
+
try {
|
|
459
|
+
const status = await getCrawlStatus(http, jobId);
|
|
460
|
+
if (["completed", "failed", "cancelled"].includes(status.status)) {
|
|
461
|
+
return status;
|
|
462
|
+
}
|
|
463
|
+
} catch (err) {
|
|
464
|
+
if (!isRetryableError(err)) {
|
|
465
|
+
if (err instanceof SdkError) {
|
|
466
|
+
const errorWithJobId = new SdkError(
|
|
467
|
+
err.message,
|
|
468
|
+
err.status,
|
|
469
|
+
err.code,
|
|
470
|
+
err.details,
|
|
471
|
+
jobId
|
|
472
|
+
);
|
|
473
|
+
throw errorWithJobId;
|
|
474
|
+
}
|
|
475
|
+
throw err;
|
|
476
|
+
}
|
|
477
|
+
}
|
|
422
478
|
if (timeout != null && Date.now() - start > timeout * 1e3) {
|
|
423
|
-
throw new
|
|
479
|
+
throw new JobTimeoutError(jobId, timeout, "crawl");
|
|
424
480
|
}
|
|
425
481
|
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
|
|
426
482
|
}
|
|
@@ -559,10 +615,28 @@ async function getBatchScrapeErrors(http, jobId) {
|
|
|
559
615
|
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
|
|
560
616
|
const start = Date.now();
|
|
561
617
|
while (true) {
|
|
562
|
-
|
|
563
|
-
|
|
618
|
+
try {
|
|
619
|
+
const status = await getBatchScrapeStatus(http, jobId);
|
|
620
|
+
if (["completed", "failed", "cancelled"].includes(status.status)) {
|
|
621
|
+
return status;
|
|
622
|
+
}
|
|
623
|
+
} catch (err) {
|
|
624
|
+
if (!isRetryableError(err)) {
|
|
625
|
+
if (err instanceof SdkError) {
|
|
626
|
+
const errorWithJobId = new SdkError(
|
|
627
|
+
err.message,
|
|
628
|
+
err.status,
|
|
629
|
+
err.code,
|
|
630
|
+
err.details,
|
|
631
|
+
jobId
|
|
632
|
+
);
|
|
633
|
+
throw errorWithJobId;
|
|
634
|
+
}
|
|
635
|
+
throw err;
|
|
636
|
+
}
|
|
637
|
+
}
|
|
564
638
|
if (timeout != null && Date.now() - start > timeout * 1e3) {
|
|
565
|
-
throw new
|
|
639
|
+
throw new JobTimeoutError(jobId, timeout, "batch");
|
|
566
640
|
}
|
|
567
641
|
await new Promise((r) => setTimeout(r, Math.max(1e3, pollInterval * 1e3)));
|
|
568
642
|
}
|
|
@@ -646,6 +720,7 @@ function prepareAgentPayload(args) {
|
|
|
646
720
|
}
|
|
647
721
|
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
|
|
648
722
|
if (args.maxCredits !== null && args.maxCredits !== void 0) body.maxCredits = args.maxCredits;
|
|
723
|
+
if (args.strictConstrainToURLs !== null && args.strictConstrainToURLs !== void 0) body.strictConstrainToURLs = args.strictConstrainToURLs;
|
|
649
724
|
return body;
|
|
650
725
|
}
|
|
651
726
|
async function startAgent(http, args) {
|
|
@@ -1258,7 +1333,7 @@ var FirecrawlApp = class {
|
|
|
1258
1333
|
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
1259
1334
|
return process.env.npm_package_version;
|
|
1260
1335
|
}
|
|
1261
|
-
const packageJson = await import("./package-647PIJ6T.js");
|
|
1336
|
+
const packageJson = await import("./package-THA2MQX4.js");
|
|
1262
1337
|
return packageJson.default.version;
|
|
1263
1338
|
} catch (error) {
|
|
1264
1339
|
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
|
@@ -2690,6 +2765,7 @@ export {
|
|
|
2690
2765
|
Firecrawl,
|
|
2691
2766
|
FirecrawlApp as FirecrawlAppV1,
|
|
2692
2767
|
FirecrawlClient,
|
|
2768
|
+
JobTimeoutError,
|
|
2693
2769
|
SdkError,
|
|
2694
2770
|
index_default as default
|
|
2695
2771
|
};
|
package/package.json
CHANGED
package/src/v2/methods/agent.ts
CHANGED
|
@@ -10,6 +10,7 @@ function prepareAgentPayload(args: {
|
|
|
10
10
|
schema?: Record<string, unknown> | ZodTypeAny;
|
|
11
11
|
integration?: string;
|
|
12
12
|
maxCredits?: number;
|
|
13
|
+
strictConstrainToURLs?: boolean;
|
|
13
14
|
}): Record<string, unknown> {
|
|
14
15
|
const body: Record<string, unknown> = {};
|
|
15
16
|
if (args.urls) body.urls = args.urls;
|
|
@@ -21,6 +22,7 @@ function prepareAgentPayload(args: {
|
|
|
21
22
|
}
|
|
22
23
|
if (args.integration && args.integration.trim()) body.integration = args.integration.trim();
|
|
23
24
|
if (args.maxCredits !== null && args.maxCredits !== undefined) body.maxCredits = args.maxCredits;
|
|
25
|
+
if (args.strictConstrainToURLs !== null && args.strictConstrainToURLs !== undefined) body.strictConstrainToURLs = args.strictConstrainToURLs;
|
|
24
26
|
return body;
|
|
25
27
|
}
|
|
26
28
|
|
package/src/v2/methods/batch.ts
CHANGED
|
@@ -5,11 +5,13 @@ import {
|
|
|
5
5
|
type Document,
|
|
6
6
|
type BatchScrapeOptions,
|
|
7
7
|
type PaginationConfig,
|
|
8
|
+
JobTimeoutError,
|
|
9
|
+
SdkError,
|
|
8
10
|
} from "../types";
|
|
9
11
|
import { HttpClient } from "../utils/httpClient";
|
|
10
12
|
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
11
13
|
import { fetchAllPages } from "../utils/pagination";
|
|
12
|
-
import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
|
|
14
|
+
import { normalizeAxiosError, throwForBadResponse, isRetryableError } from "../utils/errorHandler";
|
|
13
15
|
|
|
14
16
|
export async function startBatchScrape(
|
|
15
17
|
http: HttpClient,
|
|
@@ -115,12 +117,37 @@ export async function getBatchScrapeErrors(http: HttpClient, jobId: string): Pro
|
|
|
115
117
|
|
|
116
118
|
export async function waitForBatchCompletion(http: HttpClient, jobId: string, pollInterval = 2, timeout?: number): Promise<BatchScrapeJob> {
|
|
117
119
|
const start = Date.now();
|
|
120
|
+
|
|
118
121
|
while (true) {
|
|
119
|
-
|
|
120
|
-
|
|
122
|
+
try {
|
|
123
|
+
const status = await getBatchScrapeStatus(http, jobId);
|
|
124
|
+
|
|
125
|
+
if (["completed", "failed", "cancelled"].includes(status.status)) {
|
|
126
|
+
return status;
|
|
127
|
+
}
|
|
128
|
+
} catch (err: any) {
|
|
129
|
+
// Don't retry on permanent errors (4xx) - re-throw immediately with jobId context
|
|
130
|
+
if (!isRetryableError(err)) {
|
|
131
|
+
// Create new error with jobId for better debugging (non-retryable errors like 404)
|
|
132
|
+
if (err instanceof SdkError) {
|
|
133
|
+
const errorWithJobId = new SdkError(
|
|
134
|
+
err.message,
|
|
135
|
+
err.status,
|
|
136
|
+
err.code,
|
|
137
|
+
err.details,
|
|
138
|
+
jobId
|
|
139
|
+
);
|
|
140
|
+
throw errorWithJobId;
|
|
141
|
+
}
|
|
142
|
+
throw err;
|
|
143
|
+
}
|
|
144
|
+
// Otherwise, retry after delay - error might be transient (network issue, timeout, 5xx, etc.)
|
|
145
|
+
}
|
|
146
|
+
|
|
121
147
|
if (timeout != null && Date.now() - start > timeout * 1000) {
|
|
122
|
-
throw new
|
|
148
|
+
throw new JobTimeoutError(jobId, timeout, 'batch');
|
|
123
149
|
}
|
|
150
|
+
|
|
124
151
|
await new Promise((r) => setTimeout(r, Math.max(1000, pollInterval * 1000)));
|
|
125
152
|
}
|
|
126
153
|
}
|
package/src/v2/methods/crawl.ts
CHANGED
|
@@ -6,10 +6,12 @@ import {
|
|
|
6
6
|
type Document,
|
|
7
7
|
type CrawlOptions,
|
|
8
8
|
type PaginationConfig,
|
|
9
|
+
JobTimeoutError,
|
|
10
|
+
SdkError,
|
|
9
11
|
} from "../types";
|
|
10
12
|
import { HttpClient } from "../utils/httpClient";
|
|
11
13
|
import { ensureValidScrapeOptions } from "../utils/validation";
|
|
12
|
-
import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
|
|
14
|
+
import { normalizeAxiosError, throwForBadResponse, isRetryableError } from "../utils/errorHandler";
|
|
13
15
|
import type { HttpClient as _Http } from "../utils/httpClient";
|
|
14
16
|
import { fetchAllPages } from "../utils/pagination";
|
|
15
17
|
|
|
@@ -114,12 +116,37 @@ export async function cancelCrawl(http: HttpClient, jobId: string): Promise<bool
|
|
|
114
116
|
|
|
115
117
|
export async function waitForCrawlCompletion(http: HttpClient, jobId: string, pollInterval = 2, timeout?: number): Promise<CrawlJob> {
|
|
116
118
|
const start = Date.now();
|
|
119
|
+
|
|
117
120
|
while (true) {
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
try {
|
|
122
|
+
const status = await getCrawlStatus(http, jobId);
|
|
123
|
+
|
|
124
|
+
if (["completed", "failed", "cancelled"].includes(status.status)) {
|
|
125
|
+
return status;
|
|
126
|
+
}
|
|
127
|
+
} catch (err: any) {
|
|
128
|
+
// Don't retry on permanent errors (4xx) - re-throw immediately with jobId context
|
|
129
|
+
if (!isRetryableError(err)) {
|
|
130
|
+
// Create new error with jobId for better debugging (non-retryable errors like 404)
|
|
131
|
+
if (err instanceof SdkError) {
|
|
132
|
+
const errorWithJobId = new SdkError(
|
|
133
|
+
err.message,
|
|
134
|
+
err.status,
|
|
135
|
+
err.code,
|
|
136
|
+
err.details,
|
|
137
|
+
jobId
|
|
138
|
+
);
|
|
139
|
+
throw errorWithJobId;
|
|
140
|
+
}
|
|
141
|
+
throw err;
|
|
142
|
+
}
|
|
143
|
+
// Otherwise, retry after delay - error might be transient (network issue, timeout, 5xx, etc.)
|
|
144
|
+
}
|
|
145
|
+
|
|
120
146
|
if (timeout != null && Date.now() - start > timeout * 1000) {
|
|
121
|
-
throw new
|
|
147
|
+
throw new JobTimeoutError(jobId, timeout, 'crawl');
|
|
122
148
|
}
|
|
149
|
+
|
|
123
150
|
await new Promise((r) => setTimeout(r, Math.max(1000, pollInterval * 1000)));
|
|
124
151
|
}
|
|
125
152
|
}
|
package/src/v2/types.ts
CHANGED
|
@@ -631,17 +631,36 @@ export class SdkError extends Error {
|
|
|
631
631
|
status?: number;
|
|
632
632
|
code?: string;
|
|
633
633
|
details?: unknown;
|
|
634
|
+
jobId?: string;
|
|
634
635
|
constructor(
|
|
635
636
|
message: string,
|
|
636
637
|
status?: number,
|
|
637
638
|
code?: string,
|
|
638
|
-
details?: unknown
|
|
639
|
+
details?: unknown,
|
|
640
|
+
jobId?: string
|
|
639
641
|
) {
|
|
640
642
|
super(message);
|
|
641
643
|
this.name = 'FirecrawlSdkError';
|
|
642
644
|
this.status = status;
|
|
643
645
|
this.code = code;
|
|
644
646
|
this.details = details;
|
|
647
|
+
this.jobId = jobId;
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
export class JobTimeoutError extends SdkError {
|
|
652
|
+
timeoutSeconds: number;
|
|
653
|
+
constructor(jobId: string, timeoutSeconds: number, jobType: 'batch' | 'crawl' = 'batch') {
|
|
654
|
+
const jobTypeLabel = jobType === 'batch' ? 'batch scrape' : 'crawl';
|
|
655
|
+
super(
|
|
656
|
+
`${jobTypeLabel.charAt(0).toUpperCase() + jobTypeLabel.slice(1)} job ${jobId} did not complete within ${timeoutSeconds} seconds`,
|
|
657
|
+
undefined,
|
|
658
|
+
'JOB_TIMEOUT',
|
|
659
|
+
undefined,
|
|
660
|
+
jobId
|
|
661
|
+
);
|
|
662
|
+
this.name = 'JobTimeoutError';
|
|
663
|
+
this.timeoutSeconds = timeoutSeconds;
|
|
645
664
|
}
|
|
646
665
|
}
|
|
647
666
|
|
package/src/v2/utils/errorHandler.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { type AxiosError, type AxiosResponse } from "axios";
|
|
2
|
-
import { SdkError } from "../types";
|
|
2
|
+
import { SdkError, JobTimeoutError } from "../types";
|
|
3
3
|
|
|
4
4
|
export function throwForBadResponse(resp: AxiosResponse, action: string): never {
|
|
5
5
|
const status = resp.status;
|
|
@@ -16,3 +16,36 @@ export function normalizeAxiosError(err: AxiosError, action: string): never {
|
|
|
16
16
|
throw new SdkError(message, status, code, body?.details ?? body);
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
export function isRetryableError(err: any): boolean {
|
|
20
|
+
// JobTimeoutError should never be retried - it's the overall timeout
|
|
21
|
+
if (err instanceof JobTimeoutError) {
|
|
22
|
+
return false;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// If it's an SdkError with a status code, check if it's retryable
|
|
26
|
+
if (err instanceof SdkError || (err && typeof err === 'object' && 'status' in err)) {
|
|
27
|
+
const status = err.status;
|
|
28
|
+
// 4xx errors are client errors and shouldn't be retried
|
|
29
|
+
if (status && status >= 400 && status < 500) {
|
|
30
|
+
return false; // Don't retry client errors (401, 404, etc.)
|
|
31
|
+
}
|
|
32
|
+
// 5xx errors are server errors and can be retried
|
|
33
|
+
if (status && status >= 500) {
|
|
34
|
+
return true;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Network errors (no response) are retryable
|
|
39
|
+
if (err?.isAxiosError && !err.response) {
|
|
40
|
+
return true;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// HTTP timeout errors are retryable (different from JobTimeoutError)
|
|
44
|
+
if (err?.code === 'ECONNABORTED' || err?.message?.includes('timeout')) {
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Default: retry on unknown errors (safer than not retrying)
|
|
49
|
+
return true;
|
|
50
|
+
}
|
|
51
|
+
|