firecrawl 4.5.1 → 4.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-TNGVHCPH.js → chunk-47H6QFPY.js} +1 -1
- package/dist/index.cjs +7 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8 -2
- package/dist/{package-SV2UYR6U.js → package-OVF37QHH.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/e2e/v2/batch.test.ts +20 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +17 -0
- package/src/v2/methods/batch.ts +3 -1
- package/src/v2/methods/crawl.ts +3 -1
- package/src/v2/types.ts +2 -0
- package/src/v2/watcher.ts +2 -0
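Taken together, the 4.6.1 changes below add an `id` field to crawl and batch-scrape job objects: `getCrawlStatus`/`getBatchScrapeStatus` echo the job id back on their return value, the watcher includes it in every snapshot, and the `CrawlJob`/`BatchScrapeJob` interfaces declare it. A minimal consumer-side sketch, modeled on the new e2e tests further down; the client construction is an illustrative assumption, not part of the diff:

import Firecrawl from "@mendable/firecrawl-js";

async function main() {
  // Assumed setup: default export with an { apiKey } option, as in the SDK's own examples.
  const client = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });

  // crawl() waits for the job to finish; as of 4.6.1 the returned job also carries its id.
  const job = await client.crawl("https://docs.firecrawl.dev", { limit: 3 });

  // The id can be fed straight back into the error-retrieval helper,
  // exactly as the new crawl e2e test does with getCrawlErrors(job.id!).
  const errors = await client.getCrawlErrors(job.id);
  console.log(`errors: ${errors.errors.length}, robots-blocked: ${errors.robotsBlocked.length}`);
}

main();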
package/dist/chunk-TNGVHCPH.js → chunk-47H6QFPY.js
RENAMED
@@ -8,7 +8,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.5.1",
+      version: "4.6.1",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
package/dist/index.cjs
CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.5.1",
+      version: "4.6.1",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
@@ -495,6 +495,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -506,6 +507,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -624,6 +626,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -635,6 +638,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -914,6 +918,7 @@ var Watcher = class extends import_events.EventEmitter {
     const status = payload.status;
     const data = payload.data || [];
     const snap = this.kind === "crawl" ? {
+      id: this.jobId,
       status,
       completed: payload.completed ?? 0,
       total: payload.total ?? 0,
@@ -922,6 +927,7 @@ var Watcher = class extends import_events.EventEmitter {
       next: payload.next ?? null,
       data
     } : {
+      id: this.jobId,
       status,
       completed: payload.completed ?? 0,
       total: payload.total ?? 0,
package/dist/index.d.cts
CHANGED
@@ -387,6 +387,7 @@ interface CrawlResponse$1 {
   url: string;
 }
 interface CrawlJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   total: number;
   completed: number;
@@ -411,6 +412,7 @@ interface BatchScrapeResponse$1 {
   invalidURLs?: string[];
 }
 interface BatchScrapeJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   completed: number;
   total: number;
package/dist/index.d.ts
CHANGED
@@ -387,6 +387,7 @@ interface CrawlResponse$1 {
   url: string;
 }
 interface CrawlJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   total: number;
   completed: number;
@@ -411,6 +412,7 @@ interface BatchScrapeResponse$1 {
   invalidURLs?: string[];
 }
 interface BatchScrapeJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   completed: number;
   total: number;
package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-TNGVHCPH.js";
+} from "./chunk-47H6QFPY.js";

 // src/v2/utils/httpClient.ts
 import axios from "axios";
@@ -377,6 +377,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -388,6 +389,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -506,6 +508,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -517,6 +520,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -796,6 +800,7 @@ var Watcher = class extends EventEmitter {
     const status = payload.status;
     const data = payload.data || [];
     const snap = this.kind === "crawl" ? {
+      id: this.jobId,
       status,
       completed: payload.completed ?? 0,
       total: payload.total ?? 0,
@@ -804,6 +809,7 @@ var Watcher = class extends EventEmitter {
       next: payload.next ?? null,
       data
     } : {
+      id: this.jobId,
       status,
       completed: payload.completed ?? 0,
       total: payload.total ?? 0,
@@ -1080,7 +1086,7 @@ var FirecrawlApp = class {
     if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
       return process.env.npm_package_version;
     }
-    const packageJson = await import("./package-SV2UYR6U.js");
+    const packageJson = await import("./package-OVF37QHH.js");
     return packageJson.default.version;
   } catch (error) {
     const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
package/src/__tests__/e2e/v2/batch.test.ts
CHANGED
@@ -29,6 +29,23 @@ describe("v2.batch e2e", () => {
     expect(Array.isArray(job.data)).toBe(true);
   }, 240_000);

+  test("batch scrape with wait returns job id for error retrieval", async () => {
+    const urls = [
+      "https://docs.firecrawl.dev",
+      "https://firecrawl.dev",
+    ];
+    const job = await client.batchScrape(urls, { options: { formats: ["markdown"] }, pollInterval: 1, timeout: 180 });
+    // Verify job has id field
+    expect(job.id).toBeDefined();
+    expect(typeof job.id).toBe("string");
+    // Verify we can use the id to retrieve errors
+    const errors = await client.getBatchScrapeErrors(job.id!);
+    expect(errors).toHaveProperty("errors");
+    expect(errors).toHaveProperty("robotsBlocked");
+    expect(Array.isArray(errors.errors)).toBe(true);
+    expect(Array.isArray(errors.robotsBlocked)).toBe(true);
+  }, 240_000);
+
   test("start batch minimal and status", async () => {
     const urls = ["https://docs.firecrawl.dev", "https://firecrawl.dev"];
     const start = await client.startBatchScrape(urls, { options: { formats: ["markdown"] }, ignoreInvalidURLs: true });
@@ -37,6 +54,9 @@ describe("v2.batch e2e", () => {
     const status = await client.getBatchScrapeStatus(start.id);
     expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
     expect(status.total).toBeGreaterThanOrEqual(0);
+    // Verify status includes id field
+    expect(status.id).toBeDefined();
+    expect(status.id).toBe(start.id);
   }, 120_000);

   test("wait batch with all params", async () => {
package/src/__tests__/e2e/v2/crawl.test.ts
CHANGED
@@ -45,6 +45,9 @@ describe("v2.crawl e2e", () => {
     const status = await client.getCrawlStatus(start.id);
     expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
     expect(status.completed).toBeGreaterThanOrEqual(0);
+    // Verify status includes id field
+    expect(status.id).toBeDefined();
+    expect(status.id).toBe(start.id);
     // next/expiresAt may be null/undefined depending on state; check shape
     expect(Array.isArray(status.data)).toBe(true);
   }, 120_000);
@@ -112,6 +115,20 @@ describe("v2.crawl e2e", () => {
     expect(Array.isArray(job.data)).toBe(true);
   }, 180_000);

+  test("crawl with wait returns job id for error retrieval", async () => {
+    if (!client) throw new Error();
+    const job = await client.crawl("https://docs.firecrawl.dev", { limit: 3, maxDiscoveryDepth: 2, pollInterval: 1, timeout: 120 });
+    // Verify job has id field
+    expect(job.id).toBeDefined();
+    expect(typeof job.id).toBe("string");
+    // Verify we can use the id to retrieve errors
+    const errors = await client.getCrawlErrors(job.id!);
+    expect(errors).toHaveProperty("errors");
+    expect(errors).toHaveProperty("robotsBlocked");
+    expect(Array.isArray(errors.errors)).toBe(true);
+    expect(Array.isArray(errors.robotsBlocked)).toBe(true);
+  }, 180_000);
+
   test("crawl with prompt and wait", async () => {
     if (!client) throw new Error();
     const job = await client.crawl("https://docs.firecrawl.dev", { prompt: "Extract all blog posts", limit: 3, pollInterval: 1, timeout: 120 });
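The batch-scrape path follows the same pattern; a sketch of the start-then-poll flow the updated batch tests cover, reusing the hypothetical `client` from the sketch near the top (run inside an async function):

// Start a batch job without waiting for it, then poll its status.
const start = await client.startBatchScrape(
  ["https://docs.firecrawl.dev", "https://firecrawl.dev"],
  { options: { formats: ["markdown"] }, ignoreInvalidURLs: true },
);

// As of 4.6.1 the status object echoes the job id back (status.id === start.id),
// so downstream code no longer has to thread start.id around separately.
const status = await client.getBatchScrapeStatus(start.id);
if (status.status === "failed") {
  const errors = await client.getBatchScrapeErrors(status.id);
  console.warn("robots-blocked URLs:", errors.robotsBlocked);
}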
package/src/v2/methods/batch.ts
CHANGED
@@ -62,6 +62,7 @@ export async function getBatchScrapeStatus(
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -74,6 +75,7 @@ export async function getBatchScrapeStatus(

   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -136,4 +138,4 @@ export function chunkUrls(urls: string[], chunkSize = 100): string[][] {
   const chunks: string[][] = [];
   for (let i = 0; i < urls.length; i += chunkSize) chunks.push(urls.slice(i, i + chunkSize));
   return chunks;
-}
+}
package/src/v2/methods/crawl.ts
CHANGED
@@ -72,6 +72,7 @@ export async function getCrawlStatus(
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -85,6 +86,7 @@ export async function getCrawlStatus(
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);

   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -165,4 +167,4 @@ export async function crawlParamsPreview(http: HttpClient, url: string, prompt:
     if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview");
     throw err;
   }
-}
+}
package/src/v2/types.ts
CHANGED
@@ -466,6 +466,7 @@ export interface CrawlResponse {
 }

 export interface CrawlJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   total: number;
   completed: number;
@@ -493,6 +494,7 @@ export interface BatchScrapeResponse {
 }

 export interface BatchScrapeJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   completed: number;
   total: number;
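Because `id` is declared as a required member of both interfaces, code that constructs these objects by hand (test fixtures, stubs) now has to supply it. A small illustration, assuming `CrawlJob` is importable from the package entry point as in the bundled declarations; the import path and values are hypothetical:

import type { CrawlJob } from "@mendable/firecrawl-js";

// Narrowed view used only to highlight the new required field; the full interface has more members.
type CrawlJobSummary = Pick<CrawlJob, "id" | "status" | "completed" | "total">;

const fixture: CrawlJobSummary = {
  id: "hypothetical-job-id", // required as of 4.6.1
  status: "completed",
  completed: 3,
  total: 3,
};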
package/src/v2/watcher.ts
CHANGED
@@ -114,6 +114,7 @@ export class Watcher extends EventEmitter {
     const data = (payload.data || []) as Document[];
     const snap: Snapshot = this.kind === "crawl"
       ? {
+          id: this.jobId,
           status,
           completed: payload.completed ?? 0,
           total: payload.total ?? 0,
@@ -123,6 +124,7 @@ export class Watcher extends EventEmitter {
           data,
         }
       : {
+          id: this.jobId,
           status,
           completed: payload.completed ?? 0,
           total: payload.total ?? 0,
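Watcher consumers see the same field: every snapshot the watcher assembles now begins with `id: this.jobId`, so progress events can be correlated with their job without capturing the id in a closure. A handler sketch; only fields visible in this diff are referenced, and how the handler gets registered depends on your watcher setup and is not shown here:

// Hypothetical listener; the snapshot shape mirrors the fields touched in watcher.ts above.
function logSnapshot(snap: { id: string; status: string; completed: number; total: number }) {
  console.log(`[${snap.id}] ${snap.status}: ${snap.completed}/${snap.total}`);
}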