@mendable/firecrawl 4.5.0 → 4.6.1
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/dist/{chunk-MVAMVUST.js → chunk-47H6QFPY.js} +3 -1
- package/dist/index.cjs +22 -5
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +21 -6
- package/dist/{package-O4UQBIGU.js → package-OVF37QHH.js} +1 -1
- package/package.json +3 -1
- package/src/__tests__/e2e/v2/batch.test.ts +20 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +17 -0
- package/src/v2/methods/batch.ts +3 -1
- package/src/v2/methods/crawl.ts +3 -1
- package/src/v2/types.ts +2 -0
- package/src/v2/watcher.ts +17 -6
- package/tsup.config.ts +1 -0
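
In functional terms, 4.6.1 makes four changes: ws becomes a runtime dependency (with @types/ws added to devDependencies); the CrawlJob and BatchScrapeJob types gain an id field, which getCrawlStatus, getBatchScrapeStatus, and the watcher snapshots now populate; the watcher's start() prefers the runtime's global WebSocket and falls back to ws only when no global is available; and new e2e tests cover retrieving errors via the returned job id.
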
package/dist/{chunk-MVAMVUST.js → chunk-47H6QFPY.js}
RENAMED
@@ -8,7 +8,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.5.0",
+      version: "4.6.1",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
@@ -36,6 +36,7 @@ var require_package = __commonJS({
       dependencies: {
         axios: "^1.12.2",
         "typescript-event-target": "^1.1.1",
+        ws: "^8.18.3",
         zod: "^3.23.8",
         "zod-to-json-schema": "^3.23.0"
       },
@@ -50,6 +51,7 @@ var require_package = __commonJS({
         "@types/mocha": "^10.0.6",
         "@types/node": "^20.12.12",
         "@types/uuid": "^9.0.8",
+        "@types/ws": "^8.18.1",
         dotenv: "^16.4.5",
         jest: "^30.0.5",
         "ts-jest": "^29.4.0",

package/dist/index.cjs
CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.5.0",
+      version: "4.6.1",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
@@ -63,6 +63,7 @@ var require_package = __commonJS({
       dependencies: {
         axios: "^1.12.2",
         "typescript-event-target": "^1.1.1",
+        ws: "^8.18.3",
         zod: "^3.23.8",
         "zod-to-json-schema": "^3.23.0"
       },
@@ -77,6 +78,7 @@ var require_package = __commonJS({
         "@types/mocha": "^10.0.6",
         "@types/node": "^20.12.12",
         "@types/uuid": "^9.0.8",
+        "@types/ws": "^8.18.1",
         dotenv: "^16.4.5",
         jest: "^30.0.5",
         "ts-jest": "^29.4.0",
@@ -493,6 +495,7 @@ async function getCrawlStatus(http, jobId, pagination) {
  const auto = pagination?.autoPaginate ?? true;
  if (!auto || !body.next) {
    return {
+      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
@@ -504,6 +507,7 @@ async function getCrawlStatus(http, jobId, pagination) {
  }
  const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
  return {
+    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
@@ -622,6 +626,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
  const auto = pagination?.autoPaginate ?? true;
  if (!auto || !body.next) {
    return {
+      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
@@ -633,6 +638,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
  }
  const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
  return {
+    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
@@ -823,6 +829,7 @@ async function getTokenUsageHistorical(http, byApiKey) {

// src/v2/watcher.ts
var import_events = require("events");
+var import_ws = require("ws");
var Watcher = class extends import_events.EventEmitter {
  http;
  jobId;
@@ -848,9 +855,15 @@ var Watcher = class extends import_events.EventEmitter {
  async start() {
    try {
      const url = this.buildWsUrl();
-
-
-
+      if (typeof WebSocket !== "undefined") {
+        this.ws = new WebSocket(url, this.http.getApiKey());
+      } else {
+        this.ws = new import_ws.WebSocket(url, this.http.getApiKey());
+      }
+      if (this.ws) {
+        this.attachWsHandlers(this.ws);
+      }
+    } catch (err) {
      this.pollLoop();
    }
  }
@@ -878,7 +891,9 @@ var Watcher = class extends import_events.EventEmitter {
      return;
    }
    if (type === "done") {
-
+      const payload2 = body.data || body;
+      const data = payload2.data || [];
+      this.emit("done", { status: "completed", data, id: this.jobId });
      this.close();
      return;
    }
@@ -903,6 +918,7 @@ var Watcher = class extends import_events.EventEmitter {
    const status = payload.status;
    const data = payload.data || [];
    const snap = this.kind === "crawl" ? {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
@@ -911,6 +927,7 @@ var Watcher = class extends import_events.EventEmitter {
      next: payload.next ?? null,
      data
    } : {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,

package/dist/index.d.cts
CHANGED
@@ -387,6 +387,7 @@ interface CrawlResponse$1 {
    url: string;
}
interface CrawlJob {
+    id: string;
    status: 'scraping' | 'completed' | 'failed' | 'cancelled';
    total: number;
    completed: number;
@@ -411,6 +412,7 @@ interface BatchScrapeResponse$1 {
    invalidURLs?: string[];
}
interface BatchScrapeJob {
+    id: string;
    status: 'scraping' | 'completed' | 'failed' | 'cancelled';
    completed: number;
    total: number;

package/dist/index.d.ts
CHANGED
@@ -387,6 +387,7 @@ interface CrawlResponse$1 {
    url: string;
}
interface CrawlJob {
+    id: string;
    status: 'scraping' | 'completed' | 'failed' | 'cancelled';
    total: number;
    completed: number;
@@ -411,6 +412,7 @@ interface BatchScrapeResponse$1 {
    invalidURLs?: string[];
}
interface BatchScrapeJob {
+    id: string;
    status: 'scraping' | 'completed' | 'failed' | 'cancelled';
    completed: number;
    total: number;

package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
import {
  require_package
-} from "./chunk-MVAMVUST.js";
+} from "./chunk-47H6QFPY.js";

// src/v2/utils/httpClient.ts
import axios from "axios";
@@ -377,6 +377,7 @@ async function getCrawlStatus(http, jobId, pagination) {
  const auto = pagination?.autoPaginate ?? true;
  if (!auto || !body.next) {
    return {
+      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
@@ -388,6 +389,7 @@ async function getCrawlStatus(http, jobId, pagination) {
  }
  const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
  return {
+    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
@@ -506,6 +508,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
  const auto = pagination?.autoPaginate ?? true;
  if (!auto || !body.next) {
    return {
+      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
@@ -517,6 +520,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
  }
  const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
  return {
+    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
@@ -707,6 +711,7 @@ async function getTokenUsageHistorical(http, byApiKey) {

// src/v2/watcher.ts
import { EventEmitter } from "events";
+import { WebSocket as WS } from "ws";
var Watcher = class extends EventEmitter {
  http;
  jobId;
@@ -732,9 +737,15 @@ var Watcher = class extends EventEmitter {
  async start() {
    try {
      const url = this.buildWsUrl();
-
-
-
+      if (typeof WebSocket !== "undefined") {
+        this.ws = new WebSocket(url, this.http.getApiKey());
+      } else {
+        this.ws = new WS(url, this.http.getApiKey());
+      }
+      if (this.ws) {
+        this.attachWsHandlers(this.ws);
+      }
+    } catch (err) {
      this.pollLoop();
    }
  }
@@ -762,7 +773,9 @@ var Watcher = class extends EventEmitter {
      return;
    }
    if (type === "done") {
-
+      const payload2 = body.data || body;
+      const data = payload2.data || [];
+      this.emit("done", { status: "completed", data, id: this.jobId });
      this.close();
      return;
    }
@@ -787,6 +800,7 @@ var Watcher = class extends EventEmitter {
    const status = payload.status;
    const data = payload.data || [];
    const snap = this.kind === "crawl" ? {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
@@ -795,6 +809,7 @@ var Watcher = class extends EventEmitter {
      next: payload.next ?? null,
      data
    } : {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
@@ -1071,7 +1086,7 @@ var FirecrawlApp = class {
      if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
        return process.env.npm_package_version;
      }
-      const packageJson = await import("./package-O4UQBIGU.js");
+      const packageJson = await import("./package-OVF37QHH.js");
      return packageJson.default.version;
    } catch (error) {
      const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);

package/package.json
CHANGED
@@ -1,6 +1,6 @@
{
  "name": "@mendable/firecrawl",
-  "version": "4.5.0",
+  "version": "4.6.1",
  "description": "JavaScript SDK for Firecrawl API",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -28,6 +28,7 @@
  "dependencies": {
    "axios": "^1.12.2",
    "typescript-event-target": "^1.1.1",
+    "ws": "^8.18.3",
    "zod": "^3.23.8",
    "zod-to-json-schema": "^3.23.0"
  },
@@ -42,6 +43,7 @@
    "@types/mocha": "^10.0.6",
    "@types/node": "^20.12.12",
    "@types/uuid": "^9.0.8",
+    "@types/ws": "^8.18.1",
    "dotenv": "^16.4.5",
    "jest": "^30.0.5",
    "ts-jest": "^29.4.0",

package/src/__tests__/e2e/v2/batch.test.ts
CHANGED
@@ -29,6 +29,23 @@ describe("v2.batch e2e", () => {
    expect(Array.isArray(job.data)).toBe(true);
  }, 240_000);

+  test("batch scrape with wait returns job id for error retrieval", async () => {
+    const urls = [
+      "https://docs.firecrawl.dev",
+      "https://firecrawl.dev",
+    ];
+    const job = await client.batchScrape(urls, { options: { formats: ["markdown"] }, pollInterval: 1, timeout: 180 });
+    // Verify job has id field
+    expect(job.id).toBeDefined();
+    expect(typeof job.id).toBe("string");
+    // Verify we can use the id to retrieve errors
+    const errors = await client.getBatchScrapeErrors(job.id!);
+    expect(errors).toHaveProperty("errors");
+    expect(errors).toHaveProperty("robotsBlocked");
+    expect(Array.isArray(errors.errors)).toBe(true);
+    expect(Array.isArray(errors.robotsBlocked)).toBe(true);
+  }, 240_000);
+
  test("start batch minimal and status", async () => {
    const urls = ["https://docs.firecrawl.dev", "https://firecrawl.dev"];
    const start = await client.startBatchScrape(urls, { options: { formats: ["markdown"] }, ignoreInvalidURLs: true });
@@ -37,6 +54,9 @@ describe("v2.batch e2e", () => {
    const status = await client.getBatchScrapeStatus(start.id);
    expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
    expect(status.total).toBeGreaterThanOrEqual(0);
+    // Verify status includes id field
+    expect(status.id).toBeDefined();
+    expect(status.id).toBe(start.id);
  }, 120_000);

  test("wait batch with all params", async () => {

package/src/__tests__/e2e/v2/crawl.test.ts
CHANGED
@@ -45,6 +45,9 @@ describe("v2.crawl e2e", () => {
    const status = await client.getCrawlStatus(start.id);
    expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
    expect(status.completed).toBeGreaterThanOrEqual(0);
+    // Verify status includes id field
+    expect(status.id).toBeDefined();
+    expect(status.id).toBe(start.id);
    // next/expiresAt may be null/undefined depending on state; check shape
    expect(Array.isArray(status.data)).toBe(true);
  }, 120_000);
@@ -112,6 +115,20 @@ describe("v2.crawl e2e", () => {
    expect(Array.isArray(job.data)).toBe(true);
  }, 180_000);

+  test("crawl with wait returns job id for error retrieval", async () => {
+    if (!client) throw new Error();
+    const job = await client.crawl("https://docs.firecrawl.dev", { limit: 3, maxDiscoveryDepth: 2, pollInterval: 1, timeout: 120 });
+    // Verify job has id field
+    expect(job.id).toBeDefined();
+    expect(typeof job.id).toBe("string");
+    // Verify we can use the id to retrieve errors
+    const errors = await client.getCrawlErrors(job.id!);
+    expect(errors).toHaveProperty("errors");
+    expect(errors).toHaveProperty("robotsBlocked");
+    expect(Array.isArray(errors.errors)).toBe(true);
+    expect(Array.isArray(errors.robotsBlocked)).toBe(true);
+  }, 180_000);
+
  test("crawl with prompt and wait", async () => {
    if (!client) throw new Error();
    const job = await client.crawl("https://docs.firecrawl.dev", { prompt: "Extract all blog posts", limit: 3, pollInterval: 1, timeout: 120 });

package/src/v2/methods/batch.ts
CHANGED
@@ -62,6 +62,7 @@ export async function getBatchScrapeStatus(
  const auto = pagination?.autoPaginate ?? true;
  if (!auto || !body.next) {
    return {
+      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
@@ -74,6 +75,7 @@ export async function getBatchScrapeStatus(

  const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
  return {
+    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
@@ -136,4 +138,4 @@ export function chunkUrls(urls: string[], chunkSize = 100): string[][] {
  const chunks: string[][] = [];
  for (let i = 0; i < urls.length; i += chunkSize) chunks.push(urls.slice(i, i + chunkSize));
  return chunks;
-}
+}

package/src/v2/methods/crawl.ts
CHANGED
@@ -72,6 +72,7 @@ export async function getCrawlStatus(
  const auto = pagination?.autoPaginate ?? true;
  if (!auto || !body.next) {
    return {
+      id: jobId,
      status: body.status,
      completed: body.completed ?? 0,
      total: body.total ?? 0,
@@ -85,6 +86,7 @@ export async function getCrawlStatus(
  const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);

  return {
+    id: jobId,
    status: body.status,
    completed: body.completed ?? 0,
    total: body.total ?? 0,
@@ -165,4 +167,4 @@ export async function crawlParamsPreview(http: HttpClient, url: string, prompt:
    if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview");
    throw err;
  }
-}
+}

package/src/v2/types.ts
CHANGED
@@ -466,6 +466,7 @@ export interface CrawlResponse {
}

export interface CrawlJob {
+  id: string;
  status: 'scraping' | 'completed' | 'failed' | 'cancelled';
  total: number;
  completed: number;
@@ -493,6 +494,7 @@ export interface BatchScrapeResponse {
}

export interface BatchScrapeJob {
+  id: string;
  status: 'scraping' | 'completed' | 'failed' | 'cancelled';
  completed: number;
  total: number;

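The id field added to CrawlJob and BatchScrapeJob is what lets a caller look up errors after a waited crawl or batch scrape, which is exactly what the new e2e tests above exercise. A minimal TypeScript sketch of that flow, assuming the client is constructed the way the SDK README shows (the import and constructor are not part of this diff; only the crawl/getCrawlErrors calls and the id field are):

import Firecrawl from "@mendable/firecrawl";

async function crawlWithErrorReport(): Promise<void> {
  // Assumed constructor shape; not shown in this diff.
  const client = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });

  // As of 4.6.1 the resolved CrawlJob carries the job id alongside status and progress.
  const job = await client.crawl("https://docs.firecrawl.dev", { limit: 3, pollInterval: 1, timeout: 120 });
  console.log(job.id, job.status, `${job.completed}/${job.total}`);

  // The id can then be passed to the error endpoint, as the new crawl e2e test does.
  const errors = await client.getCrawlErrors(job.id);
  console.log(errors.errors.length, "errors,", errors.robotsBlocked.length, "robots-blocked URLs");
}
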
package/src/v2/watcher.ts
CHANGED
@@ -3,6 +3,7 @@ import type { BatchScrapeJob, CrawlJob, Document } from "./types";
import type { HttpClient } from "./utils/httpClient";
import { getBatchScrapeStatus } from "./methods/batch";
import { getCrawlStatus } from "./methods/crawl";
+import { WebSocket as WS } from "ws";

type JobKind = "crawl" | "batch";

@@ -43,11 +44,17 @@ export class Watcher extends EventEmitter {
  async start(): Promise<void> {
    try {
      const url = this.buildWsUrl();
-
-
-
-
-
+
+      if (typeof WebSocket !== 'undefined') {
+        this.ws = new WebSocket(url, this.http.getApiKey()) as any;
+      } else {
+        this.ws = new WS(url, this.http.getApiKey()) as any;
+      }
+
+      if (this.ws) {
+        this.attachWsHandlers(this.ws);
+      }
+    } catch (err) {
      this.pollLoop();
    }
  }
@@ -76,7 +83,9 @@ export class Watcher extends EventEmitter {
      return;
    }
    if (type === "done") {
-
+      const payload = body.data || body;
+      const data = (payload.data || []) as Document[];
+      this.emit("done", { status: "completed", data, id: this.jobId });
      this.close();
      return;
    }
@@ -105,6 +114,7 @@ export class Watcher extends EventEmitter {
    const data = (payload.data || []) as Document[];
    const snap: Snapshot = this.kind === "crawl"
      ? {
+          id: this.jobId,
          status,
          completed: payload.completed ?? 0,
          total: payload.total ?? 0,
@@ -114,6 +124,7 @@ export class Watcher extends EventEmitter {
          data,
        }
      : {
+          id: this.jobId,
          status,
          completed: payload.completed ?? 0,
          total: payload.total ?? 0,
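
The watcher change reads most clearly in isolation: start() now prefers the runtime's global WebSocket (browsers and Node.js runtimes that expose one) and only falls back to the newly added ws dependency. A standalone TypeScript sketch of that selection pattern, mirroring the hunk above (the openSocket helper is illustrative and not part of the SDK; the API key is passed as the WebSocket subprotocol, as in the diff):

import { WebSocket as NodeWebSocket } from "ws";

// Illustrative helper: choose a socket implementation the way Watcher.start() does in 4.6.1.
function openSocket(url: string, apiKeyProtocol: string): WebSocket | NodeWebSocket {
  if (typeof WebSocket !== "undefined") {
    // A global constructor exists (browser, or a Node.js version that ships one).
    return new WebSocket(url, apiKeyProtocol);
  }
  // Otherwise use the ws package added as a dependency in this release.
  return new NodeWebSocket(url, apiKeyProtocol);
}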