@mendable/firecrawl 4.8.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ var require_package = __commonJS({
8
8
  "package.json"(exports, module) {
9
9
  module.exports = {
10
10
  name: "@mendable/firecrawl-js",
11
- version: "4.8.0",
11
+ version: "4.8.1",
12
12
  description: "JavaScript SDK for Firecrawl API",
13
13
  main: "dist/index.js",
14
14
  types: "dist/index.d.ts",
@@ -36,7 +36,6 @@ var require_package = __commonJS({
36
36
  dependencies: {
37
37
  axios: "^1.12.2",
38
38
  "typescript-event-target": "^1.1.1",
39
- ws: "^8.18.3",
40
39
  zod: "^3.23.8",
41
40
  "zod-to-json-schema": "^3.23.0"
42
41
  },
@@ -51,7 +50,6 @@ var require_package = __commonJS({
51
50
  "@types/mocha": "^10.0.6",
52
51
  "@types/node": "^20.12.12",
53
52
  "@types/uuid": "^9.0.8",
54
- "@types/ws": "^8.18.1",
55
53
  dotenv: "^16.4.5",
56
54
  jest: "^30.2.0",
57
55
  "ts-jest": "^29.4.5",
package/dist/index.cjs CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
35
35
  "package.json"(exports2, module2) {
36
36
  module2.exports = {
37
37
  name: "@mendable/firecrawl-js",
38
- version: "4.8.0",
38
+ version: "4.8.1",
39
39
  description: "JavaScript SDK for Firecrawl API",
40
40
  main: "dist/index.js",
41
41
  types: "dist/index.d.ts",
@@ -63,7 +63,6 @@ var require_package = __commonJS({
63
63
  dependencies: {
64
64
  axios: "^1.12.2",
65
65
  "typescript-event-target": "^1.1.1",
66
- ws: "^8.18.3",
67
66
  zod: "^3.23.8",
68
67
  "zod-to-json-schema": "^3.23.0"
69
68
  },
@@ -78,7 +77,6 @@ var require_package = __commonJS({
78
77
  "@types/mocha": "^10.0.6",
79
78
  "@types/node": "^20.12.12",
80
79
  "@types/uuid": "^9.0.8",
81
- "@types/ws": "^8.18.1",
82
80
  dotenv: "^16.4.5",
83
81
  jest: "^30.2.0",
84
82
  "ts-jest": "^29.4.5",
@@ -832,7 +830,57 @@ async function getTokenUsageHistorical(http, byApiKey) {
832
830
 
833
831
  // src/v2/watcher.ts
834
832
  var import_events = require("events");
835
- var import_ws = require("ws");
833
+ var hasGlobalWebSocket = () => {
834
+ if (typeof globalThis === "undefined") return void 0;
835
+ const candidate = globalThis.WebSocket;
836
+ return typeof candidate === "function" ? candidate : void 0;
837
+ };
838
+ var isNodeRuntime = () => typeof process !== "undefined" && !!process.versions?.node;
839
+ var cachedWebSocket;
840
+ var loadPromise;
841
+ var loadNodeWebSocket = async () => {
842
+ if (!isNodeRuntime()) return void 0;
843
+ try {
844
+ const undici = await import("undici");
845
+ const ctor = undici.WebSocket ?? undici.default?.WebSocket;
846
+ return typeof ctor === "function" ? ctor : void 0;
847
+ } catch {
848
+ return void 0;
849
+ }
850
+ };
851
+ var getWebSocketCtor = async () => {
852
+ if (cachedWebSocket) return cachedWebSocket;
853
+ const globalWs = hasGlobalWebSocket();
854
+ if (globalWs) {
855
+ cachedWebSocket = globalWs;
856
+ return cachedWebSocket;
857
+ }
858
+ if (!loadPromise) {
859
+ loadPromise = loadNodeWebSocket();
860
+ }
861
+ cachedWebSocket = await loadPromise;
862
+ return cachedWebSocket;
863
+ };
864
+ var decoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : void 0;
865
+ var ensureUtf8String = (data) => {
866
+ if (typeof data === "string") return data;
867
+ if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
868
+ return data.toString("utf8");
869
+ }
870
+ const convertView = (view) => {
871
+ if (typeof Buffer !== "undefined") {
872
+ return Buffer.from(view.buffer, view.byteOffset, view.byteLength).toString("utf8");
873
+ }
874
+ return decoder?.decode(view);
875
+ };
876
+ if (ArrayBuffer.isView(data)) {
877
+ return convertView(data);
878
+ }
879
+ if (data instanceof ArrayBuffer) {
880
+ return convertView(new Uint8Array(data));
881
+ }
882
+ return void 0;
883
+ };
836
884
  var Watcher = class extends import_events.EventEmitter {
837
885
  http;
838
886
  jobId;
@@ -841,6 +889,7 @@ var Watcher = class extends import_events.EventEmitter {
841
889
  timeout;
842
890
  ws;
843
891
  closed = false;
892
+ emittedDocumentKeys = /* @__PURE__ */ new Set();
844
893
  constructor(http, jobId, opts = {}) {
845
894
  super();
846
895
  this.http = http;
@@ -858,10 +907,14 @@ var Watcher = class extends import_events.EventEmitter {
858
907
  async start() {
859
908
  try {
860
909
  const url = this.buildWsUrl();
861
- if (typeof WebSocket !== "undefined") {
862
- this.ws = new WebSocket(url, this.http.getApiKey());
863
- } else {
864
- this.ws = new import_ws.WebSocket(url, this.http.getApiKey());
910
+ const wsCtor = await getWebSocketCtor();
911
+ if (!wsCtor) {
912
+ this.pollLoop();
913
+ return;
914
+ }
915
+ this.ws = new wsCtor(url, this.http.getApiKey());
916
+ if (this.ws && "binaryType" in this.ws) {
917
+ this.ws.binaryType = "arraybuffer";
865
918
  }
866
919
  if (this.ws) {
867
920
  this.attachWsHandlers(this.ws);
@@ -875,8 +928,9 @@ var Watcher = class extends import_events.EventEmitter {
875
928
  const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
876
929
  ws.onmessage = (ev) => {
877
930
  try {
878
- const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null;
879
- if (!body) return;
931
+ const raw = ensureUtf8String(ev.data);
932
+ if (!raw) return;
933
+ const body = JSON.parse(raw);
880
934
  const type = body.type;
881
935
  if (type === "error") {
882
936
  this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
@@ -896,6 +950,7 @@ var Watcher = class extends import_events.EventEmitter {
896
950
  if (type === "done") {
897
951
  const payload2 = body.data || body;
898
952
  const data = payload2.data || [];
953
+ if (data.length) this.emitDocuments(data);
899
954
  this.emit("done", { status: "completed", data, id: this.jobId });
900
955
  this.close();
901
956
  return;
@@ -914,8 +969,27 @@ var Watcher = class extends import_events.EventEmitter {
914
969
  if (!this.closed) this.pollLoop();
915
970
  };
916
971
  }
972
+ documentKey(doc) {
973
+ if (doc && typeof doc === "object") {
974
+ const explicitId = doc.id ?? doc.docId ?? doc.url;
975
+ if (typeof explicitId === "string" && explicitId.length) {
976
+ return explicitId;
977
+ }
978
+ }
979
+ try {
980
+ return JSON.stringify(doc);
981
+ } catch {
982
+ return `${Date.now()}-${Math.random()}`;
983
+ }
984
+ }
917
985
  emitDocuments(docs) {
918
- for (const doc of docs) this.emit("document", { ...doc, id: this.jobId });
986
+ for (const doc of docs) {
987
+ if (!doc) continue;
988
+ const key = this.documentKey(doc);
989
+ if (this.emittedDocumentKeys.has(key)) continue;
990
+ this.emittedDocumentKeys.add(key);
991
+ this.emit("document", { ...doc, id: this.jobId });
992
+ }
919
993
  }
920
994
  emitSnapshot(payload) {
921
995
  const status = payload.status;
@@ -951,6 +1025,7 @@ var Watcher = class extends import_events.EventEmitter {
951
1025
  while (!this.closed) {
952
1026
  try {
953
1027
  const snap = this.kind === "crawl" ? await getCrawlStatus(this.http, this.jobId) : await getBatchScrapeStatus(this.http, this.jobId);
1028
+ this.emitDocuments(snap.data || []);
954
1029
  this.emit("snapshot", snap);
955
1030
  if (["completed", "failed", "cancelled"].includes(snap.status)) {
956
1031
  this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
package/dist/index.d.cts CHANGED
@@ -578,10 +578,12 @@ declare class Watcher extends EventEmitter {
578
578
  private readonly timeout?;
579
579
  private ws?;
580
580
  private closed;
581
+ private readonly emittedDocumentKeys;
581
582
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);
582
583
  private buildWsUrl;
583
584
  start(): Promise<void>;
584
585
  private attachWsHandlers;
586
+ private documentKey;
585
587
  private emitDocuments;
586
588
  private emitSnapshot;
587
589
  private pollLoop;
package/dist/index.d.ts CHANGED
@@ -578,10 +578,12 @@ declare class Watcher extends EventEmitter {
578
578
  private readonly timeout?;
579
579
  private ws?;
580
580
  private closed;
581
+ private readonly emittedDocumentKeys;
581
582
  constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);
582
583
  private buildWsUrl;
583
584
  start(): Promise<void>;
584
585
  private attachWsHandlers;
586
+ private documentKey;
585
587
  private emitDocuments;
586
588
  private emitSnapshot;
587
589
  private pollLoop;
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-SP54QQ5D.js";
3
+ } from "./chunk-SY42GUIL.js";
4
4
 
5
5
  // src/v2/utils/httpClient.ts
6
6
  import axios from "axios";
@@ -712,7 +712,57 @@ async function getTokenUsageHistorical(http, byApiKey) {
712
712
 
713
713
  // src/v2/watcher.ts
714
714
  import { EventEmitter } from "events";
715
- import { WebSocket as WS } from "ws";
715
+ var hasGlobalWebSocket = () => {
716
+ if (typeof globalThis === "undefined") return void 0;
717
+ const candidate = globalThis.WebSocket;
718
+ return typeof candidate === "function" ? candidate : void 0;
719
+ };
720
+ var isNodeRuntime = () => typeof process !== "undefined" && !!process.versions?.node;
721
+ var cachedWebSocket;
722
+ var loadPromise;
723
+ var loadNodeWebSocket = async () => {
724
+ if (!isNodeRuntime()) return void 0;
725
+ try {
726
+ const undici = await import("undici");
727
+ const ctor = undici.WebSocket ?? undici.default?.WebSocket;
728
+ return typeof ctor === "function" ? ctor : void 0;
729
+ } catch {
730
+ return void 0;
731
+ }
732
+ };
733
+ var getWebSocketCtor = async () => {
734
+ if (cachedWebSocket) return cachedWebSocket;
735
+ const globalWs = hasGlobalWebSocket();
736
+ if (globalWs) {
737
+ cachedWebSocket = globalWs;
738
+ return cachedWebSocket;
739
+ }
740
+ if (!loadPromise) {
741
+ loadPromise = loadNodeWebSocket();
742
+ }
743
+ cachedWebSocket = await loadPromise;
744
+ return cachedWebSocket;
745
+ };
746
+ var decoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : void 0;
747
+ var ensureUtf8String = (data) => {
748
+ if (typeof data === "string") return data;
749
+ if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
750
+ return data.toString("utf8");
751
+ }
752
+ const convertView = (view) => {
753
+ if (typeof Buffer !== "undefined") {
754
+ return Buffer.from(view.buffer, view.byteOffset, view.byteLength).toString("utf8");
755
+ }
756
+ return decoder?.decode(view);
757
+ };
758
+ if (ArrayBuffer.isView(data)) {
759
+ return convertView(data);
760
+ }
761
+ if (data instanceof ArrayBuffer) {
762
+ return convertView(new Uint8Array(data));
763
+ }
764
+ return void 0;
765
+ };
716
766
  var Watcher = class extends EventEmitter {
717
767
  http;
718
768
  jobId;
@@ -721,6 +771,7 @@ var Watcher = class extends EventEmitter {
721
771
  timeout;
722
772
  ws;
723
773
  closed = false;
774
+ emittedDocumentKeys = /* @__PURE__ */ new Set();
724
775
  constructor(http, jobId, opts = {}) {
725
776
  super();
726
777
  this.http = http;
@@ -738,10 +789,14 @@ var Watcher = class extends EventEmitter {
738
789
  async start() {
739
790
  try {
740
791
  const url = this.buildWsUrl();
741
- if (typeof WebSocket !== "undefined") {
742
- this.ws = new WebSocket(url, this.http.getApiKey());
743
- } else {
744
- this.ws = new WS(url, this.http.getApiKey());
792
+ const wsCtor = await getWebSocketCtor();
793
+ if (!wsCtor) {
794
+ this.pollLoop();
795
+ return;
796
+ }
797
+ this.ws = new wsCtor(url, this.http.getApiKey());
798
+ if (this.ws && "binaryType" in this.ws) {
799
+ this.ws.binaryType = "arraybuffer";
745
800
  }
746
801
  if (this.ws) {
747
802
  this.attachWsHandlers(this.ws);
@@ -755,8 +810,9 @@ var Watcher = class extends EventEmitter {
755
810
  const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
756
811
  ws.onmessage = (ev) => {
757
812
  try {
758
- const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null;
759
- if (!body) return;
813
+ const raw = ensureUtf8String(ev.data);
814
+ if (!raw) return;
815
+ const body = JSON.parse(raw);
760
816
  const type = body.type;
761
817
  if (type === "error") {
762
818
  this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
@@ -776,6 +832,7 @@ var Watcher = class extends EventEmitter {
776
832
  if (type === "done") {
777
833
  const payload2 = body.data || body;
778
834
  const data = payload2.data || [];
835
+ if (data.length) this.emitDocuments(data);
779
836
  this.emit("done", { status: "completed", data, id: this.jobId });
780
837
  this.close();
781
838
  return;
@@ -794,8 +851,27 @@ var Watcher = class extends EventEmitter {
794
851
  if (!this.closed) this.pollLoop();
795
852
  };
796
853
  }
854
+ documentKey(doc) {
855
+ if (doc && typeof doc === "object") {
856
+ const explicitId = doc.id ?? doc.docId ?? doc.url;
857
+ if (typeof explicitId === "string" && explicitId.length) {
858
+ return explicitId;
859
+ }
860
+ }
861
+ try {
862
+ return JSON.stringify(doc);
863
+ } catch {
864
+ return `${Date.now()}-${Math.random()}`;
865
+ }
866
+ }
797
867
  emitDocuments(docs) {
798
- for (const doc of docs) this.emit("document", { ...doc, id: this.jobId });
868
+ for (const doc of docs) {
869
+ if (!doc) continue;
870
+ const key = this.documentKey(doc);
871
+ if (this.emittedDocumentKeys.has(key)) continue;
872
+ this.emittedDocumentKeys.add(key);
873
+ this.emit("document", { ...doc, id: this.jobId });
874
+ }
799
875
  }
800
876
  emitSnapshot(payload) {
801
877
  const status = payload.status;
@@ -831,6 +907,7 @@ var Watcher = class extends EventEmitter {
831
907
  while (!this.closed) {
832
908
  try {
833
909
  const snap = this.kind === "crawl" ? await getCrawlStatus(this.http, this.jobId) : await getBatchScrapeStatus(this.http, this.jobId);
910
+ this.emitDocuments(snap.data || []);
834
911
  this.emit("snapshot", snap);
835
912
  if (["completed", "failed", "cancelled"].includes(snap.status)) {
836
913
  this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
@@ -1087,7 +1164,7 @@ var FirecrawlApp = class {
1087
1164
  if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
1088
1165
  return process.env.npm_package_version;
1089
1166
  }
1090
- const packageJson = await import("./package-MR6WWGRM.js");
1167
+ const packageJson = await import("./package-G5T62GGJ.js");
1091
1168
  return packageJson.default.version;
1092
1169
  } catch (error) {
1093
1170
  const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -1,4 +1,4 @@
1
1
  import {
2
2
  require_package
3
- } from "./chunk-SP54QQ5D.js";
3
+ } from "./chunk-SY42GUIL.js";
4
4
  export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl",
3
- "version": "4.8.0",
3
+ "version": "4.8.1",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -28,7 +28,6 @@
28
28
  "dependencies": {
29
29
  "axios": "^1.12.2",
30
30
  "typescript-event-target": "^1.1.1",
31
- "ws": "^8.18.3",
32
31
  "zod": "^3.23.8",
33
32
  "zod-to-json-schema": "^3.23.0"
34
33
  },
@@ -43,7 +42,6 @@
43
42
  "@types/mocha": "^10.0.6",
44
43
  "@types/node": "^20.12.12",
45
44
  "@types/uuid": "^9.0.8",
46
- "@types/ws": "^8.18.1",
47
45
  "dotenv": "^16.4.5",
48
46
  "jest": "^30.2.0",
49
47
  "ts-jest": "^29.4.5",
@@ -0,0 +1,10 @@
1
+ type WebSocketCtor = typeof globalThis.WebSocket;
2
+
3
+ declare module "node:undici" {
4
+ export const WebSocket: WebSocketCtor;
5
+ const _default: {
6
+ WebSocket: WebSocketCtor;
7
+ };
8
+ export default _default;
9
+ }
10
+
package/src/v2/watcher.ts CHANGED
@@ -3,7 +3,73 @@ import type { BatchScrapeJob, CrawlJob, Document } from "./types";
3
3
  import type { HttpClient } from "./utils/httpClient";
4
4
  import { getBatchScrapeStatus } from "./methods/batch";
5
5
  import { getCrawlStatus } from "./methods/crawl";
6
- import { WebSocket as WS } from "ws";
6
+ // Note: browsers/Deno expose globalThis.WebSocket, but many Node runtimes (<22.4 or without
7
+ // experimental flags) do not. We lazily fall back to node:undici.
8
+
9
+ type WebSocketConstructor = new (url: string, protocols?: string | string[]) => WebSocket;
10
+
11
+ const hasGlobalWebSocket = (): WebSocketConstructor | undefined => {
12
+ if (typeof globalThis === "undefined") return undefined;
13
+ const candidate = (globalThis as any).WebSocket;
14
+ return typeof candidate === "function" ? (candidate as WebSocketConstructor) : undefined;
15
+ };
16
+
17
+ const isNodeRuntime = () => typeof process !== "undefined" && !!process.versions?.node;
18
+
19
+ let cachedWebSocket: WebSocketConstructor | undefined;
20
+ let loadPromise: Promise<WebSocketConstructor | undefined> | undefined;
21
+
22
+ const loadNodeWebSocket = async (): Promise<WebSocketConstructor | undefined> => {
23
+ if (!isNodeRuntime()) return undefined;
24
+ try {
25
+ const undici = await import("node:undici");
26
+ const ctor = (undici as any).WebSocket ?? (undici as any).default?.WebSocket;
27
+ return typeof ctor === "function" ? (ctor as WebSocketConstructor) : undefined;
28
+ } catch {
29
+ return undefined;
30
+ }
31
+ };
32
+
33
+ const getWebSocketCtor = async (): Promise<WebSocketConstructor | undefined> => {
34
+ if (cachedWebSocket) return cachedWebSocket;
35
+ const globalWs = hasGlobalWebSocket();
36
+ if (globalWs) {
37
+ cachedWebSocket = globalWs;
38
+ return cachedWebSocket;
39
+ }
40
+ if (!loadPromise) {
41
+ loadPromise = loadNodeWebSocket();
42
+ }
43
+ cachedWebSocket = await loadPromise;
44
+ return cachedWebSocket;
45
+ };
46
+
47
+ const decoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : undefined;
48
+
49
+ const ensureUtf8String = (data: unknown): string | undefined => {
50
+ if (typeof data === "string") return data;
51
+
52
+ if (typeof Buffer !== "undefined" && Buffer.isBuffer(data)) {
53
+ return data.toString("utf8");
54
+ }
55
+
56
+ const convertView = (view: ArrayBufferView): string | undefined => {
57
+ if (typeof Buffer !== "undefined") {
58
+ return Buffer.from(view.buffer, view.byteOffset, view.byteLength).toString("utf8");
59
+ }
60
+ return decoder?.decode(view);
61
+ };
62
+
63
+ if (ArrayBuffer.isView(data)) {
64
+ return convertView(data);
65
+ }
66
+
67
+ if (data instanceof ArrayBuffer) {
68
+ return convertView(new Uint8Array(data));
69
+ }
70
+
71
+ return undefined;
72
+ };
7
73
 
8
74
  type JobKind = "crawl" | "batch";
9
75
 
@@ -23,6 +89,7 @@ export class Watcher extends EventEmitter {
23
89
  private readonly timeout?: number;
24
90
  private ws?: WebSocket;
25
91
  private closed = false;
92
+ private readonly emittedDocumentKeys = new Set<string>();
26
93
 
27
94
  constructor(http: HttpClient, jobId: string, opts: WatcherOptions = {}) {
28
95
  super();
@@ -44,11 +111,14 @@ export class Watcher extends EventEmitter {
44
111
  async start(): Promise<void> {
45
112
  try {
46
113
  const url = this.buildWsUrl();
47
-
48
- if (typeof WebSocket !== 'undefined') {
49
- this.ws = new WebSocket(url, this.http.getApiKey()) as any;
50
- } else {
51
- this.ws = new WS(url, this.http.getApiKey()) as any;
114
+ const wsCtor = await getWebSocketCtor();
115
+ if (!wsCtor) {
116
+ this.pollLoop();
117
+ return;
118
+ }
119
+ this.ws = new wsCtor(url, this.http.getApiKey()) as any;
120
+ if (this.ws && "binaryType" in this.ws) {
121
+ (this.ws as any).binaryType = "arraybuffer";
52
122
  }
53
123
 
54
124
  if (this.ws) {
@@ -64,8 +134,9 @@ export class Watcher extends EventEmitter {
64
134
  const timeoutMs = this.timeout ? this.timeout * 1000 : undefined;
65
135
  ws.onmessage = (ev: MessageEvent) => {
66
136
  try {
67
- const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null;
68
- if (!body) return;
137
+ const raw = ensureUtf8String(ev.data);
138
+ if (!raw) return;
139
+ const body = JSON.parse(raw);
69
140
  const type = body.type as string | undefined;
70
141
  if (type === "error") {
71
142
  this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
@@ -85,6 +156,7 @@ export class Watcher extends EventEmitter {
85
156
  if (type === "done") {
86
157
  const payload = body.data || body;
87
158
  const data = (payload.data || []) as Document[];
159
+ if (data.length) this.emitDocuments(data);
88
160
  this.emit("done", { status: "completed", data, id: this.jobId });
89
161
  this.close();
90
162
  return;
@@ -105,8 +177,28 @@ export class Watcher extends EventEmitter {
105
177
  };
106
178
  }
107
179
 
180
+ private documentKey(doc: Document): string {
181
+ if (doc && typeof doc === "object") {
182
+ const explicitId = (doc as any).id ?? (doc as any).docId ?? (doc as any).url;
183
+ if (typeof explicitId === "string" && explicitId.length) {
184
+ return explicitId;
185
+ }
186
+ }
187
+ try {
188
+ return JSON.stringify(doc);
189
+ } catch {
190
+ return `${Date.now()}-${Math.random()}`;
191
+ }
192
+ }
193
+
108
194
  private emitDocuments(docs: Document[]) {
109
- for (const doc of docs) this.emit("document", { ...(doc as any), id: this.jobId });
195
+ for (const doc of docs) {
196
+ if (!doc) continue;
197
+ const key = this.documentKey(doc);
198
+ if (this.emittedDocumentKeys.has(key)) continue;
199
+ this.emittedDocumentKeys.add(key);
200
+ this.emit("document", { ...(doc as any), id: this.jobId });
201
+ }
110
202
  }
111
203
 
112
204
  private emitSnapshot(payload: any) {
@@ -148,6 +240,7 @@ export class Watcher extends EventEmitter {
148
240
  const snap = this.kind === "crawl"
149
241
  ? await getCrawlStatus(this.http as any, this.jobId)
150
242
  : await getBatchScrapeStatus(this.http as any, this.jobId);
243
+ this.emitDocuments((snap.data || []) as Document[]);
151
244
  this.emit("snapshot", snap);
152
245
  if (["completed", "failed", "cancelled"].includes(snap.status)) {
153
246
  this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
package/tsup.config.ts CHANGED
@@ -8,7 +8,6 @@ export default defineConfig({
8
8
  clean: true,
9
9
  platform: "node",
10
10
  target: "node22",
11
- external: ["ws"],
12
11
  noExternal: ["typescript-event-target"],
13
12
  esbuildOptions(options) {
14
13
  options.define = {