firecrawl 1.29.3 → 3.0.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (43)
  1. package/.env.example +4 -2
  2. package/README.md +85 -78
  3. package/audit-ci.jsonc +4 -0
  4. package/dist/chunk-OIZ6OKY4.js +85 -0
  5. package/dist/index.cjs +961 -35
  6. package/dist/index.d.cts +524 -11
  7. package/dist/index.d.ts +524 -11
  8. package/dist/index.js +953 -27
  9. package/dist/package-V5IPFKBE.js +4 -0
  10. package/package.json +6 -6
  11. package/src/__tests__/e2e/v2/batch.test.ts +74 -0
  12. package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
  13. package/src/__tests__/e2e/v2/extract.test.ts +70 -0
  14. package/src/__tests__/e2e/v2/map.test.ts +55 -0
  15. package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
  16. package/src/__tests__/e2e/v2/search.test.ts +247 -0
  17. package/src/__tests__/e2e/v2/usage.test.ts +36 -0
  18. package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
  19. package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
  20. package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
  21. package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
  22. package/src/__tests__/unit/v2/validation.test.ts +59 -0
  23. package/src/index.backup.ts +2146 -0
  24. package/src/index.ts +27 -2134
  25. package/src/v1/index.ts +2158 -0
  26. package/src/v2/client.ts +283 -0
  27. package/src/v2/methods/batch.ts +119 -0
  28. package/src/v2/methods/crawl.ts +144 -0
  29. package/src/v2/methods/extract.ts +86 -0
  30. package/src/v2/methods/map.ts +37 -0
  31. package/src/v2/methods/scrape.ts +26 -0
  32. package/src/v2/methods/search.ts +69 -0
  33. package/src/v2/methods/usage.ts +39 -0
  34. package/src/v2/types.ts +337 -0
  35. package/src/v2/utils/errorHandler.ts +18 -0
  36. package/src/v2/utils/getVersion.ts +14 -0
  37. package/src/v2/utils/httpClient.ts +99 -0
  38. package/src/v2/utils/validation.ts +50 -0
  39. package/src/v2/watcher.ts +159 -0
  40. package/tsconfig.json +2 -1
  41. package/dist/package-Z6F7JDXI.js +0 -111
  42. /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
  43. /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
package/src/v2/watcher.ts ADDED
@@ -0,0 +1,159 @@
1
+ import { EventEmitter } from "events";
2
+ import type { BatchScrapeJob, CrawlJob, Document } from "./types";
3
+ import type { HttpClient } from "./utils/httpClient";
4
+ import { getBatchScrapeStatus } from "./methods/batch";
5
+ import { getCrawlStatus } from "./methods/crawl";
6
+
7
+ type JobKind = "crawl" | "batch";
8
+
9
/**
 * Optional settings accepted by the `Watcher` constructor.
 */
export interface WatcherOptions {
  /** Which kind of job to watch; the constructor falls back to "crawl". */
  kind?: JobKind;
  /** Delay between status polls, in seconds; the constructor falls back to 2. */
  pollInterval?: number;
  /** Overall time budget for watching the job, in seconds; unset means no limit. */
  timeout?: number;
}
14
+
15
+ type Snapshot = CrawlJob | BatchScrapeJob;
16
+
17
/**
 * Streams the progress of a crawl or batch-scrape job as events.
 *
 * `start()` first tries a WebSocket connection; if the socket cannot be
 * constructed, or it closes before the job finished, the watcher falls back
 * to polling the job status over HTTP.
 *
 * Emitted events:
 *  - `"document"`: one scraped document, tagged with `id` = the job id.
 *  - `"snapshot"`: the current job status snapshot.
 *  - `"done"`: `{ status, data, id }`, emitted at most once, after which the
 *    watcher closes itself.
 *  - `"error"`: `{ status: "failed", data: [], error, id }`. It is only
 *    emitted when at least one `"error"` listener is registered, because an
 *    unhandled `"error"` event on an EventEmitter throws.
 *
 * No events are emitted after `close()`.
 */
export class Watcher extends EventEmitter {
  /** Job statuses after which no further updates are expected. */
  private static readonly TERMINAL_STATUSES: readonly string[] = ["completed", "failed", "cancelled"];
  /** Largest delay `setTimeout` accepts before it overflows to 1 ms. */
  private static readonly MAX_TIMER_MS = 0x7fffffff;

  private readonly http: HttpClient;
  private readonly jobId: string;
  private readonly kind: JobKind;
  private readonly pollInterval: number;
  private readonly timeout?: number;
  private ws?: WebSocket;
  private closed = false;
  /** True while a poll loop is running, so a second one is never started. */
  private polling = false;
  /** Timer that enforces `timeout` for both the WebSocket and polling paths. */
  private timeoutTimer?: ReturnType<typeof setTimeout>;

  /**
   * @param http   Client used to derive the WebSocket URL / API key and to poll.
   * @param jobId  Id of the crawl or batch-scrape job to watch.
   * @param opts   Optional kind ("crawl" by default), poll interval in seconds
   *               (2 by default) and timeout in seconds (none by default).
   */
  constructor(http: HttpClient, jobId: string, opts: WatcherOptions = {}) {
    super();
    this.http = http;
    this.jobId = jobId;
    this.kind = opts.kind ?? "crawl";
    this.pollInterval = opts.pollInterval ?? 2;
    this.timeout = opts.timeout;
  }

  /** Builds the ws:// or wss:// endpoint for this job from the HTTP API URL. */
  private buildWsUrl(): string {
    // http -> ws, https -> wss; drop trailing slashes so the path is not doubled.
    const wsBase = this.http.getApiUrl().replace(/^http/, "ws").replace(/\/+$/, "");
    const path = this.kind === "crawl" ? `/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`;
    return `${wsBase}${path}`;
  }

  /**
   * Starts watching. Does nothing if the watcher has already been closed.
   * The timeout (if any) is measured from this call and covers both the
   * WebSocket phase and any polling fallback.
   */
  async start(): Promise<void> {
    if (this.closed) return;
    this.armTimeout();
    try {
      const url = this.buildWsUrl();
      // Pass API key as subprotocol for browser compatibility
      this.ws = new WebSocket(url, this.http.getApiKey());
      this.attachWsHandlers(this.ws);
    } catch {
      // No usable WebSocket (missing global, invalid URL/protocol): poll instead.
      void this.pollLoop();
    }
  }

  /** Schedules an automatic `close()` once the configured timeout elapses. */
  private armTimeout(): void {
    if (!this.timeout || this.timeoutTimer !== undefined) return;
    const ms = Math.min(this.timeout * 1000, Watcher.MAX_TIMER_MS);
    this.timeoutTimer = setTimeout(() => this.close(), ms);
  }

  /** Wires message, error and close handlers onto the given socket. */
  private attachWsHandlers(ws: WebSocket): void {
    ws.onmessage = (ev: MessageEvent) => {
      if (this.closed) return;
      try {
        this.handleMessage(ev.data);
      } catch {
        // Best effort: ignore malformed frames and exceptions from listeners.
      }
    };
    ws.onerror = () => {
      if (this.closed) return;
      try {
        this.emitError("WebSocket error");
      } finally {
        // Always release the socket, even if an "error" listener throws.
        this.close();
      }
    };
    ws.onclose = () => {
      // Socket dropped before the job finished: continue by polling.
      if (!this.closed) void this.pollLoop();
    };
  }

  /** Parses one WebSocket frame and dispatches it by its `type` field. */
  private handleMessage(raw: unknown): void {
    if (typeof raw !== "string") return;
    const body = JSON.parse(raw);
    if (!body) return;
    switch (body.type as string | undefined) {
      case "error":
        this.emitError(body.error);
        return;
      case "catchup": {
        const payload = body.data || {};
        this.emitDocuments(payload.data || []);
        this.emitSnapshot(payload);
        return;
      }
      case "document":
        // Tag live documents with the job id, exactly like catch-up documents.
        if (body.data) this.emitDocuments([body.data as Document]);
        return;
      case "done":
        this.finish("completed", []);
        return;
      default: {
        // Untyped frames may carry a status snapshot directly or under `data`.
        const payload = body.data || body;
        if (payload && payload.status) this.emitSnapshot(payload);
      }
    }
  }

  /** Emits an "error" event, but only when someone is listening for it. */
  private emitError(error: unknown): void {
    if (this.listenerCount("error") === 0) return;
    this.emit("error", { status: "failed", data: [], error, id: this.jobId });
  }

  /** Emits one "document" event per document, each tagged with the job id. */
  private emitDocuments(docs: Document[]): void {
    for (const doc of docs) this.emit("document", { ...(doc as any), id: this.jobId });
  }

  /** Normalizes a status payload, emits it, and finishes on a terminal status. */
  private emitSnapshot(payload: any): void {
    const status = payload.status as Snapshot["status"];
    const data = (payload.data || []) as Document[];
    const snap: Snapshot = {
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
      creditsUsed: payload.creditsUsed,
      expiresAt: payload.expiresAt,
      next: payload.next ?? null,
      data,
    };
    this.emit("snapshot", snap);
    if (Watcher.TERMINAL_STATUSES.includes(status as string)) this.finish(status, data);
  }

  /** Emits "done" at most once and then closes the watcher. */
  private finish(status: Snapshot["status"], data: Document[]): void {
    if (this.closed) return;
    try {
      this.emit("done", { status, data, id: this.jobId });
    } finally {
      this.close();
    }
  }

  /**
   * Polls the job status until it reaches a terminal status or the watcher is
   * closed (explicitly or by the timeout). Polling errors are ignored and the
   * next attempt is made after the poll interval (at least one second).
   */
  private async pollLoop(): Promise<void> {
    if (this.polling) return;
    this.polling = true;
    const delayMs = Math.max(1000, this.pollInterval * 1000);
    try {
      while (!this.closed) {
        try {
          const snap = this.kind === "crawl"
            ? await getCrawlStatus(this.http as any, this.jobId)
            : await getBatchScrapeStatus(this.http as any, this.jobId);
          // close() may have been called while the request was in flight.
          if (this.closed) break;
          this.emit("snapshot", snap);
          if (Watcher.TERMINAL_STATUSES.includes(snap.status)) {
            this.finish(snap.status, snap.data);
            break;
          }
        } catch {
          // ignore polling errors
        }
        await new Promise((r) => setTimeout(r, delayMs));
      }
    } finally {
      this.polling = false;
    }
  }

  /** Stops watching: cancels the timeout, closes the socket and ends polling. */
  close(): void {
    this.closed = true;
    if (this.timeoutTimer !== undefined) {
      clearTimeout(this.timeoutTimer);
      this.timeoutTimer = undefined;
    }
    if (this.ws && typeof this.ws.close === "function") this.ws.close();
  }
}
159
+
package/tsconfig.json CHANGED
@@ -17,7 +17,8 @@
17
17
  "noImplicitOverride": true,
18
18
 
19
19
  /* If NOT transpiling with TypeScript: */
20
- "module": "NodeNext",
20
+ "module": "ESNext",
21
+ "moduleResolution": "Bundler",
21
22
  "noEmit": true,
22
23
  },
23
24
  "include": ["src/**/*"],
package/dist/package-Z6F7JDXI.js REMOVED
@@ -1,111 +0,0 @@
1
- // package.json
2
- var name = "@mendable/firecrawl-js";
3
- var version = "1.29.3";
4
- var description = "JavaScript SDK for Firecrawl API";
5
- var main = "dist/index.js";
6
- var types = "dist/index.d.ts";
7
- var exports = {
8
- "./package.json": "./package.json",
9
- ".": {
10
- import: "./dist/index.js",
11
- default: "./dist/index.cjs"
12
- }
13
- };
14
- var type = "module";
15
- var scripts = {
16
- build: "tsup",
17
- "build-and-publish": "npm run build && npm publish --access public",
18
- "publish-beta": "npm run build && npm publish --access public --tag beta",
19
- test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts",
20
- "test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/unit/*.test.ts"
21
- };
22
- var repository = {
23
- type: "git",
24
- url: "git+https://github.com/mendableai/firecrawl.git"
25
- };
26
- var author = "Mendable.ai";
27
- var license = "MIT";
28
- var dependencies = {
29
- axios: "^1.11.0",
30
- "typescript-event-target": "^1.1.1",
31
- zod: "^3.23.8",
32
- "zod-to-json-schema": "^3.23.0"
33
- };
34
- var bugs = {
35
- url: "https://github.com/mendableai/firecrawl/issues"
36
- };
37
- var homepage = "https://github.com/mendableai/firecrawl#readme";
38
- var devDependencies = {
39
- "@jest/globals": "^30.0.5",
40
- "@types/dotenv": "^8.2.0",
41
- "@types/jest": "^30.0.0",
42
- "@types/mocha": "^10.0.6",
43
- "@types/node": "^20.12.12",
44
- "@types/uuid": "^9.0.8",
45
- dotenv: "^16.4.5",
46
- jest: "^30.0.5",
47
- "ts-jest": "^29.4.0",
48
- tsup: "^8.5.0",
49
- typescript: "^5.4.5",
50
- uuid: "^9.0.1"
51
- };
52
- var keywords = [
53
- "firecrawl",
54
- "mendable",
55
- "crawler",
56
- "web",
57
- "scraper",
58
- "api",
59
- "sdk"
60
- ];
61
- var engines = {
62
- node: ">=22.0.0"
63
- };
64
- var pnpm = {
65
- overrides: {
66
- "@babel/helpers@<7.26.10": ">=7.26.10",
67
- "brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
68
- "brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
69
- }
70
- };
71
- var package_default = {
72
- name,
73
- version,
74
- description,
75
- main,
76
- types,
77
- exports,
78
- type,
79
- scripts,
80
- repository,
81
- author,
82
- license,
83
- dependencies,
84
- bugs,
85
- homepage,
86
- devDependencies,
87
- keywords,
88
- engines,
89
- pnpm
90
- };
91
- export {
92
- author,
93
- bugs,
94
- package_default as default,
95
- dependencies,
96
- description,
97
- devDependencies,
98
- engines,
99
- exports,
100
- homepage,
101
- keywords,
102
- license,
103
- main,
104
- name,
105
- pnpm,
106
- repository,
107
- scripts,
108
- type,
109
- types,
110
- version
111
- };