firecrawl 1.29.2 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -2
- package/LICENSE +0 -0
- package/README.md +85 -78
- package/audit-ci.jsonc +4 -0
- package/dist/chunk-JFWW4BWA.js +85 -0
- package/dist/index.cjs +1005 -42
- package/dist/index.d.cts +535 -11
- package/dist/index.d.ts +535 -11
- package/dist/index.js +994 -32
- package/dist/package-KYZ3HXR5.js +4 -0
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +6 -5
- package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +1 -0
- package/src/__tests__/e2e/v2/batch.test.ts +74 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
- package/src/__tests__/e2e/v2/extract.test.ts +70 -0
- package/src/__tests__/e2e/v2/map.test.ts +55 -0
- package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
- package/src/__tests__/e2e/v2/search.test.ts +247 -0
- package/src/__tests__/e2e/v2/usage.test.ts +36 -0
- package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
- package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
- package/src/__tests__/unit/v1/monitor-job-status-retry.test.ts +154 -0
- package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
- package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
- package/src/__tests__/unit/v2/validation.test.ts +59 -0
- package/src/index.backup.ts +2146 -0
- package/src/index.ts +27 -2071
- package/src/v1/index.ts +2158 -0
- package/src/v2/client.ts +281 -0
- package/src/v2/methods/batch.ts +131 -0
- package/src/v2/methods/crawl.ts +160 -0
- package/src/v2/methods/extract.ts +86 -0
- package/src/v2/methods/map.ts +37 -0
- package/src/v2/methods/scrape.ts +26 -0
- package/src/v2/methods/search.ts +69 -0
- package/src/v2/methods/usage.ts +39 -0
- package/src/v2/types.ts +308 -0
- package/src/v2/utils/errorHandler.ts +18 -0
- package/src/v2/utils/getVersion.ts +14 -0
- package/src/v2/utils/httpClient.ts +99 -0
- package/src/v2/utils/validation.ts +50 -0
- package/src/v2/watcher.ts +159 -0
- package/tsconfig.json +2 -1
- package/tsup.config.ts +0 -0
- package/dist/package-E7ICGMY6.js +0 -110
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { EventEmitter } from "events";
|
|
2
|
+
import type { BatchScrapeJob, CrawlJob, Document } from "./types";
|
|
3
|
+
import type { HttpClient } from "./utils/httpClient";
|
|
4
|
+
import { getBatchScrapeStatus } from "./methods/batch";
|
|
5
|
+
import { getCrawlStatus } from "./methods/crawl";
|
|
6
|
+
|
|
7
|
+
// Kind of job a Watcher follows; it selects both the WebSocket path and the status function used when polling.
type JobKind = "crawl" | "batch";
|
|
8
|
+
|
|
9
|
+
/**
 * Optional settings accepted by the `Watcher` constructor.
 */
export interface WatcherOptions {
  /** Job type to watch; the watcher uses "crawl" when this is omitted. */
  kind?: JobKind;
  /**
   * Delay between status polls, in seconds. The watcher defaults to 2 and
   * never waits less than one second between polls.
   */
  pollInterval?: number;
  /** Time budget in seconds; when omitted no time limit is applied. */
  timeout?: number;
}
|
|
14
|
+
|
|
15
|
+
// Job status payload emitted with "snapshot" events: either a crawl job or a batch scrape job.
type Snapshot = CrawlJob | BatchScrapeJob;
|
|
16
|
+
|
|
17
|
+
/**
 * Follows a crawl or batch-scrape job and re-publishes its progress as events.
 *
 * `start()` first tries a WebSocket connection; if the socket cannot be
 * created, or it closes before the watcher is closed, the watcher falls back
 * to polling the job status.
 *
 * Events:
 * - `"document"`: a single scraped document.
 * - `"snapshot"`: the current job status payload.
 * - `"done"`: `{ status, data, id }` once the job reaches a terminal status
 *   or the server sends a `done` message.
 * - `"error"`: `{ status: "failed", data: [], error, id }`. Only emitted when
 *   at least one `"error"` listener is registered, because `EventEmitter`
 *   throws on an unhandled `"error"` event.
 */
export class Watcher extends EventEmitter {
  /** Statuses after which no further updates are expected. */
  private static readonly TERMINAL_STATUSES: readonly string[] = ["completed", "failed", "cancelled"];

  private readonly http: HttpClient;
  private readonly jobId: string;
  private readonly kind: JobKind;
  private readonly pollInterval: number;
  private readonly timeout?: number;
  private ws?: WebSocket;
  private closed = false;
  /** Deadline timer for the WebSocket phase; cleared by `close()`. */
  private timeoutTimer?: ReturnType<typeof setTimeout>;

  /**
   * @param http Client used to read the API URL / API key and to poll status.
   * @param jobId Identifier of the job to watch.
   * @param opts Optional kind (default "crawl"), poll interval in seconds
   *   (default 2) and timeout in seconds (default: none).
   */
  constructor(http: HttpClient, jobId: string, opts: WatcherOptions = {}) {
    super();
    this.http = http;
    this.jobId = jobId;
    this.kind = opts.kind ?? "crawl";
    this.pollInterval = opts.pollInterval ?? 2;
    this.timeout = opts.timeout;
  }

  /** Builds the WebSocket URL for this job from the HTTP API URL. */
  private buildWsUrl(): string {
    // replace http/https with ws/wss
    const apiUrl = this.http.getApiUrl();
    const wsBase = apiUrl.replace(/^http/, "ws");
    const path = this.kind === "crawl" ? `/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`;
    return `${wsBase}${path}`;
  }

  /**
   * Starts watching. Opens a WebSocket and, if that throws (for example when
   * no `WebSocket` global exists), starts the polling loop instead.
   */
  async start(): Promise<void> {
    try {
      const url = this.buildWsUrl();
      // Pass API key as subprotocol for browser compatibility
      this.ws = new WebSocket(url, this.http.getApiKey());
      this.attachWsHandlers(this.ws);
    } catch {
      // Fallback to polling immediately; the loop handles its own errors.
      void this.pollLoop();
    }
  }

  /** Wires message / error / close handlers onto the socket. */
  private attachWsHandlers(ws: WebSocket) {
    const timeoutMs = this.timeout ? this.timeout * 1000 : undefined;
    if (timeoutMs) {
      // Use a timer so the deadline also applies when the server stays silent;
      // checking only when a message arrives would never fire in that case.
      this.timeoutTimer = setTimeout(() => this.close(), timeoutMs);
    }

    ws.onmessage = (ev: MessageEvent) => {
      // Nothing may be emitted once the watcher has been closed.
      if (this.closed) return;
      try {
        const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null;
        if (!body) return;
        const type = body.type as string | undefined;
        if (type === "error") {
          this.emitError(body.error);
          return;
        }
        if (type === "catchup") {
          const payload = body.data || {};
          this.emitDocuments(payload.data || []);
          this.emitSnapshot(payload);
          return;
        }
        if (type === "document") {
          const doc = body.data;
          if (doc) this.emit("document", doc as Document & { id: string });
          return;
        }
        if (type === "done") {
          this.emit("done", { status: "completed", data: [], id: this.jobId });
          this.close();
          return;
        }
        // Any other message: treat it as a status payload if it carries one.
        const payload = body.data || body;
        if (payload && payload.status) this.emitSnapshot(payload);
      } catch {
        // Best effort: malformed frames (and exceptions thrown by listeners
        // while handling a frame) are ignored so the stream keeps running.
      }
    };

    ws.onerror = () => {
      // NOTE(review): closing here marks the watcher closed, so the polling
      // fallback in onclose is skipped after a socket error; confirm intended.
      this.emitError("WebSocket error");
      this.close();
    };

    ws.onclose = () => {
      if (this.closed) return;
      // The socket went away while still watching: drop the socket-phase
      // deadline (polling applies its own) and continue by polling.
      this.clearTimeoutTimer();
      void this.pollLoop();
    };
  }

  /**
   * Emits an `"error"` event with the failure payload, but only when someone
   * listens: an unhandled `"error"` event makes `EventEmitter` throw, which
   * would otherwise escape from the socket's error callback.
   */
  private emitError(error: unknown): void {
    if (this.listenerCount("error") === 0) return;
    this.emit("error", { status: "failed", data: [], error, id: this.jobId });
  }

  /** Emits one `"document"` event per entry, tagging each with the job id. */
  private emitDocuments(docs: Document[]) {
    for (const doc of docs) this.emit("document", { ...(doc as any), id: this.jobId });
  }

  /**
   * Normalises a status payload into a snapshot, emits it, and finishes the
   * watcher (`"done"` + `close()`) when the status is terminal.
   */
  private emitSnapshot(payload: any) {
    const status = payload.status as Snapshot["status"];
    const data = (payload.data || []) as Document[];
    // The snapshot has the same fields for crawl and batch jobs.
    const snap: Snapshot = {
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
      creditsUsed: payload.creditsUsed,
      expiresAt: payload.expiresAt,
      next: payload.next ?? null,
      data,
    };
    this.emit("snapshot", snap);
    if (Watcher.TERMINAL_STATUSES.includes(status)) {
      this.emit("done", { status, data, id: this.jobId });
      this.close();
    }
  }

  /**
   * Polls the job status until it is terminal, the watcher is closed, or the
   * timeout (measured from the start of polling) elapses.
   */
  private async pollLoop() {
    const startTs = Date.now();
    const timeoutMs = this.timeout ? this.timeout * 1000 : undefined;
    while (!this.closed) {
      try {
        const snap = this.kind === "crawl"
          ? await getCrawlStatus(this.http as any, this.jobId)
          : await getBatchScrapeStatus(this.http as any, this.jobId);
        // close() may have been called while the request was in flight.
        if (this.closed) break;
        this.emit("snapshot", snap);
        if (Watcher.TERMINAL_STATUSES.includes(snap.status)) {
          this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
          this.close();
          break;
        }
      } catch {
        // ignore polling errors
      }
      if (timeoutMs && Date.now() - startTs > timeoutMs) {
        // Mark the watcher closed so its state reflects that watching stopped.
        this.close();
        break;
      }
      await new Promise((r) => setTimeout(r, Math.max(1000, this.pollInterval * 1000)));
    }
  }

  /** Cancels the pending socket-phase deadline, if any. */
  private clearTimeoutTimer(): void {
    if (this.timeoutTimer !== undefined) {
      clearTimeout(this.timeoutTimer);
      this.timeoutTimer = undefined;
    }
  }

  /** Stops watching: ends polling, cancels the deadline and closes the socket. */
  close() {
    this.closed = true;
    this.clearTimeoutTimer();
    if (this.ws && typeof this.ws.close === "function") this.ws.close();
  }
}
|
|
159
|
+
|
package/tsconfig.json
CHANGED
package/tsup.config.ts
CHANGED
|
File without changes
|
package/dist/package-E7ICGMY6.js
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
// package.json
// Bundled copy of the package manifest: every top-level field is a named
// export, and the assembled manifest object is the default export.
const name = "@mendable/firecrawl-js";
const version = "1.29.2";
const description = "JavaScript SDK for Firecrawl API";
const main = "dist/index.js";
const types = "dist/index.d.ts";
// Kept under a different local name and exported as `exports` below.
const exportsMap = {
  "./package.json": "./package.json",
  ".": {
    import: "./dist/index.js",
    default: "./dist/index.cjs"
  }
};
const type = "module";
const scripts = {
  build: "tsup",
  "build-and-publish": "npm run build && npm publish --access public",
  "publish-beta": "npm run build && npm publish --access public --tag beta",
  test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
};
const repository = {
  type: "git",
  url: "git+https://github.com/mendableai/firecrawl.git"
};
const author = "Mendable.ai";
const license = "MIT";
const dependencies = {
  axios: "^1.11.0",
  "typescript-event-target": "^1.1.1",
  zod: "^3.23.8",
  "zod-to-json-schema": "^3.23.0"
};
const bugs = {
  url: "https://github.com/mendableai/firecrawl/issues"
};
const homepage = "https://github.com/mendableai/firecrawl#readme";
const devDependencies = {
  "@jest/globals": "^30.0.5",
  "@types/dotenv": "^8.2.0",
  "@types/jest": "^30.0.0",
  "@types/mocha": "^10.0.6",
  "@types/node": "^20.12.12",
  "@types/uuid": "^9.0.8",
  dotenv: "^16.4.5",
  jest: "^30.0.5",
  "ts-jest": "^29.4.0",
  tsup: "^8.5.0",
  typescript: "^5.4.5",
  uuid: "^9.0.1"
};
const keywords = ["firecrawl", "mendable", "crawler", "web", "scraper", "api", "sdk"];
const engines = {
  node: ">=22.0.0"
};
const pnpm = {
  overrides: {
    "@babel/helpers@<7.26.10": ">=7.26.10",
    "brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
    "brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
  }
};

// Field order mirrors the original manifest object.
const package_default = {
  name,
  version,
  description,
  main,
  types,
  exports: exportsMap,
  type,
  scripts,
  repository,
  author,
  license,
  dependencies,
  bugs,
  homepage,
  devDependencies,
  keywords,
  engines,
  pnpm
};

export {
  author,
  bugs,
  package_default as default,
  dependencies,
  description,
  devDependencies,
  engines,
  exportsMap as exports,
  homepage,
  keywords,
  license,
  main,
  name,
  pnpm,
  repository,
  scripts,
  type,
  types,
  version
};
|