firecrawl 1.29.3 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -2
- package/LICENSE +0 -0
- package/README.md +85 -78
- package/audit-ci.jsonc +4 -0
- package/dist/chunk-JFWW4BWA.js +85 -0
- package/dist/index.cjs +964 -39
- package/dist/index.d.cts +529 -11
- package/dist/index.d.ts +529 -11
- package/dist/index.js +952 -27
- package/dist/package-KYZ3HXR5.js +4 -0
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +6 -6
- package/src/__tests__/e2e/v2/batch.test.ts +74 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
- package/src/__tests__/e2e/v2/extract.test.ts +70 -0
- package/src/__tests__/e2e/v2/map.test.ts +55 -0
- package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
- package/src/__tests__/e2e/v2/search.test.ts +247 -0
- package/src/__tests__/e2e/v2/usage.test.ts +36 -0
- package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
- package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
- package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
- package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
- package/src/__tests__/unit/v2/validation.test.ts +59 -0
- package/src/index.backup.ts +2146 -0
- package/src/index.ts +27 -2134
- package/src/v1/index.ts +2158 -0
- package/src/v2/client.ts +281 -0
- package/src/v2/methods/batch.ts +131 -0
- package/src/v2/methods/crawl.ts +160 -0
- package/src/v2/methods/extract.ts +86 -0
- package/src/v2/methods/map.ts +37 -0
- package/src/v2/methods/scrape.ts +26 -0
- package/src/v2/methods/search.ts +69 -0
- package/src/v2/methods/usage.ts +39 -0
- package/src/v2/types.ts +308 -0
- package/src/v2/utils/errorHandler.ts +18 -0
- package/src/v2/utils/getVersion.ts +14 -0
- package/src/v2/utils/httpClient.ts +99 -0
- package/src/v2/utils/validation.ts +50 -0
- package/src/v2/watcher.ts +159 -0
- package/tsconfig.json +2 -1
- package/tsup.config.ts +0 -0
- package/dist/package-Z6F7JDXI.js +0 -111
- /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
- /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
package/dist/index.cjs
CHANGED
|
@@ -34,8 +34,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
34
34
|
var require_package = __commonJS({
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
|
-
name: "
|
|
38
|
-
version: "
|
|
37
|
+
name: "firecrawl",
|
|
38
|
+
version: "3.0.2",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -51,12 +51,12 @@ var require_package = __commonJS({
|
|
|
51
51
|
build: "tsup",
|
|
52
52
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
53
53
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
54
|
-
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/
|
|
55
|
-
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/
|
|
54
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/e2e/v2/*.test.ts --detectOpenHandles",
|
|
55
|
+
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/unit/v2/*.test.ts"
|
|
56
56
|
},
|
|
57
57
|
repository: {
|
|
58
58
|
type: "git",
|
|
59
|
-
url: "git+https://github.com/
|
|
59
|
+
url: "git+https://github.com/firecrawl/firecrawl.git"
|
|
60
60
|
},
|
|
61
61
|
author: "Mendable.ai",
|
|
62
62
|
license: "MIT",
|
|
@@ -67,9 +67,9 @@ var require_package = __commonJS({
|
|
|
67
67
|
"zod-to-json-schema": "^3.23.0"
|
|
68
68
|
},
|
|
69
69
|
bugs: {
|
|
70
|
-
url: "https://github.com/
|
|
70
|
+
url: "https://github.com/firecrawl/firecrawl/issues"
|
|
71
71
|
},
|
|
72
|
-
homepage: "https://github.com/
|
|
72
|
+
homepage: "https://github.com/firecrawl/firecrawl#readme",
|
|
73
73
|
devDependencies: {
|
|
74
74
|
"@jest/globals": "^30.0.5",
|
|
75
75
|
"@types/dotenv": "^8.2.0",
|
|
@@ -108,16 +108,915 @@ var require_package = __commonJS({
|
|
|
108
108
|
});
|
|
109
109
|
|
|
110
110
|
// src/index.ts
|
|
111
|
-
var
|
|
112
|
-
__export(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
111
|
+
var src_exports = {};
|
|
112
|
+
__export(src_exports, {
|
|
113
|
+
Firecrawl: () => Firecrawl,
|
|
114
|
+
FirecrawlAppV1: () => FirecrawlApp,
|
|
115
|
+
FirecrawlClient: () => FirecrawlClient,
|
|
116
|
+
SdkError: () => SdkError,
|
|
117
|
+
default: () => src_default
|
|
116
118
|
});
|
|
117
|
-
module.exports = __toCommonJS(
|
|
119
|
+
module.exports = __toCommonJS(src_exports);
|
|
120
|
+
|
|
121
|
+
// src/v2/utils/httpClient.ts
|
|
118
122
|
var import_axios = __toESM(require("axios"), 1);
|
|
123
|
+
|
|
124
|
+
// src/v2/utils/getVersion.ts
|
|
125
|
+
/**
 * Resolve the SDK version string that is reported to the API.
 *
 * Resolution order:
 *   1. the `version` field of the package manifest bundled with the SDK,
 *   2. `process.env.npm_package_version`, only as a fallback,
 *   3. the literal placeholder "3.x.x".
 *
 * @returns {string} The resolved version string.
 */
function getVersion() {
  try {
    // The bundled manifest always describes this SDK, so it wins.
    const bundled = require_package()?.version;
    if (bundled) return bundled;
  } catch {
    // Bundled manifest unavailable; fall through to the environment lookup.
  }
  // npm sets `npm_package_version` from the package whose script is being
  // run, which is normally the host application rather than this SDK, so it
  // must not take precedence over the bundled manifest.
  if (typeof process !== "undefined" && process.env?.npm_package_version) {
    return process.env.npm_package_version;
  }
  return "3.x.x";
}
|
|
136
|
+
|
|
137
|
+
// src/v2/utils/httpClient.ts
|
|
138
|
+
/**
 * Small HTTP client built on an axios instance.
 *
 * Adds bearer authentication, an `origin` marker on write requests and a
 * retry loop with exponential backoff for HTTP 502 responses.
 */
var HttpClient = class {
  instance;
  apiKey;
  apiUrl;
  maxRetries;
  backoffFactor;
  /**
   * @param {object} options
   * @param {string} options.apiKey Key sent as `Authorization: Bearer <key>`.
   * @param {string} options.apiUrl Base URL; one trailing slash is removed.
   * @param {number} [options.maxRetries=3] Total number of attempts per request.
   * @param {number} [options.backoffFactor=0.5] Backoff base, in seconds.
   * @param {number} [options.timeoutMs=60000] Per-request timeout in milliseconds.
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.apiUrl = options.apiUrl.replace(/\/$/, "");
    this.maxRetries = options.maxRetries ?? 3;
    this.backoffFactor = options.backoffFactor ?? 0.5;
    this.instance = import_axios.default.create({
      baseURL: this.apiUrl,
      timeout: options.timeoutMs ?? 6e4,
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      transitional: { clarifyTimeoutError: true }
    });
  }
  /** @returns {string} The normalized base URL. */
  getApiUrl() {
    return this.apiUrl;
  }
  /** @returns {string} The API key given to the constructor. */
  getApiKey() {
    return this.apiKey;
  }
  /**
   * Send a request through the axios instance, retrying on HTTP 502.
   *
   * The caller's `config` object is never modified. At least one attempt is
   * always made, even when `maxRetries` is zero, negative or not a number.
   *
   * @param {object} config Axios request config.
   * @returns {Promise<object>} The axios response.
   * @throws Whatever the axios instance throws once retries are exhausted
   *   or the failure is not a 502.
   */
  async request(config) {
    const version = getVersion();
    // Guarantee one real attempt; a zero-iteration loop would otherwise
    // fail with a generic error without ever contacting the server.
    const attempts = Math.max(1, Math.floor(Number(this.maxRetries)) || 1);
    // Work on a copy so the caller's config (and headers) stay untouched.
    const base = { ...config, headers: { ...(config.headers || {}) } };
    let lastError;
    for (let attempt = 0; attempt < attempts; attempt++) {
      const canRetry = attempt < attempts - 1;
      try {
        const cfg = { ...base };
        if (cfg.method && ["post", "put", "patch"].includes(cfg.method.toLowerCase())) {
          const data = cfg.data ?? {};
          // Only object bodies can carry the origin marker; spreading a
          // string or an array would turn it into index-keyed garbage.
          if (typeof data === "object" && !Array.isArray(data)) {
            cfg.data = { ...data, origin: `js-sdk@${version}` };
          }
        }
        const res = await this.instance.request(cfg);
        if (res.status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        return res;
      } catch (err) {
        lastError = err;
        const status = err?.response?.status;
        if (status === 502 && canRetry) {
          await this.sleep(this.backoffFactor * Math.pow(2, attempt));
          continue;
        }
        throw err;
      }
    }
    throw lastError ?? new Error("Unexpected HTTP client error");
  }
  /**
   * @param {number} seconds Delay in seconds.
   * @returns {Promise<void>} Resolves after the delay.
   */
  sleep(seconds) {
    return new Promise((r) => setTimeout(r, seconds * 1e3));
  }
  /** Send a POST request with `body` as the payload. */
  post(endpoint, body, headers) {
    return this.request({ method: "post", url: endpoint, data: body, headers });
  }
  /** Send a GET request. */
  get(endpoint, headers) {
    return this.request({ method: "get", url: endpoint, headers });
  }
  /** Send a DELETE request. */
  delete(endpoint, headers) {
    return this.request({ method: "delete", url: endpoint, headers });
  }
  /**
   * @param {string} [idempotencyKey] Optional idempotency key.
   * @returns {object} Headers containing `x-idempotency-key` when a key is given.
   */
  prepareHeaders(idempotencyKey) {
    const headers = {};
    if (idempotencyKey) headers["x-idempotency-key"] = idempotencyKey;
    return headers;
  }
};
|
|
214
|
+
|
|
215
|
+
// src/v2/types.ts
|
|
216
|
+
/**
 * Error type raised by the SDK; carries the HTTP status, an optional error
 * code and optional details alongside the message.
 */
var SdkError = class extends Error {
  status;
  code;
  details;
  /**
   * @param {string} message Error message.
   * @param {number} [status] HTTP status, when known.
   * @param {string} [code] Error code, when known.
   * @param {*} [details] Additional details.
   */
  constructor(message, status, code, details) {
    super(message);
    Object.assign(this, { name: "FirecrawlSdkError", status, code, details });
  }
};
|
|
228
|
+
|
|
229
|
+
// src/v2/utils/validation.ts
|
|
230
|
+
var import_zod_to_json_schema = __toESM(require("zod-to-json-schema"), 1);
|
|
231
|
+
/**
 * Validate a list of format entries and convert Zod-like schemas in place.
 *
 * String entries are accepted as-is, except the bare string "json". Object
 * entries of type "json" need a `prompt` or a `schema`; a schema that looks
 * like a Zod schema is replaced on the entry by its JSON Schema conversion.
 * Object entries of type "screenshot" may carry a non-negative numeric
 * `quality`.
 *
 * @param {Array<string|object>} [formats] Format entries; nothing happens when falsy.
 * @throws {Error} When an entry is invalid.
 */
function ensureValidFormats(formats) {
  if (!formats) return;
  for (const fmt of formats) {
    if (typeof fmt === "string") {
      if (fmt === "json") {
        throw new Error("json format must be an object with { type: 'json', prompt, schema }");
      }
      continue;
    }
    // Reject null/primitive entries explicitly rather than failing with an
    // opaque TypeError on the property reads below.
    if (fmt === null || typeof fmt !== "object") {
      throw new Error("format entries must be strings or objects with a 'type'");
    }
    if (fmt.type === "json") {
      const j = fmt;
      if (!j.prompt && !j.schema) {
        throw new Error("json format requires either 'prompt' or 'schema' (or both)");
      }
      const maybeSchema = j.schema;
      const isZod = !!maybeSchema && (typeof maybeSchema.safeParse === "function" || typeof maybeSchema.parse === "function") && !!maybeSchema._def;
      if (isZod) {
        try {
          j.schema = (0, import_zod_to_json_schema.default)(maybeSchema);
        } catch {
          // Deliberate best-effort: when conversion fails the schema is
          // left exactly as the caller supplied it.
        }
      }
      continue;
    }
    if (fmt.type === "screenshot") {
      const s = fmt;
      if (s.quality != null && (typeof s.quality !== "number" || s.quality < 0)) {
        throw new Error("screenshot.quality must be a non-negative number");
      }
    }
  }
}
|
|
263
|
+
/**
 * Validate scrape options: `timeout` must be positive, `waitFor` must be
 * non-negative, and `formats` must pass `ensureValidFormats`.
 *
 * @param {object} [options] Scrape options; nothing happens when falsy.
 * @throws {Error} When a value is out of range or a format is invalid.
 */
function ensureValidScrapeOptions(options) {
  if (!options) return;
  const timeoutGiven = options.timeout != null;
  if (timeoutGiven && options.timeout <= 0) {
    throw new Error("timeout must be positive");
  }
  const waitGiven = options.waitFor != null;
  if (waitGiven && options.waitFor < 0) {
    throw new Error("waitFor must be non-negative");
  }
  ensureValidFormats(options.formats);
}
|
|
273
|
+
|
|
274
|
+
// src/v2/utils/errorHandler.ts
|
|
275
|
+
var import_axios2 = require("axios");
|
|
276
|
+
/**
 * Always throws an SdkError describing an unsuccessful response.
 *
 * The message is the body's `error`, else its `message`, else a generic
 * text naming the status and the attempted action.
 *
 * @param {object} resp Response with `status` and optional `data`.
 * @param {string} action Description of what was being attempted.
 * @throws {SdkError} Always.
 */
function throwForBadResponse(resp, action) {
  const { status } = resp;
  const payload = resp.data || {};
  let text = payload.error;
  if (!text) text = payload.message;
  if (!text) text = `Request failed (${status}) while trying to ${action}`;
  throw new SdkError(text, status, undefined, payload.details);
}
|
|
282
|
+
/**
 * Always throws an SdkError built from an axios-style error.
 *
 * Message preference: response body `error`, then `err.message`, then a
 * generic text. Code preference: body `code`, then `err.code`. Details are
 * the body's `details`, falling back to the whole body.
 *
 * @param {object} err Error with optional `response`, `message` and `code`.
 * @param {string} action Description of what was being attempted.
 * @throws {SdkError} Always.
 */
function normalizeAxiosError(err, action) {
  const response = err.response;
  const status = response?.status;
  const body = response?.data;
  const statusPart = status ? ` (${status})` : "";
  const fallback = `Request failed${statusPart} while trying to ${action}`;
  const text = body?.error || err.message || fallback;
  const errorCode = body?.code || err.code;
  const details = body?.details ?? body;
  throw new SdkError(text, status, errorCode, details);
}
|
|
289
|
+
|
|
290
|
+
// src/v2/methods/scrape.ts
|
|
291
|
+
/**
 * Scrape a single URL through POST /v2/scrape.
 *
 * @param {object} http Client exposing `post(endpoint, body)`.
 * @param {string} url URL to scrape; surrounding whitespace is trimmed.
 * @param {object} [options] Scrape options, validated and merged into the payload.
 * @returns {Promise<object>} The response's `data.data`, or `{}` when falsy.
 * @throws {Error} When the URL is empty or the request fails.
 */
async function scrape(http, url, options) {
  const target = url && url.trim();
  if (!target) {
    throw new Error("URL cannot be empty");
  }
  if (options) ensureValidScrapeOptions(options);
  const payload = options ? Object.assign({ url: target }, options) : { url: target };
  try {
    const res = await http.post("/v2/scrape", payload);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) {
      throwForBadResponse(res, "scrape");
    }
    return res.data.data || {};
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "scrape");
  }
}
|
|
309
|
+
|
|
310
|
+
// src/v2/methods/search.ts
|
|
311
|
+
/**
 * Validate a search request and build the body for POST /v2/search.
 *
 * @param {object} req Search request; `query` is required.
 * @returns {object} Payload holding `query` plus every optional field that was supplied.
 * @throws {Error} When the query is empty, or `limit`/`timeout` is not positive.
 */
function prepareSearchPayload(req) {
  if (!req.query || !req.query.trim()) throw new Error("Query cannot be empty");
  if (req.limit != null && req.limit <= 0) throw new Error("limit must be positive");
  if (req.timeout != null && req.timeout <= 0) throw new Error("timeout must be positive");
  const payload = { query: req.query };
  // `sources` is copied only when truthy; the rest are copied unless nullish.
  if (req.sources) payload.sources = req.sources;
  const optionalKeys = ["limit", "tbs", "location", "ignoreInvalidURLs", "timeout"];
  for (const key of optionalKeys) {
    if (req[key] != null) payload[key] = req[key];
  }
  if (req.scrapeOptions) {
    ensureValidScrapeOptions(req.scrapeOptions);
    payload.scrapeOptions = req.scrapeOptions;
  }
  return payload;
}
|
|
330
|
+
/**
 * Run a search through POST /v2/search and normalize the result lists.
 *
 * For every array-valued key of the response data: object items that carry
 * any document field are kept as-is, other object items are reduced to
 * `{ url, title, description }`, string items become `{ url }`, and
 * anything else is dropped. Non-array keys are omitted.
 *
 * @param {object} http Client exposing `post(endpoint, body)`.
 * @param {object} request Search request passed to `prepareSearchPayload`.
 * @returns {Promise<object>} Map from result key to normalized list.
 */
async function search(http, request) {
  const payload = prepareSearchPayload(request);
  const documentFields = ["markdown", "html", "rawHtml", "links", "screenshot", "changeTracking", "summary", "json"];
  const normalize = (item) => {
    if (item && typeof item === "object") {
      if (documentFields.some((field) => field in item)) return [item];
      return [{ url: item.url, title: item.title, description: item.description }];
    }
    return typeof item === "string" ? [{ url: item }] : [];
  };
  try {
    const res = await http.post("/v2/search", payload);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) {
      throwForBadResponse(res, "search");
    }
    const data = res.data.data || {};
    const out = {};
    for (const key of Object.keys(data)) {
      const list = data[key];
      if (!Array.isArray(list)) continue;
      out[key] = list.flatMap(normalize);
    }
    return out;
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "search");
  }
}
|
|
363
|
+
|
|
364
|
+
// src/v2/methods/map.ts
|
|
365
|
+
/**
 * Build the body for POST /v2/map.
 *
 * @param {string} url URL to map; surrounding whitespace is trimmed.
 * @param {object} [options] Optional `sitemap`, `search`, `includeSubdomains`,
 *   `limit` and `timeout`; each is copied unless nullish.
 * @returns {object} The request payload.
 * @throws {Error} When the URL is empty.
 */
function prepareMapPayload(url, options) {
  if (!url || !url.trim()) throw new Error("URL cannot be empty");
  const payload = { url: url.trim() };
  if (!options) return payload;
  const copied = ["sitemap", "search", "includeSubdomains", "limit", "timeout"];
  for (const key of copied) {
    if (options[key] != null) payload[key] = options[key];
  }
  return payload;
}
|
|
377
|
+
/**
 * Map a site through POST /v2/map and normalize the returned links.
 *
 * String links become `{ url }`; object links are reduced to
 * `{ url, title, description }`; other entries are dropped.
 *
 * @param {object} http Client exposing `post(endpoint, body)`.
 * @param {string} url URL to map.
 * @param {object} [options] Options passed to `prepareMapPayload`.
 * @returns {Promise<{links: object[]}>} The normalized links.
 */
async function map(http, url, options) {
  const payload = prepareMapPayload(url, options);
  try {
    const res = await http.post("/v2/map", payload);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) {
      throwForBadResponse(res, "map");
    }
    const rawLinks = res.data.links || [];
    const links = [];
    for (const entry of rawLinks) {
      if (typeof entry === "string") {
        links.push({ url: entry });
        continue;
      }
      if (entry && typeof entry === "object") {
        const { url: href, title, description } = entry;
        links.push({ url: href, title, description });
      }
    }
    return { links };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "map");
  }
}
|
|
396
|
+
|
|
397
|
+
// src/v2/methods/crawl.ts
|
|
398
|
+
/**
 * Build the body for POST /v2/crawl from a crawl request.
 *
 * @param {object} request Crawl request; `url` is required and trimmed.
 * @returns {object} Payload with `url` plus every supplied optional field.
 * @throws {Error} When the URL is empty or `scrapeOptions` is invalid.
 */
function prepareCrawlPayload(request) {
  if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
  const data = { url: request.url.trim() };
  // These three are copied only when truthy.
  for (const key of ["prompt", "excludePaths", "includePaths"]) {
    if (request[key]) data[key] = request[key];
  }
  // These are copied whenever they are not null/undefined.
  const nullableKeys = [
    "maxDiscoveryDepth",
    "sitemap",
    "ignoreQueryParameters",
    "limit",
    "crawlEntireDomain",
    "allowExternalLinks",
    "allowSubdomains",
    "delay",
    "maxConcurrency",
    "webhook"
  ];
  for (const key of nullableKeys) {
    if (request[key] != null) data[key] = request[key];
  }
  if (request.scrapeOptions) {
    ensureValidScrapeOptions(request.scrapeOptions);
    data.scrapeOptions = request.scrapeOptions;
  }
  if (request.zeroDataRetention != null) data.zeroDataRetention = request.zeroDataRetention;
  return data;
}
|
|
421
|
+
/**
 * Start a crawl job through POST /v2/crawl.
 *
 * @param {object} http Client exposing `post(endpoint, body)`.
 * @param {object} request Crawl request passed to `prepareCrawlPayload`.
 * @returns {Promise<{id: *, url: *}>} The `id` and `url` from the response body.
 */
async function startCrawl(http, request) {
  const payload = prepareCrawlPayload(request);
  try {
    const res = await http.post("/v2/crawl", payload);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) {
      throwForBadResponse(res, "start crawl");
    }
    const { id, url } = res.data;
    return { id, url };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "start crawl");
  }
}
|
|
434
|
+
/**
 * Fetch the status of a crawl job from GET /v2/crawl/{jobId}.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @param {string} jobId Crawl job identifier.
 * @returns {Promise<object>} Status snapshot; `completed`/`total` default to
 *   0, `next` to null and `data` to an empty array.
 */
async function getCrawlStatus(http, jobId) {
  try {
    const res = await http.get(`/v2/crawl/${jobId}`);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) {
      throwForBadResponse(res, "get crawl status");
    }
    const { status, completed, total, creditsUsed, expiresAt, next, data } = res.data;
    return {
      status,
      completed: completed ?? 0,
      total: total ?? 0,
      creditsUsed,
      expiresAt,
      next: next ?? null,
      data: data || []
    };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "get crawl status");
  }
}
|
|
455
|
+
/**
 * Cancel a crawl job through DELETE /v2/crawl/{jobId}.
 *
 * @param {object} http Client exposing `delete(endpoint)`.
 * @param {string} jobId Crawl job identifier.
 * @returns {Promise<boolean>} True when the response body's status is "cancelled".
 */
async function cancelCrawl(http, jobId) {
  try {
    const res = await http.delete(`/v2/crawl/${jobId}`);
    if (res.status !== 200) {
      throwForBadResponse(res, "cancel crawl");
    }
    const reported = res.data?.status;
    return reported === "cancelled";
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "cancel crawl");
  }
}
|
|
465
|
+
/**
 * Poll a crawl job until it reports "completed", "failed" or "cancelled".
 *
 * @param {object} http Client passed to `getCrawlStatus`.
 * @param {string} jobId Crawl job identifier.
 * @param {number} [pollInterval=2] Seconds between polls; the wait is never
 *   shorter than one second.
 * @param {number} [timeout] Maximum time in seconds; no limit when nullish.
 * @returns {Promise<object>} The terminal status snapshot.
 * @throws {Error} When the timeout elapses before a terminal status.
 */
async function waitForCrawlCompletion(http, jobId, pollInterval = 2, timeout) {
  const terminal = new Set(["completed", "failed", "cancelled"]);
  const startedAt = Date.now();
  for (;;) {
    const snapshot = await getCrawlStatus(http, jobId);
    if (terminal.has(snapshot.status)) return snapshot;
    const elapsedMs = Date.now() - startedAt;
    if (timeout != null && elapsedMs > timeout * 1e3) {
      throw new Error(`Crawl job ${jobId} did not complete within ${timeout} seconds`);
    }
    const delayMs = Math.max(1e3, pollInterval * 1e3);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
|
|
476
|
+
/**
 * Start a crawl and wait until it reaches a terminal status.
 *
 * @param {object} http Client passed to the underlying calls.
 * @param {object} request Crawl request.
 * @param {number} [pollInterval=2] Seconds between status polls.
 * @param {number} [timeout] Maximum wait in seconds.
 * @returns {Promise<object>} Result of `waitForCrawlCompletion`.
 */
async function crawl(http, request, pollInterval = 2, timeout) {
  const { id } = await startCrawl(http, request);
  return waitForCrawlCompletion(http, id, pollInterval, timeout);
}
|
|
480
|
+
/**
 * Fetch the errors recorded for a crawl from GET /v2/crawl/{crawlId}/errors.
 *
 * The error lists are read from `data.data` when present, otherwise from
 * the top level of the response body.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @param {string} crawlId Crawl job identifier.
 * @returns {Promise<{errors: Array, robotsBlocked: Array}>} Both lists,
 *   defaulting to empty arrays.
 */
async function getCrawlErrors(http, crawlId) {
  try {
    const res = await http.get(`/v2/crawl/${crawlId}/errors`);
    if (res.status !== 200) throwForBadResponse(res, "get crawl errors");
    // A 200 with an empty body must yield empty lists, not a TypeError on
    // the property reads below.
    const payload = res.data?.data ?? res.data ?? {};
    return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl errors");
    throw err;
  }
}
|
|
491
|
+
/**
 * List active crawls from GET /v2/crawl/active.
 *
 * Each entry is reduced to `{ id, teamId, url, options }`; `teamId` falls
 * back to `team_id`, and `options` defaults to null.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @returns {Promise<{success: true, crawls: object[]}>} The active crawls.
 */
async function getActiveCrawls(http) {
  try {
    const res = await http.get(`/v2/crawl/active`);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "get active crawls");
    const rawCrawls = res.data?.crawls || [];
    const crawls = [];
    for (const entry of rawCrawls) {
      crawls.push({
        id: entry.id,
        teamId: entry.teamId ?? entry.team_id,
        url: entry.url,
        options: entry.options ?? null
      });
    }
    return { success: true, crawls };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "get active crawls");
  }
}
|
|
503
|
+
/**
 * Preview crawl parameters through POST /v2/crawl/params-preview.
 *
 * @param {object} http Client exposing `post(endpoint, body)`.
 * @param {string} url URL to crawl; trimmed before sending.
 * @param {string} prompt Prompt; must be non-blank and is sent untrimmed.
 * @returns {Promise<object>} The response's `data.data` (or `{}`), with the
 *   response's `warning` copied onto it when present.
 * @throws {Error} When the URL or the prompt is empty.
 */
async function crawlParamsPreview(http, url, prompt) {
  if (!url || !url.trim()) throw new Error("URL cannot be empty");
  if (!prompt || !prompt.trim()) throw new Error("Prompt cannot be empty");
  const payload = { url: url.trim(), prompt };
  try {
    const res = await http.post("/v2/crawl/params-preview", payload);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "crawl params preview");
    const preview = res.data.data || {};
    const { warning } = res.data;
    if (warning) preview.warning = warning;
    return preview;
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "crawl params preview");
  }
}
|
|
517
|
+
|
|
518
|
+
// src/v2/methods/batch.ts
|
|
519
|
+
/**
 * Start a batch scrape job through POST /v2/batch/scrape.
 *
 * Scrape `options` are validated and merged into the top level of the
 * payload; the remaining job settings are added unless nullish.
 *
 * @param {object} http Client exposing `prepareHeaders` and `post`.
 * @param {string[]} urls Non-empty list of URLs.
 * @param {object} [settings] Job settings, including an optional `idempotencyKey`.
 * @returns {Promise<{id: *, url: *, invalidURLs: *}>} Identifiers of the started job.
 * @throws {Error} When `urls` is not a non-empty array or the request fails.
 */
async function startBatchScrape(http, urls, {
  options,
  webhook,
  appendToId,
  ignoreInvalidURLs,
  maxConcurrency,
  zeroDataRetention,
  integration,
  idempotencyKey
} = {}) {
  const hasUrls = Array.isArray(urls) && urls.length !== 0;
  if (!hasUrls) throw new Error("URLs list cannot be empty");
  const payload = { urls };
  if (options) {
    ensureValidScrapeOptions(options);
    Object.assign(payload, options);
  }
  const jobSettings = { webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention, integration };
  for (const [key, value] of Object.entries(jobSettings)) {
    if (value != null) payload[key] = value;
  }
  try {
    const headers = http.prepareHeaders(idempotencyKey);
    const res = await http.post("/v2/batch/scrape", payload, headers);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "start batch scrape");
    const { id, url, invalidURLs } = res.data;
    return { id, url, invalidURLs: invalidURLs || undefined };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "start batch scrape");
  }
}
|
|
551
|
+
/**
 * Fetch the status of a batch scrape job from GET /v2/batch/scrape/{jobId}.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @param {string} jobId Batch job identifier.
 * @returns {Promise<object>} Status snapshot; `completed`/`total` default to
 *   0, `next` to null and `data` to an empty array.
 */
async function getBatchScrapeStatus(http, jobId) {
  try {
    const res = await http.get(`/v2/batch/scrape/${jobId}`);
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "get batch scrape status");
    const { status, completed, total, creditsUsed, expiresAt, next, data } = res.data;
    return {
      status,
      completed: completed ?? 0,
      total: total ?? 0,
      creditsUsed,
      expiresAt,
      next: next ?? null,
      data: data || []
    };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "get batch scrape status");
  }
}
|
|
570
|
+
/**
 * Cancel a batch scrape job through DELETE /v2/batch/scrape/{jobId}.
 *
 * @param {object} http Client exposing `delete(endpoint)`.
 * @param {string} jobId Batch job identifier.
 * @returns {Promise<boolean>} True when the response body's status is "cancelled".
 */
async function cancelBatchScrape(http, jobId) {
  try {
    const res = await http.delete(`/v2/batch/scrape/${jobId}`);
    if (res.status !== 200) {
      throwForBadResponse(res, "cancel batch scrape");
    }
    const reported = res.data?.status;
    return reported === "cancelled";
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "cancel batch scrape");
  }
}
|
|
580
|
+
/**
 * Fetch the errors recorded for a batch scrape from
 * GET /v2/batch/scrape/{jobId}/errors.
 *
 * The error lists are read from `data.data` when present, otherwise from
 * the top level of the response body.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @param {string} jobId Batch job identifier.
 * @returns {Promise<{errors: Array, robotsBlocked: Array}>} Both lists,
 *   defaulting to empty arrays.
 */
async function getBatchScrapeErrors(http, jobId) {
  try {
    const res = await http.get(`/v2/batch/scrape/${jobId}/errors`);
    if (res.status !== 200) throwForBadResponse(res, "get batch scrape errors");
    // A 200 with an empty body must yield empty lists, not a TypeError on
    // the property reads below.
    const payload = res.data?.data ?? res.data ?? {};
    return { errors: payload.errors || [], robotsBlocked: payload.robotsBlocked || [] };
  } catch (err) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape errors");
    throw err;
  }
}
|
|
591
|
+
/**
 * Poll a batch scrape job until it reports "completed", "failed" or
 * "cancelled".
 *
 * @param {object} http Client passed to `getBatchScrapeStatus`.
 * @param {string} jobId Batch job identifier.
 * @param {number} [pollInterval=2] Seconds between polls; the wait is never
 *   shorter than one second.
 * @param {number} [timeout] Maximum time in seconds; no limit when nullish.
 * @returns {Promise<object>} The terminal status snapshot.
 * @throws {Error} When the timeout elapses before a terminal status.
 */
async function waitForBatchCompletion(http, jobId, pollInterval = 2, timeout) {
  const terminal = new Set(["completed", "failed", "cancelled"]);
  const startedAt = Date.now();
  for (;;) {
    const snapshot = await getBatchScrapeStatus(http, jobId);
    if (terminal.has(snapshot.status)) return snapshot;
    const elapsedMs = Date.now() - startedAt;
    if (timeout != null && elapsedMs > timeout * 1e3) {
      throw new Error(`Batch scrape job ${jobId} did not complete within ${timeout} seconds`);
    }
    const delayMs = Math.max(1e3, pollInterval * 1e3);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
|
|
602
|
+
/**
 * Start a batch scrape and wait until it reaches a terminal status.
 *
 * @param {object} http Client passed to the underlying calls.
 * @param {string[]} urls URLs to scrape.
 * @param {object} [opts] Job settings; `pollInterval` (default 2) and
 *   `timeout` control the waiting phase.
 * @returns {Promise<object>} Result of `waitForBatchCompletion`.
 */
async function batchScrape(http, urls, opts = {}) {
  const { id } = await startBatchScrape(http, urls, opts);
  const interval = opts.pollInterval ?? 2;
  return waitForBatchCompletion(http, id, interval, opts.timeout);
}
|
|
606
|
+
|
|
607
|
+
// src/v2/methods/extract.ts
|
|
608
|
+
var import_zod_to_json_schema2 = require("zod-to-json-schema");
|
|
609
|
+
/**
 * Build the body for POST /v2/extract.
 *
 * A schema that looks like a Zod schema (has `_def` plus a `parse` or
 * `safeParse` function) is converted with `zodToJsonSchema`; any other
 * schema is sent unchanged.
 *
 * @param {object} args Extract arguments.
 * @returns {object} Payload holding every supplied field.
 * @throws {Error} When `scrapeOptions` is invalid.
 */
function prepareExtractPayload(args) {
  const body = {};
  if (args.urls) body.urls = args.urls;
  if (args.prompt != null) body.prompt = args.prompt;
  if (args.schema != null) {
    const candidate = args.schema;
    const hasParser = typeof candidate.safeParse === "function" || typeof candidate.parse === "function";
    const looksZod = candidate && hasParser && candidate._def;
    if (looksZod) {
      body.schema = (0, import_zod_to_json_schema2.zodToJsonSchema)(candidate);
    } else {
      body.schema = args.schema;
    }
  }
  const passthrough = ["systemPrompt", "allowExternalLinks", "enableWebSearch", "showSources", "ignoreInvalidURLs"];
  for (const key of passthrough) {
    if (args[key] != null) body[key] = args[key];
  }
  if (args.scrapeOptions) {
    ensureValidScrapeOptions(args.scrapeOptions);
    body.scrapeOptions = args.scrapeOptions;
  }
  return body;
}
|
|
629
|
+
/**
 * Start an extract job through POST /v2/extract.
 *
 * @param {object} http Client exposing `post(endpoint, body)`.
 * @param {object} args Extract arguments passed to `prepareExtractPayload`.
 * @returns {Promise<*>} The response body.
 */
async function startExtract(http, args) {
  const payload = prepareExtractPayload(args);
  try {
    const res = await http.post("/v2/extract", payload);
    if (res.status !== 200) {
      throwForBadResponse(res, "extract");
    }
    return res.data;
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "extract");
  }
}
|
|
640
|
+
/**
 * Fetch the status of an extract job from GET /v2/extract/{jobId}.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @param {string} jobId Extract job identifier.
 * @returns {Promise<*>} The response body.
 */
async function getExtractStatus(http, jobId) {
  try {
    const res = await http.get(`/v2/extract/${jobId}`);
    if (res.status !== 200) {
      throwForBadResponse(res, "extract status");
    }
    return res.data;
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "extract status");
  }
}
|
|
650
|
+
/**
 * Poll an extract job until it reports "completed", "failed" or
 * "cancelled", or until the timeout elapses.
 *
 * Unlike the crawl and batch waiters visible in this file, a timeout does
 * not throw here: the most recent status is returned instead.
 *
 * @param {object} http Client passed to `getExtractStatus`.
 * @param {string} jobId Extract job identifier.
 * @param {number} [pollInterval=2] Seconds between polls; the wait is never
 *   shorter than one second.
 * @param {number} [timeout] Maximum time in seconds; no limit when nullish.
 * @returns {Promise<object>} The terminal, or last seen, status.
 */
async function waitExtract(http, jobId, pollInterval = 2, timeout) {
  const terminal = new Set(["completed", "failed", "cancelled"]);
  const startedAt = Date.now();
  for (;;) {
    const snapshot = await getExtractStatus(http, jobId);
    if (terminal.has(snapshot.status || "")) return snapshot;
    const timedOut = timeout != null && Date.now() - startedAt > timeout * 1e3;
    if (timedOut) return snapshot;
    const delayMs = Math.max(1e3, pollInterval * 1e3);
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
}
|
|
659
|
+
/**
 * Start an extract job and wait for it to finish.
 *
 * When the start response carries no `id`, that response is returned
 * directly without polling.
 *
 * @param {object} http Client passed to the underlying calls.
 * @param {object} args Extract arguments; `pollInterval` (default 2) and
 *   `timeout` control the waiting phase.
 * @returns {Promise<object>} The start response or the final status.
 */
async function extract(http, args) {
  const started = await startExtract(http, args);
  const { id: jobId } = started;
  return jobId
    ? waitExtract(http, jobId, args.pollInterval ?? 2, args.timeout)
    : started;
}
|
|
665
|
+
|
|
666
|
+
// src/v2/methods/usage.ts
|
|
667
|
+
/**
 * Read the concurrency figures from GET /v2/concurrency-check.
 *
 * Values are taken from `data.data` when truthy, otherwise from the body's
 * top level; `maxConcurrency` falls back to `max_concurrency`.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @returns {Promise<{concurrency: *, maxConcurrency: *}>} The concurrency figures.
 */
async function getConcurrency(http) {
  try {
    const res = await http.get("/v2/concurrency-check");
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "get concurrency");
    const source = res.data.data || res.data;
    const maxConcurrency = source.maxConcurrency ?? source.max_concurrency;
    return { concurrency: source.concurrency, maxConcurrency };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "get concurrency");
  }
}
|
|
678
|
+
/**
 * Read the remaining credits from GET /v2/team/credit-usage.
 *
 * Values are taken from `data.data` when truthy, otherwise from the body's
 * top level; `remainingCredits` falls back to `remaining_credits`, then 0.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @returns {Promise<{remainingCredits: *}>} The remaining credits.
 */
async function getCreditUsage(http) {
  try {
    const res = await http.get("/v2/team/credit-usage");
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "get credit usage");
    const source = res.data.data || res.data;
    const remainingCredits = source.remainingCredits ?? source.remaining_credits ?? 0;
    return { remainingCredits };
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "get credit usage");
  }
}
|
|
689
|
+
/**
 * Read the token usage from GET /v2/team/token-usage.
 *
 * @param {object} http Client exposing `get(endpoint)`.
 * @returns {Promise<*>} The body's `data` when truthy, otherwise the whole body.
 */
async function getTokenUsage(http) {
  try {
    const res = await http.get("/v2/team/token-usage");
    const ok = res.status === 200 && res.data?.success;
    if (!ok) throwForBadResponse(res, "get token usage");
    const body = res.data;
    return body.data || body;
  } catch (failure) {
    if (!failure?.isAxiosError) throw failure;
    return normalizeAxiosError(failure, "get token usage");
  }
}
|
|
699
|
+
|
|
700
|
+
// src/v2/watcher.ts
|
|
701
|
+
var import_events = require("events");
|
|
702
|
+
/**
 * Watches a crawl or batch-scrape job and re-emits its progress as events.
 *
 * `start()` first tries a WebSocket; if the socket cannot be constructed, or
 * it closes before the watcher was closed, the watcher falls back to polling
 * the status endpoint.
 *
 * Events:
 *  - `document`: one scraped document, tagged with `id` (the job id).
 *  - `snapshot`: current job status (`status`, `completed`, `total`, `creditsUsed`, `expiresAt`, `next`, `data`).
 *  - `done`:     `{ status, data, id }`, emitted when the job reaches a terminal status.
 *  - `error`:    `{ status: "failed", data: [], error, id }`.
 */
var Watcher = class extends import_events.EventEmitter {
  http;
  jobId;
  kind;
  pollInterval;
  timeout;
  ws;
  closed = false;
  /**
   * @param http HTTP client; must expose `getApiUrl()` and `getApiKey()`.
   * @param jobId Id of the job to watch.
   * @param opts `kind` ("crawl" by default, anything else is treated as batch scrape),
   *   `pollInterval` in seconds (default 2), `timeout` in seconds (optional).
   */
  constructor(http, jobId, opts = {}) {
    super();
    this.http = http;
    this.jobId = jobId;
    this.kind = opts.kind ?? "crawl";
    this.pollInterval = opts.pollInterval ?? 2;
    this.timeout = opts.timeout;
  }
  /** True for the statuses after which no further updates are expected. */
  isTerminalStatus(status) {
    return ["completed", "failed", "cancelled"].includes(status);
  }
  /** Build the WebSocket URL for this job by swapping the API URL's `http` prefix for `ws`. */
  buildWsUrl() {
    const apiUrl = this.http.getApiUrl();
    const wsBase = apiUrl.replace(/^http/, "ws");
    const path = this.kind === "crawl" ? `/v2/crawl/${this.jobId}` : `/v2/batch/scrape/${this.jobId}`;
    return `${wsBase}${path}`;
  }
  /** Begin watching: open a WebSocket, or poll if opening one throws (e.g. no global `WebSocket`). */
  async start() {
    try {
      const url = this.buildWsUrl();
      // The API key is passed as the second (protocols) argument of the WebSocket constructor.
      this.ws = new WebSocket(url, this.http.getApiKey());
      this.attachWsHandlers(this.ws);
    } catch {
      this.pollLoop();
    }
  }
  /**
   * Wire `onmessage` / `onerror` / `onclose` on the socket.
   * @param ws WebSocket-like object whose handler properties are assigned.
   */
  attachWsHandlers(ws) {
    const startTs = Date.now();
    const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
    ws.onmessage = (ev) => {
      // Frames may still arrive after close(); dropping them prevents a second `done`
      // (e.g. a terminal snapshot followed by an explicit "done" frame).
      if (this.closed) return;
      try {
        const body = typeof ev.data === "string" ? JSON.parse(ev.data) : null;
        if (!body) return;
        switch (body.type) {
          case "error":
            this.emit("error", { status: "failed", data: [], error: body.error, id: this.jobId });
            break;
          case "catchup": {
            const catchup = body.data || {};
            this.emitDocuments(catchup.data || []);
            this.emitSnapshot(catchup);
            break;
          }
          case "document":
            // Routed through emitDocuments so live documents carry the same `id` tag as catch-up ones.
            if (body.data) this.emitDocuments([body.data]);
            break;
          case "done":
            this.emit("done", { status: "completed", data: [], id: this.jobId });
            this.close();
            break;
          default: {
            const payload = body.data || body;
            if (payload && payload.status) this.emitSnapshot(payload);
          }
        }
      } catch {
        // Best-effort: a malformed frame or a throwing listener must not break the stream.
      }
      // Checked after every frame (not only untyped ones) so a steady document stream still times out.
      if (timeoutMs && Date.now() - startTs > timeoutMs) this.close();
    };
    ws.onerror = () => {
      this.emit("error", { status: "failed", data: [], error: "WebSocket error", id: this.jobId });
      this.close();
    };
    ws.onclose = () => {
      // Socket dropped without the watcher being closed: continue by polling.
      if (!this.closed) this.pollLoop();
    };
  }
  /** Emit one `document` event per entry, each tagged with the job id. */
  emitDocuments(docs) {
    for (const doc of docs) this.emit("document", { ...doc, id: this.jobId });
  }
  /**
   * Emit a `snapshot` built from `payload`; if its status is terminal, also emit `done` and close.
   * @param payload Status payload; missing `completed`/`total` default to 0, `next` to null, `data` to [].
   */
  emitSnapshot(payload) {
    const status = payload.status;
    const data = payload.data || [];
    // Crawl and batch snapshots have the same shape.
    const snap = {
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
      creditsUsed: payload.creditsUsed,
      expiresAt: payload.expiresAt,
      next: payload.next ?? null,
      data
    };
    this.emit("snapshot", snap);
    if (this.isTerminalStatus(status)) {
      this.emit("done", { status, data, id: this.jobId });
      this.close();
    }
  }
  /**
   * Poll the job status until it is terminal, the watcher is closed, or the timeout elapses.
   * Waits `pollInterval` seconds between requests, but never less than one second.
   */
  async pollLoop() {
    const startTs = Date.now();
    const timeoutMs = this.timeout ? this.timeout * 1e3 : void 0;
    while (!this.closed) {
      try {
        const snap = this.kind === "crawl" ? await getCrawlStatus(this.http, this.jobId) : await getBatchScrapeStatus(this.http, this.jobId);
        // close() may have been called while the request was in flight.
        if (this.closed) break;
        this.emit("snapshot", snap);
        if (this.isTerminalStatus(snap.status)) {
          this.emit("done", { status: snap.status, data: snap.data, id: this.jobId });
          this.close();
          break;
        }
      } catch {
        // Best-effort: a failed poll is retried on the next iteration.
      }
      if (timeoutMs && Date.now() - startTs > timeoutMs) {
        // Mark the watcher closed on timeout, as the WebSocket path does.
        this.close();
        break;
      }
      await new Promise((r) => setTimeout(r, Math.max(1e3, this.pollInterval * 1e3)));
    }
  }
  /** Stop watching: mark the watcher closed and close the socket if one exists. */
  close() {
    this.closed = true;
    if (this.ws && this.ws.close) this.ws.close();
  }
};
|
|
827
|
+
|
|
828
|
+
// src/v2/client.ts
|
|
829
|
+
/**
 * v2 API client. Each method delegates to the module-level function of the
 * same name, passing the shared `HttpClient` as first argument.
 */
var FirecrawlClient = class {
  http;
  /**
   * Create a v2 client.
   * @param options Transport configuration (API key, base URL, timeouts, retries).
   *   `apiKey` falls back to the `FIRECRAWL_API_KEY` environment variable and
   *   `apiUrl` to `FIRECRAWL_API_URL`, then to `https://api.firecrawl.dev`.
   *   One trailing slash is stripped from the URL.
   * @throws Error when no API key can be resolved.
   */
  constructor(options = {}) {
    // Guarded so that runtimes without a `process` global do not throw a
    // ReferenceError when an option is omitted.
    const env = typeof process !== "undefined" && process.env ? process.env : {};
    const apiKey = options.apiKey ?? env.FIRECRAWL_API_KEY ?? "";
    const apiUrl = (options.apiUrl ?? env.FIRECRAWL_API_URL ?? "https://api.firecrawl.dev").replace(/\/$/, "");
    if (!apiKey) {
      throw new Error("API key is required. Set FIRECRAWL_API_KEY env or pass apiKey.");
    }
    this.http = new HttpClient({
      apiKey,
      apiUrl,
      timeoutMs: options.timeoutMs,
      maxRetries: options.maxRetries,
      backoffFactor: options.backoffFactor
    });
  }
  // Scrape
  /**
   * Scrape a single URL.
   * @param url Target URL.
   * @param options Scrape options, forwarded unchanged.
   */
  async scrape(url, options) {
    return scrape(this.http, url, options);
  }
  // Search
  /**
   * Search the web and optionally scrape each result.
   * @param query Search query string.
   * @param req Additional search options (sources, limit, scrapeOptions, etc.).
   * @returns Structured search results.
   */
  async search(query, req = {}) {
    return search(this.http, { query, ...req });
  }
  // Map
  /**
   * Map a site to discover URLs (sitemap-aware).
   * @param url Root URL to map.
   * @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout).
   * @returns Discovered links.
   */
  async map(url, options) {
    return map(this.http, url, options);
  }
  // Crawl
  /**
   * Start a crawl job (async).
   * @param url Root URL to crawl.
   * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
   * @returns Job id and url.
   */
  async startCrawl(url, req = {}) {
    return startCrawl(this.http, { url, ...req });
  }
  /**
   * Get the status and partial data of a crawl job.
   * @param jobId Crawl job id.
   */
  async getCrawlStatus(jobId) {
    return getCrawlStatus(this.http, jobId);
  }
  /**
   * Cancel a crawl job.
   * @param jobId Crawl job id.
   * @returns True if cancelled.
   */
  async cancelCrawl(jobId) {
    return cancelCrawl(this.http, jobId);
  }
  /**
   * Convenience waiter: start a crawl and poll until it finishes.
   * @param url Root URL to crawl.
   * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
   * @returns Final job snapshot.
   */
  async crawl(url, req = {}) {
    return crawl(this.http, { url, ...req }, req.pollInterval, req.timeout);
  }
  /**
   * Retrieve crawl errors and robots.txt blocks.
   * @param crawlId Crawl job id.
   */
  async getCrawlErrors(crawlId) {
    return getCrawlErrors(this.http, crawlId);
  }
  /**
   * List active crawls for the authenticated team.
   */
  async getActiveCrawls() {
    return getActiveCrawls(this.http);
  }
  /**
   * Preview normalized crawl parameters produced by a natural-language prompt.
   * @param url Root URL.
   * @param prompt Natural-language instruction.
   */
  async crawlParamsPreview(url, prompt) {
    return crawlParamsPreview(this.http, url, prompt);
  }
  // Batch
  /**
   * Start a batch scrape job for multiple URLs (async).
   * @param urls URLs to scrape.
   * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
   * @returns Job id and url.
   */
  async startBatchScrape(urls, opts) {
    return startBatchScrape(this.http, urls, opts);
  }
  /**
   * Get the status and partial data of a batch scrape job.
   * @param jobId Batch job id.
   */
  async getBatchScrapeStatus(jobId) {
    return getBatchScrapeStatus(this.http, jobId);
  }
  /**
   * Retrieve batch scrape errors and robots.txt blocks.
   * @param jobId Batch job id.
   */
  async getBatchScrapeErrors(jobId) {
    return getBatchScrapeErrors(this.http, jobId);
  }
  /**
   * Cancel a batch scrape job.
   * @param jobId Batch job id.
   * @returns True if cancelled.
   */
  async cancelBatchScrape(jobId) {
    return cancelBatchScrape(this.http, jobId);
  }
  /**
   * Convenience waiter: start a batch scrape and poll until it finishes.
   * @param urls URLs to scrape.
   * @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
   * @returns Final job snapshot.
   */
  async batchScrape(urls, opts) {
    return batchScrape(this.http, urls, opts);
  }
  // Extract
  /**
   * Start an extract job (async).
   * @param args Extraction request (urls, schema or prompt, flags).
   * @returns Job id or processing state.
   */
  async startExtract(args) {
    return startExtract(this.http, args);
  }
  /**
   * Get extract job status/data.
   * @param jobId Extract job id.
   */
  async getExtractStatus(jobId) {
    return getExtractStatus(this.http, jobId);
  }
  /**
   * Convenience waiter: start an extract and poll until it finishes.
   * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
   * @returns Final extract response.
   */
  async extract(args) {
    return extract(this.http, args);
  }
  // Usage
  /** Current concurrency usage. */
  async getConcurrency() {
    return getConcurrency(this.http);
  }
  /** Current credit usage. */
  async getCreditUsage() {
    return getCreditUsage(this.http);
  }
  /** Recent token usage. */
  async getTokenUsage() {
    return getTokenUsage(this.http);
  }
  // Watcher
  /**
   * Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
   * @param jobId Job id.
   * @param opts Watcher options (kind, pollInterval, timeout seconds).
   */
  watcher(jobId, opts = {}) {
    return new Watcher(this.http, jobId, opts);
  }
};
|
|
1015
|
+
|
|
1016
|
+
// src/v1/index.ts
|
|
1017
|
+
var import_axios3 = __toESM(require("axios"), 1);
|
|
119
1018
|
var zt = require("zod");
|
|
120
|
-
var
|
|
1019
|
+
var import_zod_to_json_schema3 = require("zod-to-json-schema");
|
|
121
1020
|
|
|
122
1021
|
// node_modules/typescript-event-target/dist/index.mjs
|
|
123
1022
|
var e = class extends EventTarget {
|
|
@@ -126,7 +1025,7 @@ var e = class extends EventTarget {
|
|
|
126
1025
|
}
|
|
127
1026
|
};
|
|
128
1027
|
|
|
129
|
-
// src/index.ts
|
|
1028
|
+
// src/v1/index.ts
|
|
130
1029
|
var FirecrawlError = class extends Error {
|
|
131
1030
|
statusCode;
|
|
132
1031
|
details;
|
|
@@ -145,10 +1044,16 @@ var FirecrawlApp = class {
|
|
|
145
1044
|
}
|
|
146
1045
|
async getVersion() {
|
|
147
1046
|
try {
|
|
1047
|
+
if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
|
|
1048
|
+
return process.env.npm_package_version;
|
|
1049
|
+
}
|
|
148
1050
|
const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1));
|
|
149
1051
|
return packageJson.default.version;
|
|
150
1052
|
} catch (error) {
|
|
151
|
-
|
|
1053
|
+
const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
|
|
1054
|
+
if (!isTest) {
|
|
1055
|
+
console.error("Error getting version:", error);
|
|
1056
|
+
}
|
|
152
1057
|
return "1.25.1";
|
|
153
1058
|
}
|
|
154
1059
|
}
|
|
@@ -183,7 +1088,7 @@ var FirecrawlApp = class {
|
|
|
183
1088
|
if (jsonData?.extract?.schema) {
|
|
184
1089
|
let schema = jsonData.extract.schema;
|
|
185
1090
|
try {
|
|
186
|
-
schema = (0,
|
|
1091
|
+
schema = (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
|
|
187
1092
|
} catch (error) {
|
|
188
1093
|
}
|
|
189
1094
|
jsonData = {
|
|
@@ -197,7 +1102,7 @@ var FirecrawlApp = class {
|
|
|
197
1102
|
if (jsonData?.jsonOptions?.schema) {
|
|
198
1103
|
let schema = jsonData.jsonOptions.schema;
|
|
199
1104
|
try {
|
|
200
|
-
schema = (0,
|
|
1105
|
+
schema = (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
|
|
201
1106
|
} catch (error) {
|
|
202
1107
|
}
|
|
203
1108
|
jsonData = {
|
|
@@ -209,7 +1114,7 @@ var FirecrawlApp = class {
|
|
|
209
1114
|
};
|
|
210
1115
|
}
|
|
211
1116
|
try {
|
|
212
|
-
const response = await
|
|
1117
|
+
const response = await import_axios3.default.post(
|
|
213
1118
|
this.apiUrl + `/v1/scrape`,
|
|
214
1119
|
jsonData,
|
|
215
1120
|
{ headers, timeout: params?.timeout !== void 0 ? params.timeout + 5e3 : void 0 }
|
|
@@ -260,7 +1165,7 @@ var FirecrawlApp = class {
|
|
|
260
1165
|
if (jsonData?.scrapeOptions?.extract?.schema) {
|
|
261
1166
|
let schema = jsonData.scrapeOptions.extract.schema;
|
|
262
1167
|
try {
|
|
263
|
-
schema = (0,
|
|
1168
|
+
schema = (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
|
|
264
1169
|
} catch (error) {
|
|
265
1170
|
}
|
|
266
1171
|
jsonData = {
|
|
@@ -481,9 +1386,9 @@ var FirecrawlApp = class {
|
|
|
481
1386
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
482
1387
|
*/
|
|
483
1388
|
async crawlUrlAndWatch(url, params, idempotencyKey) {
|
|
484
|
-
const
|
|
485
|
-
if (
|
|
486
|
-
const id =
|
|
1389
|
+
const crawl2 = await this.asyncCrawlUrl(url, params, idempotencyKey);
|
|
1390
|
+
if (crawl2.success && crawl2.id) {
|
|
1391
|
+
const id = crawl2.id;
|
|
487
1392
|
return new CrawlWatcher(id, this);
|
|
488
1393
|
}
|
|
489
1394
|
throw new FirecrawlError("Crawl job failed to start", 400);
|
|
@@ -529,7 +1434,7 @@ var FirecrawlApp = class {
|
|
|
529
1434
|
if (jsonData?.extract?.schema) {
|
|
530
1435
|
let schema = jsonData.extract.schema;
|
|
531
1436
|
try {
|
|
532
|
-
schema = (0,
|
|
1437
|
+
schema = (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
|
|
533
1438
|
} catch (error) {
|
|
534
1439
|
}
|
|
535
1440
|
jsonData = {
|
|
@@ -543,7 +1448,7 @@ var FirecrawlApp = class {
|
|
|
543
1448
|
if (jsonData?.jsonOptions?.schema) {
|
|
544
1449
|
let schema = jsonData.jsonOptions.schema;
|
|
545
1450
|
try {
|
|
546
|
-
schema = (0,
|
|
1451
|
+
schema = (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
|
|
547
1452
|
} catch (error) {
|
|
548
1453
|
}
|
|
549
1454
|
jsonData = {
|
|
@@ -606,9 +1511,9 @@ var FirecrawlApp = class {
|
|
|
606
1511
|
* @returns A CrawlWatcher instance to monitor the crawl job.
|
|
607
1512
|
*/
|
|
608
1513
|
async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
609
|
-
const
|
|
610
|
-
if (
|
|
611
|
-
const id =
|
|
1514
|
+
const crawl2 = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
|
|
1515
|
+
if (crawl2.success && crawl2.id) {
|
|
1516
|
+
const id = crawl2.id;
|
|
612
1517
|
return new CrawlWatcher(id, this);
|
|
613
1518
|
}
|
|
614
1519
|
throw new FirecrawlError("Batch scrape job failed to start", 400);
|
|
@@ -722,7 +1627,7 @@ var FirecrawlApp = class {
|
|
|
722
1627
|
jsonSchema = void 0;
|
|
723
1628
|
} else {
|
|
724
1629
|
try {
|
|
725
|
-
jsonSchema = (0,
|
|
1630
|
+
jsonSchema = (0, import_zod_to_json_schema3.zodToJsonSchema)(params.schema);
|
|
726
1631
|
} catch (_) {
|
|
727
1632
|
jsonSchema = params.schema;
|
|
728
1633
|
}
|
|
@@ -786,7 +1691,7 @@ var FirecrawlApp = class {
|
|
|
786
1691
|
jsonSchema = void 0;
|
|
787
1692
|
} else {
|
|
788
1693
|
try {
|
|
789
|
-
jsonSchema = (0,
|
|
1694
|
+
jsonSchema = (0, import_zod_to_json_schema3.zodToJsonSchema)(params.schema);
|
|
790
1695
|
} catch (_) {
|
|
791
1696
|
jsonSchema = params.schema;
|
|
792
1697
|
}
|
|
@@ -850,7 +1755,7 @@ var FirecrawlApp = class {
|
|
|
850
1755
|
* @returns The response from the POST request.
|
|
851
1756
|
*/
|
|
852
1757
|
postRequest(url, data, headers) {
|
|
853
|
-
return
|
|
1758
|
+
return import_axios3.default.post(url, data, { headers, timeout: data?.timeout ? data.timeout + 5e3 : void 0 });
|
|
854
1759
|
}
|
|
855
1760
|
/**
|
|
856
1761
|
* Sends a GET request to the specified URL.
|
|
@@ -860,9 +1765,9 @@ var FirecrawlApp = class {
|
|
|
860
1765
|
*/
|
|
861
1766
|
async getRequest(url, headers) {
|
|
862
1767
|
try {
|
|
863
|
-
return await
|
|
1768
|
+
return await import_axios3.default.get(url, { headers });
|
|
864
1769
|
} catch (error) {
|
|
865
|
-
if (error instanceof
|
|
1770
|
+
if (error instanceof import_axios3.AxiosError && error.response) {
|
|
866
1771
|
return error.response;
|
|
867
1772
|
} else {
|
|
868
1773
|
throw error;
|
|
@@ -877,9 +1782,9 @@ var FirecrawlApp = class {
|
|
|
877
1782
|
*/
|
|
878
1783
|
async deleteRequest(url, headers) {
|
|
879
1784
|
try {
|
|
880
|
-
return await
|
|
1785
|
+
return await import_axios3.default.delete(url, { headers });
|
|
881
1786
|
} catch (error) {
|
|
882
|
-
if (error instanceof
|
|
1787
|
+
if (error instanceof import_axios3.AxiosError && error.response) {
|
|
883
1788
|
return error.response;
|
|
884
1789
|
} else {
|
|
885
1790
|
throw error;
|
|
@@ -958,7 +1863,7 @@ var FirecrawlApp = class {
|
|
|
958
1863
|
* @returns True if the error should be retried
|
|
959
1864
|
*/
|
|
960
1865
|
isRetryableError(error) {
|
|
961
|
-
if (error instanceof
|
|
1866
|
+
if (error instanceof import_axios3.AxiosError) {
|
|
962
1867
|
if (!error.response) {
|
|
963
1868
|
const code = error.code;
|
|
964
1869
|
const message = error.message?.toLowerCase() || "";
|
|
@@ -1077,7 +1982,7 @@ var FirecrawlApp = class {
|
|
|
1077
1982
|
if (jsonData?.jsonOptions?.schema) {
|
|
1078
1983
|
let schema = jsonData.jsonOptions.schema;
|
|
1079
1984
|
try {
|
|
1080
|
-
schema = (0,
|
|
1985
|
+
schema = (0, import_zod_to_json_schema3.zodToJsonSchema)(schema);
|
|
1081
1986
|
} catch (error) {
|
|
1082
1987
|
}
|
|
1083
1988
|
jsonData = {
|
|
@@ -1432,8 +2337,28 @@ var CrawlWatcher = class extends e {
|
|
|
1432
2337
|
this.ws.close();
|
|
1433
2338
|
}
|
|
1434
2339
|
};
|
|
2340
|
+
|
|
2341
|
+
// src/index.ts
|
|
2342
|
+
/**
 * Unified entry point: behaves as the v2 `FirecrawlClient` and exposes the
 * legacy v1 client through the lazily-created `v1` property.
 */
var Firecrawl = class extends FirecrawlClient {
  /** Feature‑frozen v1 client (lazy). */
  _v1;
  /** Constructor options, kept so the v1 client can be built with the same settings. */
  _v1Opts;
  /** @param opts API credentials and base URL. */
  constructor(opts = {}) {
    super(opts);
    this._v1Opts = opts;
  }
  /** Access the legacy v1 client (instantiated on first access, then reused). */
  get v1() {
    if (!this._v1) this._v1 = new FirecrawlApp(this._v1Opts);
    return this._v1;
  }
};
var src_default = Firecrawl;
|
|
1435
2358
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1436
2359
|
0 && (module.exports = {
|
|
1437
|
-
|
|
1438
|
-
|
|
2360
|
+
Firecrawl,
|
|
2361
|
+
FirecrawlAppV1,
|
|
2362
|
+
FirecrawlClient,
|
|
2363
|
+
SdkError
|
|
1439
2364
|
});
|