firecrawl 1.11.2 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +116 -20
- package/dist/index.d.cts +14 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +116 -20
- package/package.json +1 -1
- package/src/index.js +1002 -0
- package/src/index.ts +137 -22
package/src/index.js
ADDED
|
@@ -0,0 +1,1002 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted helper (tslib __extends): implements `class D extends B`
// for ES5 targets. Copies static members from b onto d, then chains
// d.prototype through b.prototype.
var __extends = (this && this.__extends) || (function () {
    var extendStatics = function (d, b) {
        // Lazily pick the best available mechanism for static inheritance:
        // Object.setPrototypeOf, then legacy __proto__ mutation (feature-tested
        // via `{ __proto__: [] } instanceof Array`), then a plain own-property
        // copy as a last resort. The chosen function replaces extendStatics.
        extendStatics = Object.setPrototypeOf ||
            ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
            function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
        return extendStatics(d, b);
    };
    return function (d, b) {
        if (typeof b !== "function" && b !== null)
            throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
        extendStatics(d, b);
        // Intermediate constructor so d.prototype inherits from b.prototype
        // without invoking b itself.
        function __() { this.constructor = d; }
        d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
    };
})();
|
|
17
|
+
// TypeScript-emitted helper (tslib __assign): shallow-merges the own
// enumerable properties of every source argument into the first argument —
// the ES5 equivalent of object spread. Uses the native Object.assign when
// available; the chosen implementation replaces __assign on first call.
var __assign = (this && this.__assign) || function () {
    __assign = Object.assign || function(t) {
        for (var s, i = 1, n = arguments.length; i < n; i++) {
            s = arguments[i];
            for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
                t[p] = s[p];
        }
        return t;
    };
    return __assign.apply(this, arguments);
};
|
|
28
|
+
// TypeScript-emitted helper (tslib __awaiter): drives an async function that
// was compiled into a generator. Wraps execution in a Promise (P, defaulting
// to the global Promise) and resumes the generator with each awaited value
// until it completes or throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Normalize any awaited value into an instance of the chosen Promise type.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        // Either the generator is done (resolve the outer promise) or it
        // yielded a value to await (adopt it and continue on settlement).
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
37
|
+
// TypeScript-emitted helper (tslib __generator): emulates generator semantics
// on ES5 by repeatedly invoking `body` with a state record `_` whose `label`
// selects the next case in the compiled switch. Opcodes in `op[0]`:
// 0=next, 1=throw, 2=return, 3=break-to-label, 4=yield, 5=yield*,
// 6=caught exception, 7=endfinally. `_.trys` holds [try, catch, finally, end]
// label tuples for exception dispatch. Do not edit — every clause is
// order-sensitive generated code.
var __generator = (this && this.__generator) || function (thisArg, body) {
    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
    return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
    function verb(n) { return function (v) { return step([n, v]); }; }
    function step(op) {
        // f guards against re-entrant resumption of a running generator.
        if (f) throw new TypeError("Generator is already executing.");
        while (g && (g = 0, op[0] && (_ = 0)), _) try {
            // y is an inner (delegated) iterator when yield* is active.
            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
            if (y = 0, t) op = [op[0] & 2, t.value];
            switch (op[0]) {
                case 0: case 1: t = op; break;
                case 4: _.label++; return { value: op[1], done: false };
                case 5: _.label++; y = op[1]; op = [0]; continue;
                case 7: op = _.ops.pop(); _.trys.pop(); continue;
                default:
                    // Exception/return unwinding against the _.trys stack.
                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
                    if (t[2]) _.ops.pop();
                    _.trys.pop(); continue;
            }
            op = body.call(thisArg, _);
        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
    }
};
|
|
64
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
65
|
+
exports.CrawlWatcher = exports.FirecrawlError = void 0;
|
|
66
|
+
var axios_1 = require("axios");
|
|
67
|
+
var zt = require("zod");
|
|
68
|
+
var zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
69
|
+
var isows_1 = require("isows");
|
|
70
|
+
var typescript_event_target_1 = require("typescript-event-target");
|
|
71
|
+
;
|
|
72
|
+
;
|
|
73
|
+
/**
 * Custom error class for Firecrawl.
 * Extends the built-in Error class to include an HTTP status code.
 * @property {number} statusCode - HTTP status associated with the failure.
 */
var FirecrawlError = /** @class */ (function (_super) {
    __extends(FirecrawlError, _super);
    /**
     * @param message - Human-readable description of the failure.
     * @param statusCode - HTTP status code associated with the failure.
     */
    function FirecrawlError(message, statusCode) {
        var _this = _super.call(this, message) || this;
        // Fix: on ES5 targets, Error.call() returns a fresh Error object, so
        // `_this` loses the FirecrawlError prototype and `err instanceof
        // FirecrawlError` is false. Restore the intended prototype chain
        // (TypeScript's documented workaround for extending built-ins).
        Object.setPrototypeOf(_this, FirecrawlError.prototype);
        // Fix: without this, err.name is "Error" in logs and stack traces.
        _this.name = "FirecrawlError";
        _this.statusCode = statusCode;
        return _this;
    }
    return FirecrawlError;
}(Error));
exports.FirecrawlError = FirecrawlError;
|
|
87
|
+
/**
|
|
88
|
+
* Main class for interacting with the Firecrawl API.
|
|
89
|
+
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
90
|
+
*/
|
|
91
|
+
var FirecrawlApp = /** @class */ (function () {
|
|
92
|
+
/**
 * Initializes a new instance of the FirecrawlApp class.
 * @param config - Configuration object; `apiKey` and `apiUrl` are both
 *     optional and default to null when absent.
 * @throws FirecrawlError (401) when targeting the hosted cloud service
 *     without a string API key.
 */
function FirecrawlApp(config) {
    var rawKey = config.apiKey;
    var rawUrl = config.apiUrl;
    var apiKey = rawKey === void 0 ? null : rawKey;
    var apiUrl = rawUrl === void 0 ? null : rawUrl;
    // Fall back to the hosted endpoint when no apiUrl was supplied.
    var baseUrl = apiUrl || "https://api.firecrawl.dev";
    var hasStringKey = typeof apiKey === "string";
    // The cloud service always requires an API key; self-hosted may not.
    if (!hasStringKey && this.isCloudService(baseUrl)) {
        throw new FirecrawlError("No API key provided", 401);
    }
    this.apiKey = apiKey || '';
    this.apiUrl = baseUrl;
}
|
|
105
|
+
/**
 * Determines whether a base URL targets the hosted Firecrawl cloud service
 * (as opposed to a self-hosted deployment).
 * @param url - Base API URL to inspect.
 * @returns true when the URL contains the cloud host name.
 */
FirecrawlApp.prototype.isCloudService = function (url) {
    var cloudHost = 'api.firecrawl.dev';
    return url.includes(cloudHost);
};
|
|
108
|
+
/**
 * Scrapes a URL using the Firecrawl API (POST /v1/scrape).
 * @param url - The URL to scrape.
 * @param params - Additional parameters for the scrape request; may carry an
 *     `extract.schema` which can be a Zod schema or plain JSON schema.
 * @returns The scraped document merged with { success, warning, error }, or
 *     { success: false, error } on failure.
 * @throws FirecrawlError when the API reports success=false.
 */
// Compiled async/await state machine: case 0 builds the request, cases 1-3
// form the try/catch (trys tuple [1, 3, , 4]), case 4 is the fallthrough
// return. NOTE(review): unlike the other methods, this posts via
// axios_1.default.post directly instead of this.postRequest — presumably
// equivalent, but verify against postRequest's extra behavior (if any).
FirecrawlApp.prototype.scrapeUrl = function (url, params) {
    return __awaiter(this, void 0, void 0, function () {
        var headers, jsonData, schema, response, responseData, error_1;
        var _a;
        return __generator(this, function (_b) {
            switch (_b.label) {
                case 0:
                    headers = {
                        "Content-Type": "application/json",
                        Authorization: "Bearer ".concat(this.apiKey),
                    };
                    jsonData = __assign({ url: url }, params);
                    if ((_a = jsonData === null || jsonData === void 0 ? void 0 : jsonData.extract) === null || _a === void 0 ? void 0 : _a.schema) {
                        schema = jsonData.extract.schema;
                        // Try parsing the schema as a Zod schema
                        try {
                            schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
                        }
                        catch (error) {
                            // Best-effort: not a Zod schema — send it through unchanged.
                        }
                        jsonData = __assign(__assign({}, jsonData), { extract: __assign(__assign({}, jsonData.extract), { schema: schema }) });
                    }
                    _b.label = 1;
                case 1:
                    _b.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, axios_1.default.post(this.apiUrl + "/v1/scrape", jsonData, { headers: headers })];
                case 2:
                    response = _b.sent();
                    if (response.status === 200) {
                        responseData = response.data;
                        if (responseData.success) {
                            // Flatten the document fields onto the result envelope.
                            return [2 /*return*/, __assign({ success: true, warning: responseData.warning, error: responseData.error }, responseData.data)];
                        }
                        else {
                            throw new FirecrawlError("Failed to scrape URL. Error: ".concat(responseData.error), response.status);
                        }
                    }
                    else {
                        this.handleError(response, "scrape URL");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_1 = _b.sent();
                    // NOTE(review): error_1.response may be undefined for network
                    // errors — handleError is assumed to tolerate that; confirm.
                    this.handleError(error_1.response, "scrape URL");
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
164
|
+
/**
 * Searches using the Firecrawl API (POST /v1/search) and optionally scrapes
 * the results.
 * @param query - The search query string.
 * @param params - Optional parameters; defaults applied here: limit 5,
 *     lang "en", country "us", origin "api", timeout 60000 ms, and
 *     scrapeOptions { formats: [] } (no scraping).
 * @returns { success, data, warning } on success, or
 *     { success: false, error, data: [] } on the fallthrough path.
 * @throws FirecrawlError when the API reports failure or the request errors.
 */
// Compiled async/await state machine: case 0 builds the payload, cases 1-3
// are the try/catch (trys tuple [1, 3, , 4]), case 4 is the fallthrough.
FirecrawlApp.prototype.search = function (query, params) {
    return __awaiter(this, void 0, void 0, function () {
        var headers, jsonData, schema, response, responseData, error_2;
        var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k;
        return __generator(this, function (_l) {
            switch (_l.label) {
                case 0:
                    headers = {
                        "Content-Type": "application/json",
                        Authorization: "Bearer ".concat(this.apiKey),
                    };
                    // `?? default` compiled to null/undefined checks: explicit
                    // falsy values like 0 or "" are preserved, not defaulted.
                    jsonData = {
                        query: query,
                        limit: (_a = params === null || params === void 0 ? void 0 : params.limit) !== null && _a !== void 0 ? _a : 5,
                        tbs: params === null || params === void 0 ? void 0 : params.tbs,
                        filter: params === null || params === void 0 ? void 0 : params.filter,
                        lang: (_b = params === null || params === void 0 ? void 0 : params.lang) !== null && _b !== void 0 ? _b : "en",
                        country: (_c = params === null || params === void 0 ? void 0 : params.country) !== null && _c !== void 0 ? _c : "us",
                        location: params === null || params === void 0 ? void 0 : params.location,
                        origin: (_d = params === null || params === void 0 ? void 0 : params.origin) !== null && _d !== void 0 ? _d : "api",
                        timeout: (_e = params === null || params === void 0 ? void 0 : params.timeout) !== null && _e !== void 0 ? _e : 60000,
                        scrapeOptions: (_f = params === null || params === void 0 ? void 0 : params.scrapeOptions) !== null && _f !== void 0 ? _f : { formats: [] },
                    };
                    if ((_h = (_g = jsonData === null || jsonData === void 0 ? void 0 : jsonData.scrapeOptions) === null || _g === void 0 ? void 0 : _g.extract) === null || _h === void 0 ? void 0 : _h.schema) {
                        schema = jsonData.scrapeOptions.extract.schema;
                        // Try parsing the schema as a Zod schema
                        try {
                            schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
                        }
                        catch (error) {
                            // Best-effort: not a Zod schema — send it through unchanged.
                        }
                        jsonData = __assign(__assign({}, jsonData), { scrapeOptions: __assign(__assign({}, jsonData.scrapeOptions), { extract: __assign(__assign({}, jsonData.scrapeOptions.extract), { schema: schema }) }) });
                    }
                    _l.label = 1;
                case 1:
                    _l.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.postRequest(this.apiUrl + "/v1/search", jsonData, headers)];
                case 2:
                    response = _l.sent();
                    if (response.status === 200) {
                        responseData = response.data;
                        if (responseData.success) {
                            return [2 /*return*/, {
                                    success: true,
                                    data: responseData.data,
                                    warning: responseData.warning,
                                }];
                        }
                        else {
                            throw new FirecrawlError("Failed to search. Error: ".concat(responseData.error), response.status);
                        }
                    }
                    else {
                        this.handleError(response, "search");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_2 = _l.sent();
                    // Re-wrap API errors with status + server-provided details.
                    if ((_k = (_j = error_2.response) === null || _j === void 0 ? void 0 : _j.data) === null || _k === void 0 ? void 0 : _k.error) {
                        throw new FirecrawlError("Request failed with status code ".concat(error_2.response.status, ". Error: ").concat(error_2.response.data.error, " ").concat(error_2.response.data.details ? " - ".concat(JSON.stringify(error_2.response.data.details)) : ''), error_2.response.status);
                    }
                    else {
                        throw new FirecrawlError(error_2.message, 500);
                    }
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/, { success: false, error: "Internal server error.", data: [] }];
            }
        });
    });
};
|
|
240
|
+
/**
 * Initiates a crawl job using the Firecrawl API (POST /v1/crawl) and blocks
 * until it completes by polling via monitorJobStatus.
 * @param url - The URL to crawl.
 * @param params - Additional parameters for the crawl request.
 * @param pollInterval - Time in seconds between job status checks (default 2).
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The final crawl status from monitorJobStatus, or
 *     { success: false, error } on the fallthrough path.
 * @throws FirecrawlError when the request fails.
 */
// Compiled async/await state machine; the outer function only forwards the
// runtime `arguments` so the optional pollInterval/idempotencyKey defaults
// can be applied inside.
FirecrawlApp.prototype.crawlUrl = function (url_1, params_1) {
    return __awaiter(this, arguments, void 0, function (url, params, pollInterval, idempotencyKey) {
        var headers, jsonData, response, id, error_3;
        var _a, _b;
        if (pollInterval === void 0) { pollInterval = 2; }
        return __generator(this, function (_c) {
            switch (_c.label) {
                case 0:
                    headers = this.prepareHeaders(idempotencyKey);
                    jsonData = __assign({ url: url }, params);
                    _c.label = 1;
                case 1:
                    _c.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.postRequest(this.apiUrl + "/v1/crawl", jsonData, headers)];
                case 2:
                    response = _c.sent();
                    if (response.status === 200) {
                        id = response.data.id;
                        // Hand off to polling; resolves when the job finishes.
                        return [2 /*return*/, this.monitorJobStatus(id, headers, pollInterval)];
                    }
                    else {
                        this.handleError(response, "start crawl job");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_3 = _c.sent();
                    // Prefer the server-reported error and status when present.
                    if ((_b = (_a = error_3.response) === null || _a === void 0 ? void 0 : _a.data) === null || _b === void 0 ? void 0 : _b.error) {
                        throw new FirecrawlError("Request failed with status code ".concat(error_3.response.status, ". Error: ").concat(error_3.response.data.error, " ").concat(error_3.response.data.details ? " - ".concat(JSON.stringify(error_3.response.data.details)) : ''), error_3.response.status);
                    }
                    else {
                        throw new FirecrawlError(error_3.message, 500);
                    }
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
286
|
+
/**
 * Starts a crawl job (POST /v1/crawl) WITHOUT waiting for completion —
 * the non-blocking counterpart of crawlUrl.
 * @param url - The URL to crawl.
 * @param params - Additional parameters for the crawl request.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The raw API response body (contains the job id) on HTTP 200, or
 *     { success: false, error } on the fallthrough path.
 * @throws FirecrawlError when the request fails.
 */
FirecrawlApp.prototype.asyncCrawlUrl = function (url, params, idempotencyKey) {
    return __awaiter(this, void 0, void 0, function () {
        var headers, jsonData, response, error_4;
        var _a, _b;
        return __generator(this, function (_c) {
            switch (_c.label) {
                case 0:
                    headers = this.prepareHeaders(idempotencyKey);
                    jsonData = __assign({ url: url }, params);
                    _c.label = 1;
                case 1:
                    _c.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.postRequest(this.apiUrl + "/v1/crawl", jsonData, headers)];
                case 2:
                    response = _c.sent();
                    if (response.status === 200) {
                        return [2 /*return*/, response.data];
                    }
                    else {
                        this.handleError(response, "start crawl job");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_4 = _c.sent();
                    // Prefer the server-reported error and status when present.
                    if ((_b = (_a = error_4.response) === null || _a === void 0 ? void 0 : _a.data) === null || _b === void 0 ? void 0 : _b.error) {
                        throw new FirecrawlError("Request failed with status code ".concat(error_4.response.status, ". Error: ").concat(error_4.response.data.error, " ").concat(error_4.response.data.details ? " - ".concat(JSON.stringify(error_4.response.data.details)) : ''), error_4.response.status);
                    }
                    else {
                        throw new FirecrawlError(error_4.message, 500);
                    }
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
322
|
+
/**
 * Checks the status of a crawl job using the Firecrawl API
 * (GET /v1/crawl/{id}).
 * @param id - The ID of the crawl operation.
 * @param getAllData - Paginate through all the pages of documents, returning
 *     the full list of all documents. Only done when the job status is
 *     "completed". (default: `false`)
 * @returns The job status envelope ({ success, status, total, completed,
 *     creditsUsed, expiresAt, data[, next][, error] }).
 * @throws FirecrawlError for a missing id (400) or any request error (500).
 */
// Compiled async/await state machine: cases 1-2 fetch the status inside the
// try (trys tuple [1, 9, , 10]); cases 3-5 are the pagination loop following
// `next` links; cases 6-8 build the result; case 9 is the catch.
FirecrawlApp.prototype.checkCrawlStatus = function (id_1) {
    return __awaiter(this, arguments, void 0, function (id, getAllData) {
        var headers, response, allData, statusData, data, resp, error_5;
        if (getAllData === void 0) { getAllData = false; }
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    if (!id) {
                        throw new FirecrawlError("No crawl ID provided", 400);
                    }
                    headers = this.prepareHeaders();
                    _a.label = 1;
                case 1:
                    _a.trys.push([1, 9, , 10]);
                    return [4 /*yield*/, this.getRequest("".concat(this.apiUrl, "/v1/crawl/").concat(id), headers)];
                case 2:
                    response = _a.sent();
                    if (!(response.status === 200)) return [3 /*break*/, 7];
                    allData = response.data.data;
                    // Only paginate for completed jobs when explicitly requested.
                    if (!(getAllData && response.data.status === "completed")) return [3 /*break*/, 6];
                    statusData = response.data;
                    if (!("data" in statusData)) return [3 /*break*/, 6];
                    data = statusData.data;
                    _a.label = 3;
                case 3:
                    // Loop head: keep fetching while the last page carries `next`.
                    if (!(typeof statusData === 'object' && 'next' in statusData)) return [3 /*break*/, 5];
                    if (data.length === 0) {
                        return [3 /*break*/, 5];
                    }
                    // NOTE(review): statusData.next is presumably an absolute URL
                    // returned by the API; the same auth headers are reused.
                    return [4 /*yield*/, this.getRequest(statusData.next, headers)];
                case 4:
                    statusData = (_a.sent()).data;
                    data = data.concat(statusData.data);
                    return [3 /*break*/, 3];
                case 5:
                    allData = data;
                    _a.label = 6;
                case 6:
                    resp = {
                        success: response.data.success,
                        status: response.data.status,
                        total: response.data.total,
                        completed: response.data.completed,
                        creditsUsed: response.data.creditsUsed,
                        expiresAt: new Date(response.data.expiresAt),
                        data: allData
                    };
                    if (!response.data.success && response.data.error) {
                        resp = __assign(__assign({}, resp), { success: false, error: response.data.error });
                    }
                    if (response.data.next) {
                        resp.next = response.data.next;
                    }
                    return [2 /*return*/, resp];
                case 7:
                    this.handleError(response, "check crawl status");
                    _a.label = 8;
                case 8: return [3 /*break*/, 10];
                case 9:
                    error_5 = _a.sent();
                    throw new FirecrawlError(error_5.message, 500);
                case 10: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
394
|
+
/**
 * Cancels a crawl job using the Firecrawl API (DELETE /v1/crawl/{id}).
 * @param id - The ID of the crawl operation.
 * @returns The raw API response body on HTTP 200, or
 *     { success: false, error } on the fallthrough path.
 * @throws FirecrawlError wrapping any request error with status 500.
 */
FirecrawlApp.prototype.cancelCrawl = function (id) {
    return __awaiter(this, void 0, void 0, function () {
        var headers, response, error_6;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    headers = this.prepareHeaders();
                    _a.label = 1;
                case 1:
                    _a.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.deleteRequest("".concat(this.apiUrl, "/v1/crawl/").concat(id), headers)];
                case 2:
                    response = _a.sent();
                    if (response.status === 200) {
                        return [2 /*return*/, response.data];
                    }
                    else {
                        this.handleError(response, "cancel crawl job");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_6 = _a.sent();
                    throw new FirecrawlError(error_6.message, 500);
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
427
|
+
/**
 * Initiates a crawl job and returns a CrawlWatcher to monitor the job via
 * WebSocket. Delegates job creation to asyncCrawlUrl.
 * @param url - The URL to crawl.
 * @param params - Additional parameters for the crawl request.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns A CrawlWatcher instance to monitor the crawl job.
 * @throws FirecrawlError (400) when the job does not start successfully.
 */
FirecrawlApp.prototype.crawlUrlAndWatch = function (url, params, idempotencyKey) {
    return __awaiter(this, void 0, void 0, function () {
        var crawl, id;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0: return [4 /*yield*/, this.asyncCrawlUrl(url, params, idempotencyKey)];
                case 1:
                    crawl = _a.sent();
                    if (crawl.success && crawl.id) {
                        id = crawl.id;
                        return [2 /*return*/, new CrawlWatcher(id, this)];
                    }
                    throw new FirecrawlError("Crawl job failed to start", 400);
            }
        });
    });
};
|
|
451
|
+
/**
 * Maps a URL using the Firecrawl API (POST /v1/map), discovering the site's
 * links.
 * @param url - The URL to map.
 * @param params - Additional parameters for the map request.
 * @returns The raw API response body on HTTP 200, or
 *     { success: false, error } on the fallthrough path.
 * @throws FirecrawlError wrapping any request error with status 500.
 */
FirecrawlApp.prototype.mapUrl = function (url, params) {
    return __awaiter(this, void 0, void 0, function () {
        var headers, jsonData, response, error_7;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    headers = this.prepareHeaders();
                    jsonData = __assign({ url: url }, params);
                    _a.label = 1;
                case 1:
                    _a.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.postRequest(this.apiUrl + "/v1/map", jsonData, headers)];
                case 2:
                    response = _a.sent();
                    if (response.status === 200) {
                        return [2 /*return*/, response.data];
                    }
                    else {
                        this.handleError(response, "map");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_7 = _a.sent();
                    throw new FirecrawlError(error_7.message, 500);
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
486
|
+
/**
 * Initiates a batch scrape job for multiple URLs using the Firecrawl API
 * (POST /v1/batch/scrape) and blocks until completion via monitorJobStatus.
 * @param urls - The URLs to scrape.
 * @param params - Additional parameters for the scrape request; may carry an
 *     `extract.schema` which can be a Zod schema or plain JSON schema.
 * @param pollInterval - Time in seconds between job status checks (default 2).
 * @param idempotencyKey - Optional idempotency key for the request.
 * @param webhook - Optional webhook for the batch scrape.
 * @param ignoreInvalidURLs - When set, the API skips invalid URLs instead of
 *     failing the job (forwarded verbatim to the request body).
 * @returns The final job status from monitorJobStatus, or
 *     { success: false, error } on the fallthrough path.
 * @throws FirecrawlError when the request fails.
 */
FirecrawlApp.prototype.batchScrapeUrls = function (urls_1, params_1) {
    return __awaiter(this, arguments, void 0, function (urls, params, pollInterval, idempotencyKey, webhook, ignoreInvalidURLs) {
        var headers, jsonData, schema, response, id, error_8;
        var _a, _b, _c;
        if (pollInterval === void 0) { pollInterval = 2; }
        return __generator(this, function (_d) {
            switch (_d.label) {
                case 0:
                    headers = this.prepareHeaders(idempotencyKey);
                    jsonData = __assign({ urls: urls, webhook: webhook, ignoreInvalidURLs: ignoreInvalidURLs }, params);
                    if ((_a = jsonData === null || jsonData === void 0 ? void 0 : jsonData.extract) === null || _a === void 0 ? void 0 : _a.schema) {
                        schema = jsonData.extract.schema;
                        // Try parsing the schema as a Zod schema
                        try {
                            schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
                        }
                        catch (error) {
                            // Best-effort: not a Zod schema — send it through unchanged.
                        }
                        jsonData = __assign(__assign({}, jsonData), { extract: __assign(__assign({}, jsonData.extract), { schema: schema }) });
                    }
                    _d.label = 1;
                case 1:
                    _d.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.postRequest(this.apiUrl + "/v1/batch/scrape", jsonData, headers)];
                case 2:
                    response = _d.sent();
                    if (response.status === 200) {
                        id = response.data.id;
                        // Hand off to polling; resolves when the job finishes.
                        return [2 /*return*/, this.monitorJobStatus(id, headers, pollInterval)];
                    }
                    else {
                        this.handleError(response, "start batch scrape job");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_8 = _d.sent();
                    // Prefer the server-reported error and status when present.
                    if ((_c = (_b = error_8.response) === null || _b === void 0 ? void 0 : _b.data) === null || _c === void 0 ? void 0 : _c.error) {
                        throw new FirecrawlError("Request failed with status code ".concat(error_8.response.status, ". Error: ").concat(error_8.response.data.error, " ").concat(error_8.response.data.details ? " - ".concat(JSON.stringify(error_8.response.data.details)) : ''), error_8.response.status);
                    }
                    else {
                        throw new FirecrawlError(error_8.message, 500);
                    }
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
543
|
+
/**
 * Starts a batch scrape job (POST /v1/batch/scrape) WITHOUT waiting for
 * completion — the non-blocking counterpart of batchScrapeUrls.
 * @param urls - The URLs to scrape.
 * @param params - Additional parameters for the scrape request.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @param webhook - Optional webhook for the batch scrape.
 * @param ignoreInvalidURLs - When set, the API skips invalid URLs instead of
 *     failing the job.
 * @returns The raw API response body (contains the job id) on HTTP 200, or
 *     { success: false, error } on the fallthrough path.
 * @throws FirecrawlError when the request fails.
 */
FirecrawlApp.prototype.asyncBatchScrapeUrls = function (urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
    return __awaiter(this, void 0, void 0, function () {
        var headers, jsonData, response, error_9;
        var _a, _b;
        return __generator(this, function (_c) {
            switch (_c.label) {
                case 0:
                    headers = this.prepareHeaders(idempotencyKey);
                    jsonData = __assign({ urls: urls, webhook: webhook, ignoreInvalidURLs: ignoreInvalidURLs }, (params !== null && params !== void 0 ? params : {}));
                    _c.label = 1;
                case 1:
                    _c.trys.push([1, 3, , 4]);
                    return [4 /*yield*/, this.postRequest(this.apiUrl + "/v1/batch/scrape", jsonData, headers)];
                case 2:
                    response = _c.sent();
                    if (response.status === 200) {
                        return [2 /*return*/, response.data];
                    }
                    else {
                        this.handleError(response, "start batch scrape job");
                    }
                    return [3 /*break*/, 4];
                case 3:
                    error_9 = _c.sent();
                    // Prefer the server-reported error and status when present.
                    if ((_b = (_a = error_9.response) === null || _a === void 0 ? void 0 : _a.data) === null || _b === void 0 ? void 0 : _b.error) {
                        throw new FirecrawlError("Request failed with status code ".concat(error_9.response.status, ". Error: ").concat(error_9.response.data.error, " ").concat(error_9.response.data.details ? " - ".concat(JSON.stringify(error_9.response.data.details)) : ''), error_9.response.status);
                    }
                    else {
                        throw new FirecrawlError(error_9.message, 500);
                    }
                    return [3 /*break*/, 4];
                case 4: return [2 /*return*/, { success: false, error: "Internal server error." }];
            }
        });
    });
};
|
|
579
|
+
/**
 * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job
 * via WebSocket. Delegates job creation to asyncBatchScrapeUrls.
 * @param urls - The URLs to scrape.
 * @param params - Additional parameters for the scrape request.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @param webhook - Optional webhook for the batch scrape.
 * @param ignoreInvalidURLs - When set, the API skips invalid URLs instead of
 *     failing the job.
 * @returns A CrawlWatcher instance to monitor the batch scrape job.
 * @throws FirecrawlError (400) when the job does not start successfully.
 */
FirecrawlApp.prototype.batchScrapeUrlsAndWatch = function (urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
    return __awaiter(this, void 0, void 0, function () {
        var crawl, id;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0: return [4 /*yield*/, this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs)];
                case 1:
                    crawl = _a.sent();
                    if (crawl.success && crawl.id) {
                        id = crawl.id;
                        return [2 /*return*/, new CrawlWatcher(id, this)];
                    }
                    throw new FirecrawlError("Batch scrape job failed to start", 400);
            }
        });
    });
};
|
|
603
|
+
/**
 * Checks the status of a batch scrape job using the Firecrawl API.
 * @param id - The ID of the batch scrape operation.
 * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
 * @returns The response containing the job status.
 */
FirecrawlApp.prototype.checkBatchScrapeStatus = function (id, getAllData) {
    var _this = this;
    if (getAllData === void 0) { getAllData = false; }
    return Promise.resolve().then(function () {
        if (!id) {
            // Rejects the returned promise (async semantics), as in the original.
            throw new FirecrawlError("No batch scrape ID provided", 400);
        }
        var headers = _this.prepareHeaders();
        // Follows `next` links, concatenating each page's documents.
        // Stops when there is no `next` link or the accumulator is empty.
        var gatherPages = function (page, accumulated) {
            if (typeof page !== 'object' || !('next' in page) || accumulated.length === 0) {
                return Promise.resolve(accumulated);
            }
            return _this.getRequest(page.next, headers).then(function (nextResponse) {
                var nextPage = nextResponse.data;
                return gatherPages(nextPage, accumulated.concat(nextPage.data));
            });
        };
        return _this.getRequest("".concat(_this.apiUrl, "/v1/batch/scrape/").concat(id), headers)
            .then(function (response) {
                if (response.status !== 200) {
                    // handleError always throws; the fallback return mirrors the
                    // original's (unreachable) defensive branch.
                    _this.handleError(response, "check batch scrape status");
                    return { success: false, error: "Internal server error." };
                }
                var pagesPromise = Promise.resolve(response.data.data);
                if (getAllData && response.data.status === "completed" && "data" in response.data) {
                    pagesPromise = gatherPages(response.data, response.data.data);
                }
                return pagesPromise.then(function (allData) {
                    var resp = {
                        success: response.data.success,
                        status: response.data.status,
                        total: response.data.total,
                        completed: response.data.completed,
                        creditsUsed: response.data.creditsUsed,
                        expiresAt: new Date(response.data.expiresAt),
                        data: allData
                    };
                    if (!response.data.success && response.data.error) {
                        resp = __assign(__assign({}, resp), { success: false, error: response.data.error });
                    }
                    if (response.data.next) {
                        resp.next = response.data.next;
                    }
                    return resp;
                });
            })
            .catch(function (requestError) {
                // Any failure during the request or pagination is surfaced as a 500.
                throw new FirecrawlError(requestError.message, 500);
            });
    });
};
|
|
675
|
+
/**
 * Extracts information from URLs using the Firecrawl API.
 * Currently in Beta. Expect breaking changes on future minor versions.
 * @param urls - The URLs to extract information from.
 * @param params - Additional parameters for the extract request (may include a Zod or JSON schema).
 * @returns The response from the extract operation.
 */
FirecrawlApp.prototype.extract = function (urls, params) {
    var _this = this;
    return Promise.resolve().then(function () {
        var headers = _this.prepareHeaders();
        var jsonData = __assign({ urls: urls }, params);
        var jsonSchema;
        try {
            if (!(params === null || params === void 0 ? void 0 : params.schema)) {
                jsonSchema = undefined;
            }
            else if (params.schema instanceof zt.ZodType) {
                // Convert a Zod schema into a plain JSON schema for the API.
                jsonSchema = (0, zod_to_json_schema_1.zodToJsonSchema)(params.schema);
            }
            else {
                jsonSchema = params.schema;
            }
        }
        catch (error) {
            throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
        }
        // Polls the job status once per second until it completes or fails.
        var waitForCompletion = function (jobId) {
            return _this.getRequest("".concat(_this.apiUrl, "/v1/extract/").concat(jobId), headers).then(function (statusResponse) {
                var extractStatus = statusResponse.data;
                if (extractStatus.status === "completed") {
                    if (extractStatus.success) {
                        return {
                            success: true,
                            data: extractStatus.data,
                            warning: extractStatus.warning,
                            error: extractStatus.error
                        };
                    }
                    throw new FirecrawlError("Failed to extract data. Error: ".concat(extractStatus.error), statusResponse.status);
                }
                if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
                    throw new FirecrawlError("Extract job ".concat(extractStatus.status, ". Error: ").concat(extractStatus.error), statusResponse.status);
                }
                // Still running: wait one second, then poll again.
                return new Promise(function (resolve) { return setTimeout(resolve, 1000); }).then(function () {
                    return waitForCompletion(jobId);
                });
            });
        };
        return Promise.resolve()
            .then(function () {
                return _this.postRequest(_this.apiUrl + "/v1/extract", __assign(__assign({}, jsonData), { schema: jsonSchema }), headers);
            })
            .then(function (response) {
                if (response.status !== 200) {
                    // handleError always throws; the fallback mirrors the original's
                    // defensive return.
                    _this.handleError(response, "extract");
                    return { success: false, error: "Internal server error." };
                }
                return waitForCompletion(response.data.id);
            })
            .catch(function (requestError) {
                // Any error raised after the request starts is normalized to a 500.
                throw new FirecrawlError(requestError.message, 500);
            });
    });
};
|
|
754
|
+
/**
 * Prepares the headers for an API request.
 * @param idempotencyKey - Optional key to ensure idempotency.
 * @returns The prepared headers.
 */
FirecrawlApp.prototype.prepareHeaders = function (idempotencyKey) {
    var headers = {
        "Content-Type": "application/json",
        Authorization: "Bearer ".concat(this.apiKey),
    };
    // The idempotency header is only attached when a key was supplied.
    if (idempotencyKey) {
        headers["x-idempotency-key"] = idempotencyKey;
    }
    return headers;
};
|
|
762
|
+
/**
 * Sends a POST request to the specified URL.
 * @param url - The URL to send the request to.
 * @param data - The data to send in the request.
 * @param headers - The headers for the request.
 * @returns The response from the POST request.
 */
FirecrawlApp.prototype.postRequest = function (url, data, headers) {
    var requestConfig = { headers: headers };
    return axios_1.default.post(url, data, requestConfig);
};
|
|
772
|
+
/**
 * Sends a GET request to the specified URL.
 * @param url - The URL to send the request to.
 * @param headers - The headers for the request.
 * @returns The response from the GET request; when the request fails with an
 *          AxiosError that carries a response, that error response is returned
 *          instead so callers can inspect HTTP-level failures.
 */
FirecrawlApp.prototype.getRequest = function (url, headers) {
    return Promise.resolve()
        .then(function () { return axios_1.default.get(url, { headers: headers }); })
        .catch(function (requestError) {
            // Surface HTTP error responses to the caller; rethrow anything else
            // (network failures, programming errors).
            if (requestError instanceof axios_1.AxiosError && requestError.response) {
                return requestError.response;
            }
            throw requestError;
        });
};
|
|
801
|
+
/**
 * Sends a DELETE request to the specified URL.
 * @param url - The URL to send the request to.
 * @param headers - The headers for the request.
 * @returns The response from the DELETE request; when the request fails with an
 *          AxiosError that carries a response, that error response is returned
 *          instead so callers can inspect HTTP-level failures.
 */
FirecrawlApp.prototype.deleteRequest = function (url, headers) {
    return Promise.resolve()
        .then(function () { return axios_1.default.delete(url, { headers: headers }); })
        .catch(function (requestError) {
            // Surface HTTP error responses to the caller; rethrow anything else.
            if (requestError instanceof axios_1.AxiosError && requestError.response) {
                return requestError.response;
            }
            throw requestError;
        });
};
|
|
830
|
+
/**
 * Monitors the status of a crawl job until completion or failure.
 * @param id - The ID of the crawl operation.
 * @param headers - The headers for the request.
 * @param checkInterval - Interval in seconds for job status checks (clamped to a 2s minimum while waiting).
 * @returns The final job status data, with all paginated documents merged into `data`.
 */
FirecrawlApp.prototype.monitorJobStatus = function (id, headers, checkInterval) {
    var _this = this;
    // Follows `next` links until all pages are gathered, then returns the last
    // page object with the accumulated documents attached (as the original did).
    var collectAllPages = function (page, accumulated) {
        if (typeof page === 'object' && 'next' in page && accumulated.length !== 0) {
            return _this.getRequest(page.next, headers).then(function (nextResponse) {
                var nextPage = nextResponse.data;
                return collectAllPages(nextPage, accumulated.concat(nextPage.data));
            });
        }
        page.data = accumulated;
        return Promise.resolve(page);
    };
    var poll = function (intervalSeconds) {
        return _this.getRequest("".concat(_this.apiUrl, "/v1/crawl/").concat(id), headers).then(function (statusResponse) {
            if (statusResponse.status !== 200) {
                // handleError always throws; looping here mirrors the original control flow.
                _this.handleError(statusResponse, "check crawl status");
                return poll(intervalSeconds);
            }
            var statusData = statusResponse.data;
            if (statusData.status === "completed") {
                if (!("data" in statusData)) {
                    throw new FirecrawlError("Crawl job completed but no data was returned", 500);
                }
                return collectAllPages(statusData, statusData.data);
            }
            if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
                var waitSeconds = Math.max(intervalSeconds, 2);
                return new Promise(function (resolve) {
                    return setTimeout(resolve, waitSeconds * 1000);
                }).then(function () { return poll(waitSeconds); });
            }
            throw new FirecrawlError("Crawl job failed or was stopped. Status: ".concat(statusData.status), 500);
        });
    };
    return poll(checkInterval).catch(function (error) {
        // Fix: wrap the underlying error's *message* (previously the Error object
        // itself was passed as the message, yielding "[object Error]"-style text;
        // sibling methods all use `.message`).
        throw new FirecrawlError(error instanceof Error ? error.message : String(error), 500);
    });
};
|
|
897
|
+
/**
 * Handles errors from API responses.
 * @param {AxiosResponse} response - The response from the API.
 * @param {string} action - The action being performed when the error occurred.
 */
FirecrawlApp.prototype.handleError = function (response, action) {
    // Statuses whose body is expected to carry a server-supplied error message.
    var knownErrorStatuses = [402, 408, 409, 500];
    if (knownErrorStatuses.indexOf(response.status) !== -1) {
        var errorDetail = response.data.error || "Unknown error occurred";
        throw new FirecrawlError("Failed to ".concat(action, ". Status code: ").concat(response.status, ". Error: ").concat(errorDetail), response.status);
    }
    throw new FirecrawlError("Unexpected error occurred while trying to ".concat(action, ". Status code: ").concat(response.status), response.status);
};
|
|
911
|
+
return FirecrawlApp;
|
|
912
|
+
}());
|
|
913
|
+
exports.default = FirecrawlApp;
|
|
914
|
+
var CrawlWatcher = /** @class */ (function (_super) {
    __extends(CrawlWatcher, _super);
    /**
     * Watches a crawl job over a WebSocket connection and re-emits typed events
     * ("done", "error", "document") as the job progresses.
     * @param id - The ID of the crawl job to watch.
     * @param app - The FirecrawlApp instance providing apiUrl and apiKey.
     */
    function CrawlWatcher(id, app) {
        var _this = _super.call(this) || this;
        _this.id = id;
        // The API key is passed as the WebSocket sub-protocol argument.
        _this.ws = new isows_1.WebSocket("".concat(app.apiUrl, "/v1/crawl/").concat(id), app.apiKey);
        _this.status = "scraping";
        _this.data = [];
        // Translates a parsed server message into the corresponding typed event.
        var handleServerMessage = function (message) {
            if (message.type === "done") {
                _this.status = "completed";
                _this.dispatchTypedEvent("done", new CustomEvent("done", {
                    detail: {
                        status: _this.status,
                        data: _this.data,
                        id: _this.id,
                    },
                }));
            }
            else if (message.type === "error") {
                _this.status = "failed";
                _this.dispatchTypedEvent("error", new CustomEvent("error", {
                    detail: {
                        status: _this.status,
                        data: _this.data,
                        error: message.error,
                        id: _this.id,
                    },
                }));
            }
            else if (message.type === "catchup") {
                _this.status = message.data.status;
                var caughtUpDocs = message.data.data;
                if (caughtUpDocs === null || caughtUpDocs === undefined) {
                    caughtUpDocs = [];
                }
                for (var i = 0; i < caughtUpDocs.length; i++) {
                    _this.data.push(caughtUpDocs[i]);
                }
                // Note: a "document" event is emitted for every document
                // accumulated so far, not only the newly caught-up ones
                // (preserves the original behavior).
                for (var j = 0; j < _this.data.length; j++) {
                    _this.dispatchTypedEvent("document", new CustomEvent("document", {
                        detail: __assign(__assign({}, _this.data[j]), { id: _this.id }),
                    }));
                }
            }
            else if (message.type === "document") {
                _this.dispatchTypedEvent("document", new CustomEvent("document", {
                    detail: __assign(__assign({}, message.data), { id: _this.id }),
                }));
            }
        };
        _this.ws.onmessage = (function (ev) {
            // Only textual frames are expected; anything else ends the session.
            if (typeof ev.data !== "string") {
                _this.ws.close();
                return;
            }
            try {
                handleServerMessage(JSON.parse(ev.data));
            }
            catch (error) {
                console.error("Error on message", error);
            }
        }).bind(_this);
        _this.ws.onclose = (function (ev) {
            // The server may encode its final message in the close reason.
            try {
                handleServerMessage(JSON.parse(ev.reason));
            }
            catch (error) {
                console.error("Error on close", error);
            }
        }).bind(_this);
        _this.ws.onerror = (function (_) {
            _this.status = "failed";
            _this.dispatchTypedEvent("error", new CustomEvent("error", {
                detail: {
                    status: _this.status,
                    data: _this.data,
                    error: "WebSocket error",
                    id: _this.id,
                },
            }));
        }).bind(_this);
        return _this;
    }
    /** Closes the underlying WebSocket connection. */
    CrawlWatcher.prototype.close = function () {
        this.ws.close();
    };
    return CrawlWatcher;
}(typescript_event_target_1.TypedEventTarget));
exports.CrawlWatcher = CrawlWatcher;
|