@mendable/firecrawl-js 0.0.29-beta.5 → 0.0.29-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +141 -138
- package/package.json +9 -11
- package/src/__tests__/e2e_withAuth/index.test.ts +1 -1
- package/src/__tests__/index.test.ts +1 -1
- package/tsconfig.json +108 -34
- package/build/__tests__/e2e_withAuth/index.test.js +0 -138
- package/build/__tests__/index.test.js +0 -41
- package/dist-cjs/__tests__/e2e_withAuth/index.test.js +0 -135
- package/dist-cjs/__tests__/index.test.js +0 -38
- package/dist-cjs/index.js +0 -263
- package/tsconfig.cjs.json +0 -14
- package/types/__tests__/e2e_withAuth/index.test.d.ts +0 -1
- package/types/__tests__/index.test.d.ts +0 -1
package/build/index.js
CHANGED
|
@@ -1,15 +1,19 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
4
9
|
};
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
10
|
+
import axios from "axios";
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
9
13
|
/**
|
|
10
14
|
* Main class for interacting with the Firecrawl API.
|
|
11
15
|
*/
|
|
12
|
-
class FirecrawlApp {
|
|
16
|
+
export default class FirecrawlApp {
|
|
13
17
|
/**
|
|
14
18
|
* Initializes a new instance of the FirecrawlApp class.
|
|
15
19
|
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
@@ -27,46 +31,42 @@ class FirecrawlApp {
|
|
|
27
31
|
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
28
32
|
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
29
33
|
*/
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if (params?.extractorOptions?.extractionSchema) {
|
|
37
|
-
let schema = params.extractorOptions.extractionSchema;
|
|
38
|
-
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
39
|
-
if (schema instanceof zod_1.z.ZodSchema) {
|
|
40
|
-
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
41
|
-
}
|
|
42
|
-
jsonData = {
|
|
43
|
-
...jsonData,
|
|
44
|
-
extractorOptions: {
|
|
45
|
-
...params.extractorOptions,
|
|
46
|
-
extractionSchema: schema,
|
|
47
|
-
mode: params.extractorOptions.mode || "llm-extraction",
|
|
48
|
-
},
|
|
34
|
+
scrapeUrl(url_1) {
|
|
35
|
+
return __awaiter(this, arguments, void 0, function* (url, params = null) {
|
|
36
|
+
var _a;
|
|
37
|
+
const headers = {
|
|
38
|
+
"Content-Type": "application/json",
|
|
39
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
49
40
|
};
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
41
|
+
let jsonData = Object.assign({ url }, params);
|
|
42
|
+
if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
|
|
43
|
+
let schema = params.extractorOptions.extractionSchema;
|
|
44
|
+
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
45
|
+
if (schema instanceof z.ZodSchema) {
|
|
46
|
+
schema = zodToJsonSchema(schema);
|
|
47
|
+
}
|
|
48
|
+
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
49
|
+
}
|
|
50
|
+
try {
|
|
51
|
+
const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
|
52
|
+
if (response.status === 200) {
|
|
53
|
+
const responseData = response.data;
|
|
54
|
+
if (responseData.success) {
|
|
55
|
+
return responseData;
|
|
56
|
+
}
|
|
57
|
+
else {
|
|
58
|
+
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
59
|
+
}
|
|
57
60
|
}
|
|
58
61
|
else {
|
|
59
|
-
|
|
62
|
+
this.handleError(response, "scrape URL");
|
|
60
63
|
}
|
|
61
64
|
}
|
|
62
|
-
|
|
63
|
-
|
|
65
|
+
catch (error) {
|
|
66
|
+
throw new Error(error.message);
|
|
64
67
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
throw new Error(error.message);
|
|
68
|
-
}
|
|
69
|
-
return { success: false, error: "Internal server error." };
|
|
68
|
+
return { success: false, error: "Internal server error." };
|
|
69
|
+
});
|
|
70
70
|
}
|
|
71
71
|
/**
|
|
72
72
|
* Searches for a query using the Firecrawl API.
|
|
@@ -74,34 +74,36 @@ class FirecrawlApp {
|
|
|
74
74
|
* @param {Params | null} params - Additional parameters for the search request.
|
|
75
75
|
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
76
76
|
*/
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
77
|
+
search(query_1) {
|
|
78
|
+
return __awaiter(this, arguments, void 0, function* (query, params = null) {
|
|
79
|
+
const headers = {
|
|
80
|
+
"Content-Type": "application/json",
|
|
81
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
82
|
+
};
|
|
83
|
+
let jsonData = { query };
|
|
84
|
+
if (params) {
|
|
85
|
+
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
86
|
+
}
|
|
87
|
+
try {
|
|
88
|
+
const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
89
|
+
if (response.status === 200) {
|
|
90
|
+
const responseData = response.data;
|
|
91
|
+
if (responseData.success) {
|
|
92
|
+
return responseData;
|
|
93
|
+
}
|
|
94
|
+
else {
|
|
95
|
+
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
96
|
+
}
|
|
92
97
|
}
|
|
93
98
|
else {
|
|
94
|
-
|
|
99
|
+
this.handleError(response, "search");
|
|
95
100
|
}
|
|
96
101
|
}
|
|
97
|
-
|
|
98
|
-
|
|
102
|
+
catch (error) {
|
|
103
|
+
throw new Error(error.message);
|
|
99
104
|
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
throw new Error(error.message);
|
|
103
|
-
}
|
|
104
|
-
return { success: false, error: "Internal server error." };
|
|
105
|
+
return { success: false, error: "Internal server error." };
|
|
106
|
+
});
|
|
105
107
|
}
|
|
106
108
|
/**
|
|
107
109
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
@@ -112,75 +114,75 @@ class FirecrawlApp {
|
|
|
112
114
|
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
113
115
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
114
116
|
*/
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
117
|
+
crawlUrl(url_1) {
|
|
118
|
+
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
119
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
120
|
+
let jsonData = { url };
|
|
121
|
+
if (params) {
|
|
122
|
+
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
|
123
|
+
}
|
|
124
|
+
try {
|
|
125
|
+
const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
|
126
|
+
if (response.status === 200) {
|
|
127
|
+
const jobId = response.data.jobId;
|
|
128
|
+
if (waitUntilDone) {
|
|
129
|
+
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
return { success: true, jobId };
|
|
133
|
+
}
|
|
127
134
|
}
|
|
128
135
|
else {
|
|
129
|
-
|
|
136
|
+
this.handleError(response, "start crawl job");
|
|
130
137
|
}
|
|
131
138
|
}
|
|
132
|
-
|
|
133
|
-
|
|
139
|
+
catch (error) {
|
|
140
|
+
console.log(error);
|
|
141
|
+
throw new Error(error.message);
|
|
134
142
|
}
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
console.log(error);
|
|
138
|
-
throw new Error(error.message);
|
|
139
|
-
}
|
|
140
|
-
return { success: false, error: "Internal server error." };
|
|
143
|
+
return { success: false, error: "Internal server error." };
|
|
144
|
+
});
|
|
141
145
|
}
|
|
142
146
|
/**
|
|
143
147
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
144
148
|
* @param {string} jobId - The job ID of the crawl operation.
|
|
145
149
|
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
146
150
|
*/
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
151
|
+
checkCrawlStatus(jobId) {
|
|
152
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
153
|
+
const headers = this.prepareHeaders();
|
|
154
|
+
try {
|
|
155
|
+
const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
156
|
+
if (response.status === 200) {
|
|
157
|
+
return {
|
|
158
|
+
success: true,
|
|
159
|
+
status: response.data.status,
|
|
160
|
+
data: response.data.data,
|
|
161
|
+
partial_data: !response.data.data
|
|
162
|
+
? response.data.partial_data
|
|
163
|
+
: undefined,
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
else {
|
|
167
|
+
this.handleError(response, "check crawl status");
|
|
168
|
+
}
|
|
160
169
|
}
|
|
161
|
-
|
|
162
|
-
|
|
170
|
+
catch (error) {
|
|
171
|
+
throw new Error(error.message);
|
|
163
172
|
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
status: "unknown",
|
|
171
|
-
error: "Internal server error.",
|
|
172
|
-
};
|
|
173
|
+
return {
|
|
174
|
+
success: false,
|
|
175
|
+
status: "unknown",
|
|
176
|
+
error: "Internal server error.",
|
|
177
|
+
};
|
|
178
|
+
});
|
|
173
179
|
}
|
|
174
180
|
/**
|
|
175
181
|
* Prepares the headers for an API request.
|
|
176
182
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
177
183
|
*/
|
|
178
184
|
prepareHeaders(idempotencyKey) {
|
|
179
|
-
return {
|
|
180
|
-
"Content-Type": "application/json",
|
|
181
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
182
|
-
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
|
183
|
-
};
|
|
185
|
+
return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
|
|
184
186
|
}
|
|
185
187
|
/**
|
|
186
188
|
* Sends a POST request to the specified URL.
|
|
@@ -190,7 +192,7 @@ class FirecrawlApp {
|
|
|
190
192
|
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
191
193
|
*/
|
|
192
194
|
postRequest(url, data, headers) {
|
|
193
|
-
return axios_1.default.post(url, data, { headers });
|
|
195
|
+
return axios.post(url, data, { headers });
|
|
194
196
|
}
|
|
195
197
|
/**
|
|
196
198
|
* Sends a GET request to the specified URL.
|
|
@@ -199,7 +201,7 @@ class FirecrawlApp {
|
|
|
199
201
|
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
200
202
|
*/
|
|
201
203
|
getRequest(url, headers) {
|
|
202
|
-
return axios_1.default.get(url, { headers });
|
|
204
|
+
return axios.get(url, { headers });
|
|
203
205
|
}
|
|
204
206
|
/**
|
|
205
207
|
* Monitors the status of a crawl job until completion or failure.
|
|
@@ -208,33 +210,35 @@ class FirecrawlApp {
|
|
|
208
210
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
209
211
|
* @returns {Promise<any>} The final job status or data.
|
|
210
212
|
*/
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
if ("
|
|
218
|
-
|
|
213
|
+
monitorJobStatus(jobId, headers, checkInterval) {
|
|
214
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
215
|
+
while (true) {
|
|
216
|
+
const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
217
|
+
if (statusResponse.status === 200) {
|
|
218
|
+
const statusData = statusResponse.data;
|
|
219
|
+
if (statusData.status === "completed") {
|
|
220
|
+
if ("data" in statusData) {
|
|
221
|
+
return statusData.data;
|
|
222
|
+
}
|
|
223
|
+
else {
|
|
224
|
+
throw new Error("Crawl job completed but no data was returned");
|
|
225
|
+
}
|
|
219
226
|
}
|
|
220
|
-
else {
|
|
221
|
-
|
|
227
|
+
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
228
|
+
if (checkInterval < 2) {
|
|
229
|
+
checkInterval = 2;
|
|
230
|
+
}
|
|
231
|
+
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
222
232
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
if (checkInterval < 2) {
|
|
226
|
-
checkInterval = 2;
|
|
233
|
+
else {
|
|
234
|
+
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
227
235
|
}
|
|
228
|
-
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
229
236
|
}
|
|
230
237
|
else {
|
|
231
|
-
|
|
238
|
+
this.handleError(statusResponse, "check crawl status");
|
|
232
239
|
}
|
|
233
240
|
}
|
|
234
|
-
|
|
235
|
-
this.handleError(statusResponse, "check crawl status");
|
|
236
|
-
}
|
|
237
|
-
}
|
|
241
|
+
});
|
|
238
242
|
}
|
|
239
243
|
/**
|
|
240
244
|
* Handles errors from API responses.
|
|
@@ -251,4 +255,3 @@ class FirecrawlApp {
|
|
|
251
255
|
}
|
|
252
256
|
}
|
|
253
257
|
}
|
|
254
|
-
exports.default = FirecrawlApp;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "0.0.29-beta.5",
|
|
3
|
+
"version": "0.0.29-beta.7",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
"scripts": {
|
|
9
9
|
"build": "tsc",
|
|
10
10
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
11
|
-
"build-cjs": "tsc --outDir dist-cjs/ --project tsconfig.cjs.json",
|
|
12
11
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
13
12
|
"test": "jest src/__tests__/**/*.test.ts"
|
|
14
13
|
},
|
|
@@ -19,7 +18,6 @@
|
|
|
19
18
|
"author": "Mendable.ai",
|
|
20
19
|
"license": "MIT",
|
|
21
20
|
"dependencies": {
|
|
22
|
-
"@tsconfig/recommended": "^1.0.6",
|
|
23
21
|
"axios": "^1.6.8",
|
|
24
22
|
"dotenv": "^16.4.5",
|
|
25
23
|
"uuid": "^9.0.1",
|
|
@@ -40,13 +38,7 @@
|
|
|
40
38
|
"@types/uuid": "^9.0.8",
|
|
41
39
|
"jest": "^29.7.0",
|
|
42
40
|
"ts-jest": "^29.1.2",
|
|
43
|
-
"typescript": "
|
|
44
|
-
},
|
|
45
|
-
"exports": {
|
|
46
|
-
".": {
|
|
47
|
-
"import": "./build/index.js",
|
|
48
|
-
"require": "./dist-cjs/index.js"
|
|
49
|
-
}
|
|
41
|
+
"typescript": "^5.4.5"
|
|
50
42
|
},
|
|
51
43
|
"keywords": [
|
|
52
44
|
"firecrawl",
|
|
@@ -56,5 +48,11 @@
|
|
|
56
48
|
"scraper",
|
|
57
49
|
"api",
|
|
58
50
|
"sdk"
|
|
59
|
-
]
|
|
51
|
+
],
|
|
52
|
+
"exports": {
|
|
53
|
+
".": {
|
|
54
|
+
"import": "./build/index.js"
|
|
55
|
+
},
|
|
56
|
+
"./types": "./types/index.d.ts"
|
|
57
|
+
}
|
|
60
58
|
}
|
package/tsconfig.json
CHANGED
|
@@ -1,37 +1,111 @@
|
|
|
1
1
|
{
|
|
2
|
-
"extends": "@tsconfig/recommended",
|
|
3
2
|
"compilerOptions": {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
"
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
"
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
3
|
+
/* Visit https://aka.ms/tsconfig to read more about this file */
|
|
4
|
+
|
|
5
|
+
/* Projects */
|
|
6
|
+
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
|
|
7
|
+
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
|
|
8
|
+
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
|
|
9
|
+
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
|
|
10
|
+
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
|
|
11
|
+
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
|
12
|
+
|
|
13
|
+
/* Language and Environment */
|
|
14
|
+
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
|
15
|
+
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
|
16
|
+
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
|
17
|
+
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
|
18
|
+
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
|
|
19
|
+
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
|
|
20
|
+
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
|
|
21
|
+
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
|
|
22
|
+
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
|
|
23
|
+
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
|
|
24
|
+
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
|
|
25
|
+
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
|
26
|
+
|
|
27
|
+
/* Modules */
|
|
28
|
+
"module": "NodeNext", /* Specify what module code is generated. */
|
|
29
|
+
"rootDir": "./src", /* Specify the root folder within your source files. */
|
|
30
|
+
"moduleResolution": "nodenext", /* Specify how TypeScript looks up a file from a given module specifier. */
|
|
31
|
+
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
|
32
|
+
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
|
33
|
+
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|
|
34
|
+
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
|
|
35
|
+
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
|
|
36
|
+
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
|
37
|
+
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
|
|
38
|
+
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
|
|
39
|
+
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
|
|
40
|
+
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
|
|
41
|
+
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
|
|
42
|
+
// "resolveJsonModule": true, /* Enable importing .json files. */
|
|
43
|
+
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
|
|
44
|
+
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
|
|
45
|
+
|
|
46
|
+
/* JavaScript Support */
|
|
47
|
+
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
|
|
48
|
+
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
|
|
49
|
+
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
|
|
50
|
+
|
|
51
|
+
/* Emit */
|
|
52
|
+
"declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
|
|
53
|
+
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
|
|
54
|
+
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
|
|
55
|
+
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
|
|
56
|
+
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
|
|
57
|
+
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
|
|
58
|
+
"outDir": "./build", /* Specify an output folder for all emitted files. */
|
|
59
|
+
// "removeComments": true, /* Disable emitting comments. */
|
|
60
|
+
// "noEmit": true, /* Disable emitting files from a compilation. */
|
|
61
|
+
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
|
|
62
|
+
// "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types. */
|
|
63
|
+
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
|
|
64
|
+
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
|
|
65
|
+
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
|
66
|
+
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
|
|
67
|
+
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
|
|
68
|
+
// "newLine": "crlf", /* Set the newline character for emitting files. */
|
|
69
|
+
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
|
|
70
|
+
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
|
|
71
|
+
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
|
|
72
|
+
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
|
|
73
|
+
"declarationDir": "./types", /* Specify the output directory for generated declaration files. */
|
|
74
|
+
// "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */
|
|
75
|
+
|
|
76
|
+
/* Interop Constraints */
|
|
77
|
+
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
|
|
78
|
+
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
|
|
79
|
+
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
|
|
80
|
+
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
|
|
81
|
+
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
|
|
82
|
+
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
|
|
83
|
+
|
|
84
|
+
/* Type Checking */
|
|
85
|
+
"strict": true, /* Enable all strict type-checking options. */
|
|
86
|
+
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
|
|
87
|
+
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
|
|
88
|
+
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
|
|
89
|
+
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
|
|
90
|
+
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
|
|
91
|
+
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
|
|
92
|
+
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
|
|
93
|
+
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
|
|
94
|
+
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
|
|
95
|
+
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
|
|
96
|
+
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
|
|
97
|
+
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
|
|
98
|
+
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
|
|
99
|
+
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
|
|
100
|
+
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
|
|
101
|
+
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
|
|
102
|
+
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
|
|
103
|
+
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
|
|
104
|
+
|
|
105
|
+
/* Completeness */
|
|
106
|
+
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
|
|
107
|
+
"skipLibCheck": true /* Skip type checking all .d.ts files. */
|
|
24
108
|
},
|
|
25
|
-
"include": [
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"exclude": [
|
|
29
|
-
"node_modules",
|
|
30
|
-
"dist",
|
|
31
|
-
"docs"
|
|
32
|
-
],
|
|
33
|
-
".": {
|
|
34
|
-
"import": "./build/index.js",
|
|
35
|
-
"require": "./dist-cjs/index.js"
|
|
36
|
-
}
|
|
37
|
-
}
|
|
109
|
+
"include": ["src/**/*"],
|
|
110
|
+
"exclude": ["node_modules", "dist", "**/__tests__/*"]
|
|
111
|
+
}
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const index_1 = __importDefault(require("../../index"));
|
|
7
|
-
const uuid_1 = require("uuid");
|
|
8
|
-
const dotenv_1 = __importDefault(require("dotenv"));
|
|
9
|
-
dotenv_1.default.config();
|
|
10
|
-
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
11
|
-
const API_URL = "http://127.0.0.1:3002";
|
|
12
|
-
describe('FirecrawlApp E2E Tests', () => {
|
|
13
|
-
test.concurrent('should throw error for no API key', () => {
|
|
14
|
-
expect(() => {
|
|
15
|
-
new index_1.default({ apiKey: null, apiUrl: API_URL });
|
|
16
|
-
}).toThrow("No API key provided");
|
|
17
|
-
});
|
|
18
|
-
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
19
|
-
const invalidApp = new index_1.default({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
20
|
-
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
21
|
-
});
|
|
22
|
-
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
|
23
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
24
|
-
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
25
|
-
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
26
|
-
});
|
|
27
|
-
test.concurrent('should return successful response with valid preview token', async () => {
|
|
28
|
-
const app = new index_1.default({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
|
29
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
30
|
-
expect(response).not.toBeNull();
|
|
31
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
32
|
-
}, 30000); // 30 seconds timeout
|
|
33
|
-
test.concurrent('should return successful response for valid scrape', async () => {
|
|
34
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
35
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
36
|
-
expect(response).not.toBeNull();
|
|
37
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
38
|
-
expect(response.data).toHaveProperty('markdown');
|
|
39
|
-
expect(response.data).toHaveProperty('metadata');
|
|
40
|
-
expect(response.data).not.toHaveProperty('html');
|
|
41
|
-
}, 30000); // 30 seconds timeout
|
|
42
|
-
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
|
43
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
44
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
|
|
45
|
-
expect(response).not.toBeNull();
|
|
46
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
47
|
-
expect(response.data?.markdown).toContain("_Roast_");
|
|
48
|
-
expect(response.data?.html).toContain("<h1");
|
|
49
|
-
}, 30000); // 30 seconds timeout
|
|
50
|
-
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
|
51
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
52
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
|
53
|
-
expect(response).not.toBeNull();
|
|
54
|
-
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
55
|
-
}, 30000); // 30 seconds timeout
|
|
56
|
-
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
57
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
58
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
|
59
|
-
expect(response).not.toBeNull();
|
|
60
|
-
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
61
|
-
}, 30000); // 30 seconds timeout
|
|
62
|
-
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
63
|
-
const invalidApp = new index_1.default({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
64
|
-
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
65
|
-
});
|
|
66
|
-
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
|
67
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
68
|
-
const blocklistedUrl = "https://twitter.com/fake-test";
|
|
69
|
-
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
70
|
-
});
|
|
71
|
-
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
72
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
73
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
|
74
|
-
expect(response).not.toBeNull();
|
|
75
|
-
expect(response[0].content).toContain("_Roast_");
|
|
76
|
-
}, 60000); // 60 seconds timeout
|
|
77
|
-
test.concurrent('should handle idempotency key for crawl', async () => {
|
|
78
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
79
|
-
const uniqueIdempotencyKey = (0, uuid_1.v4)();
|
|
80
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
|
81
|
-
expect(response).not.toBeNull();
|
|
82
|
-
expect(response.jobId).toBeDefined();
|
|
83
|
-
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
84
|
-
});
|
|
85
|
-
test.concurrent('should check crawl status', async () => {
|
|
86
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
87
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
|
88
|
-
expect(response).not.toBeNull();
|
|
89
|
-
expect(response.jobId).toBeDefined();
|
|
90
|
-
let statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
91
|
-
const maxChecks = 15;
|
|
92
|
-
let checks = 0;
|
|
93
|
-
while (statusResponse.status === 'active' && checks < maxChecks) {
|
|
94
|
-
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
95
|
-
expect(statusResponse.partial_data).not.toBeNull();
|
|
96
|
-
statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
97
|
-
checks++;
|
|
98
|
-
}
|
|
99
|
-
expect(statusResponse).not.toBeNull();
|
|
100
|
-
expect(statusResponse.status).toBe('completed');
|
|
101
|
-
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
|
102
|
-
}, 35000); // 35 seconds timeout
|
|
103
|
-
test.concurrent('should return successful response for search', async () => {
|
|
104
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
105
|
-
const response = await app.search("test query");
|
|
106
|
-
expect(response).not.toBeNull();
|
|
107
|
-
expect(response?.data?.[0]?.content).toBeDefined();
|
|
108
|
-
expect(response?.data?.length).toBeGreaterThan(2);
|
|
109
|
-
}, 30000); // 30 seconds timeout
|
|
110
|
-
test.concurrent('should throw error for invalid API key on search', async () => {
|
|
111
|
-
const invalidApp = new index_1.default({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
112
|
-
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
|
113
|
-
});
|
|
114
|
-
test.concurrent('should perform LLM extraction', async () => {
|
|
115
|
-
const app = new index_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
116
|
-
const response = await app.scrapeUrl("https://mendable.ai", {
|
|
117
|
-
extractorOptions: {
|
|
118
|
-
mode: 'llm-extraction',
|
|
119
|
-
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
|
120
|
-
extractionSchema: {
|
|
121
|
-
type: 'object',
|
|
122
|
-
properties: {
|
|
123
|
-
company_mission: { type: 'string' },
|
|
124
|
-
supports_sso: { type: 'boolean' },
|
|
125
|
-
is_open_source: { type: 'boolean' }
|
|
126
|
-
},
|
|
127
|
-
required: ['company_mission', 'supports_sso', 'is_open_source']
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
});
|
|
131
|
-
expect(response).not.toBeNull();
|
|
132
|
-
expect(response.data?.llm_extraction).toBeDefined();
|
|
133
|
-
const llmExtraction = response.data?.llm_extraction;
|
|
134
|
-
expect(llmExtraction?.company_mission).toBeDefined();
|
|
135
|
-
expect(typeof llmExtraction?.supports_sso).toBe('boolean');
|
|
136
|
-
expect(typeof llmExtraction?.is_open_source).toBe('boolean');
|
|
137
|
-
}, 30000); // 30 seconds timeout
|
|
138
|
-
});
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
const globals_1 = require("@jest/globals");
|
|
7
|
-
const axios_1 = __importDefault(require("axios"));
|
|
8
|
-
const index_1 = __importDefault(require("../index"));
|
|
9
|
-
const promises_1 = require("fs/promises");
|
|
10
|
-
const path_1 = require("path");
|
|
11
|
-
// Mock jest and set the type
|
|
12
|
-
globals_1.jest.mock('axios');
|
|
13
|
-
const mockedAxios = axios_1.default;
|
|
14
|
-
// Get the fixure data from the JSON file in ./fixtures
|
|
15
|
-
async function loadFixture(name) {
|
|
16
|
-
return await (0, promises_1.readFile)((0, path_1.join)(__dirname, 'fixtures', `${name}.json`), 'utf-8');
|
|
17
|
-
}
|
|
18
|
-
(0, globals_1.describe)('the firecrawl JS SDK', () => {
|
|
19
|
-
(0, globals_1.test)('Should require an API key to instantiate FirecrawlApp', async () => {
|
|
20
|
-
const fn = () => {
|
|
21
|
-
new index_1.default({ apiKey: undefined });
|
|
22
|
-
};
|
|
23
|
-
(0, globals_1.expect)(fn).toThrow('No API key provided');
|
|
24
|
-
});
|
|
25
|
-
(0, globals_1.test)('Should return scraped data from a /scrape API call', async () => {
|
|
26
|
-
const mockData = await loadFixture('scrape');
|
|
27
|
-
mockedAxios.post.mockResolvedValue({
|
|
28
|
-
status: 200,
|
|
29
|
-
data: JSON.parse(mockData),
|
|
30
|
-
});
|
|
31
|
-
const apiKey = 'YOUR_API_KEY';
|
|
32
|
-
const app = new index_1.default({ apiKey });
|
|
33
|
-
// Scrape a single URL
|
|
34
|
-
const url = 'https://mendable.ai';
|
|
35
|
-
const scrapedData = await app.scrapeUrl(url);
|
|
36
|
-
(0, globals_1.expect)(mockedAxios.post).toHaveBeenCalledTimes(1);
|
|
37
|
-
(0, globals_1.expect)(mockedAxios.post).toHaveBeenCalledWith(globals_1.expect.stringMatching(/^https:\/\/api.firecrawl.dev/), globals_1.expect.objectContaining({ url }), globals_1.expect.objectContaining({ headers: globals_1.expect.objectContaining({ 'Authorization': `Bearer ${apiKey}` }) }));
|
|
38
|
-
(0, globals_1.expect)(scrapedData.success).toBe(true);
|
|
39
|
-
(0, globals_1.expect)(scrapedData?.data?.metadata.title).toEqual('Mendable');
|
|
40
|
-
});
|
|
41
|
-
});
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const index_js_1 = require("../../index.js");
|
|
4
|
-
const uuid_1 = require("uuid");
|
|
5
|
-
const dotenv_1 = require("dotenv");
|
|
6
|
-
dotenv_1.default.config();
|
|
7
|
-
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
8
|
-
const API_URL = "http://127.0.0.1:3002";
|
|
9
|
-
describe('FirecrawlApp E2E Tests', () => {
|
|
10
|
-
test.concurrent('should throw error for no API key', () => {
|
|
11
|
-
expect(() => {
|
|
12
|
-
new index_js_1.default({ apiKey: null, apiUrl: API_URL });
|
|
13
|
-
}).toThrow("No API key provided");
|
|
14
|
-
});
|
|
15
|
-
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
16
|
-
const invalidApp = new index_js_1.default({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
17
|
-
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
18
|
-
});
|
|
19
|
-
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
|
20
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
21
|
-
const blocklistedUrl = "https://facebook.com/fake-test";
|
|
22
|
-
await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
23
|
-
});
|
|
24
|
-
test.concurrent('should return successful response with valid preview token', async () => {
|
|
25
|
-
const app = new index_js_1.default({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
|
26
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
27
|
-
expect(response).not.toBeNull();
|
|
28
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
29
|
-
}, 30000); // 30 seconds timeout
|
|
30
|
-
test.concurrent('should return successful response for valid scrape', async () => {
|
|
31
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
32
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
33
|
-
expect(response).not.toBeNull();
|
|
34
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
35
|
-
expect(response.data).toHaveProperty('markdown');
|
|
36
|
-
expect(response.data).toHaveProperty('metadata');
|
|
37
|
-
expect(response.data).not.toHaveProperty('html');
|
|
38
|
-
}, 30000); // 30 seconds timeout
|
|
39
|
-
test.concurrent('should return successful response with valid API key and include HTML', async () => {
|
|
40
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
41
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
|
|
42
|
-
expect(response).not.toBeNull();
|
|
43
|
-
expect(response.data?.content).toContain("_Roast_");
|
|
44
|
-
expect(response.data?.markdown).toContain("_Roast_");
|
|
45
|
-
expect(response.data?.html).toContain("<h1");
|
|
46
|
-
}, 30000); // 30 seconds timeout
|
|
47
|
-
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
|
48
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
49
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
|
50
|
-
expect(response).not.toBeNull();
|
|
51
|
-
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
52
|
-
}, 30000); // 30 seconds timeout
|
|
53
|
-
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
54
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
55
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
|
56
|
-
expect(response).not.toBeNull();
|
|
57
|
-
expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
58
|
-
}, 30000); // 30 seconds timeout
|
|
59
|
-
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
60
|
-
const invalidApp = new index_js_1.default({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
61
|
-
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
|
|
62
|
-
});
|
|
63
|
-
test.concurrent('should throw error for blocklisted URL on crawl', async () => {
|
|
64
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
65
|
-
const blocklistedUrl = "https://twitter.com/fake-test";
|
|
66
|
-
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
|
|
67
|
-
});
|
|
68
|
-
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
69
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
70
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
|
|
71
|
-
expect(response).not.toBeNull();
|
|
72
|
-
expect(response[0].content).toContain("_Roast_");
|
|
73
|
-
}, 60000); // 60 seconds timeout
|
|
74
|
-
test.concurrent('should handle idempotency key for crawl', async () => {
|
|
75
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
76
|
-
const uniqueIdempotencyKey = (0, uuid_1.v4)();
|
|
77
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
|
|
78
|
-
expect(response).not.toBeNull();
|
|
79
|
-
expect(response.jobId).toBeDefined();
|
|
80
|
-
await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
81
|
-
});
|
|
82
|
-
test.concurrent('should check crawl status', async () => {
|
|
83
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
84
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
|
|
85
|
-
expect(response).not.toBeNull();
|
|
86
|
-
expect(response.jobId).toBeDefined();
|
|
87
|
-
let statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
88
|
-
const maxChecks = 15;
|
|
89
|
-
let checks = 0;
|
|
90
|
-
while (statusResponse.status === 'active' && checks < maxChecks) {
|
|
91
|
-
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
92
|
-
expect(statusResponse.partial_data).not.toBeNull();
|
|
93
|
-
statusResponse = await app.checkCrawlStatus(response.jobId);
|
|
94
|
-
checks++;
|
|
95
|
-
}
|
|
96
|
-
expect(statusResponse).not.toBeNull();
|
|
97
|
-
expect(statusResponse.status).toBe('completed');
|
|
98
|
-
expect(statusResponse?.data?.length).toBeGreaterThan(0);
|
|
99
|
-
}, 35000); // 35 seconds timeout
|
|
100
|
-
test.concurrent('should return successful response for search', async () => {
|
|
101
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
102
|
-
const response = await app.search("test query");
|
|
103
|
-
expect(response).not.toBeNull();
|
|
104
|
-
expect(response?.data?.[0]?.content).toBeDefined();
|
|
105
|
-
expect(response?.data?.length).toBeGreaterThan(2);
|
|
106
|
-
}, 30000); // 30 seconds timeout
|
|
107
|
-
test.concurrent('should throw error for invalid API key on search', async () => {
|
|
108
|
-
const invalidApp = new index_js_1.default({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
109
|
-
await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
|
|
110
|
-
});
|
|
111
|
-
test.concurrent('should perform LLM extraction', async () => {
|
|
112
|
-
const app = new index_js_1.default({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
113
|
-
const response = await app.scrapeUrl("https://mendable.ai", {
|
|
114
|
-
extractorOptions: {
|
|
115
|
-
mode: 'llm-extraction',
|
|
116
|
-
extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
|
|
117
|
-
extractionSchema: {
|
|
118
|
-
type: 'object',
|
|
119
|
-
properties: {
|
|
120
|
-
company_mission: { type: 'string' },
|
|
121
|
-
supports_sso: { type: 'boolean' },
|
|
122
|
-
is_open_source: { type: 'boolean' }
|
|
123
|
-
},
|
|
124
|
-
required: ['company_mission', 'supports_sso', 'is_open_source']
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
});
|
|
128
|
-
expect(response).not.toBeNull();
|
|
129
|
-
expect(response.data?.llm_extraction).toBeDefined();
|
|
130
|
-
const llmExtraction = response.data?.llm_extraction;
|
|
131
|
-
expect(llmExtraction?.company_mission).toBeDefined();
|
|
132
|
-
expect(typeof llmExtraction?.supports_sso).toBe('boolean');
|
|
133
|
-
expect(typeof llmExtraction?.is_open_source).toBe('boolean');
|
|
134
|
-
}, 30000); // 30 seconds timeout
|
|
135
|
-
});
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const globals_1 = require("@jest/globals");
|
|
4
|
-
const axios_1 = require("axios");
|
|
5
|
-
const index_js_1 = require("../index.js");
|
|
6
|
-
const promises_1 = require("fs/promises");
|
|
7
|
-
const path_1 = require("path");
|
|
8
|
-
// Mock jest and set the type
|
|
9
|
-
globals_1.jest.mock('axios');
|
|
10
|
-
const mockedAxios = axios_1.default;
|
|
11
|
-
// Get the fixure data from the JSON file in ./fixtures
|
|
12
|
-
async function loadFixture(name) {
|
|
13
|
-
return await (0, promises_1.readFile)((0, path_1.join)(__dirname, 'fixtures', `${name}.json`), 'utf-8');
|
|
14
|
-
}
|
|
15
|
-
(0, globals_1.describe)('the firecrawl JS SDK', () => {
|
|
16
|
-
(0, globals_1.test)('Should require an API key to instantiate FirecrawlApp', async () => {
|
|
17
|
-
const fn = () => {
|
|
18
|
-
new index_js_1.default({ apiKey: undefined });
|
|
19
|
-
};
|
|
20
|
-
(0, globals_1.expect)(fn).toThrow('No API key provided');
|
|
21
|
-
});
|
|
22
|
-
(0, globals_1.test)('Should return scraped data from a /scrape API call', async () => {
|
|
23
|
-
const mockData = await loadFixture('scrape');
|
|
24
|
-
mockedAxios.post.mockResolvedValue({
|
|
25
|
-
status: 200,
|
|
26
|
-
data: JSON.parse(mockData),
|
|
27
|
-
});
|
|
28
|
-
const apiKey = 'YOUR_API_KEY';
|
|
29
|
-
const app = new index_js_1.default({ apiKey });
|
|
30
|
-
// Scrape a single URL
|
|
31
|
-
const url = 'https://mendable.ai';
|
|
32
|
-
const scrapedData = await app.scrapeUrl(url);
|
|
33
|
-
(0, globals_1.expect)(mockedAxios.post).toHaveBeenCalledTimes(1);
|
|
34
|
-
(0, globals_1.expect)(mockedAxios.post).toHaveBeenCalledWith(globals_1.expect.stringMatching(/^https:\/\/api.firecrawl.dev/), globals_1.expect.objectContaining({ url }), globals_1.expect.objectContaining({ headers: globals_1.expect.objectContaining({ 'Authorization': `Bearer ${apiKey}` }) }));
|
|
35
|
-
(0, globals_1.expect)(scrapedData.success).toBe(true);
|
|
36
|
-
(0, globals_1.expect)(scrapedData?.data?.metadata.title).toEqual('Mendable');
|
|
37
|
-
});
|
|
38
|
-
});
|
package/dist-cjs/index.js
DELETED
|
@@ -1,263 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
const axios_1 = require("axios");
|
|
4
|
-
const zod_1 = require("zod");
|
|
5
|
-
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
6
|
-
/**
|
|
7
|
-
* Main class for interacting with the Firecrawl API.
|
|
8
|
-
*/
|
|
9
|
-
class FirecrawlApp {
|
|
10
|
-
/**
|
|
11
|
-
* Initializes a new instance of the FirecrawlApp class.
|
|
12
|
-
* @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
|
|
13
|
-
*/
|
|
14
|
-
constructor({ apiKey = null, apiUrl = null }) {
|
|
15
|
-
Object.defineProperty(this, "apiKey", {
|
|
16
|
-
enumerable: true,
|
|
17
|
-
configurable: true,
|
|
18
|
-
writable: true,
|
|
19
|
-
value: void 0
|
|
20
|
-
});
|
|
21
|
-
Object.defineProperty(this, "apiUrl", {
|
|
22
|
-
enumerable: true,
|
|
23
|
-
configurable: true,
|
|
24
|
-
writable: true,
|
|
25
|
-
value: void 0
|
|
26
|
-
});
|
|
27
|
-
this.apiKey = apiKey || "";
|
|
28
|
-
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
29
|
-
if (!this.apiKey) {
|
|
30
|
-
throw new Error("No API key provided");
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
/**
|
|
34
|
-
* Scrapes a URL using the Firecrawl API.
|
|
35
|
-
* @param {string} url - The URL to scrape.
|
|
36
|
-
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
37
|
-
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
38
|
-
*/
|
|
39
|
-
async scrapeUrl(url, params = null) {
|
|
40
|
-
const headers = {
|
|
41
|
-
"Content-Type": "application/json",
|
|
42
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
43
|
-
};
|
|
44
|
-
let jsonData = { url, ...params };
|
|
45
|
-
if (params?.extractorOptions?.extractionSchema) {
|
|
46
|
-
let schema = params.extractorOptions.extractionSchema;
|
|
47
|
-
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
48
|
-
if (schema instanceof zod_1.z.ZodSchema) {
|
|
49
|
-
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
50
|
-
}
|
|
51
|
-
jsonData = {
|
|
52
|
-
...jsonData,
|
|
53
|
-
extractorOptions: {
|
|
54
|
-
...params.extractorOptions,
|
|
55
|
-
extractionSchema: schema,
|
|
56
|
-
mode: params.extractorOptions.mode || "llm-extraction",
|
|
57
|
-
},
|
|
58
|
-
};
|
|
59
|
-
}
|
|
60
|
-
try {
|
|
61
|
-
const response = await axios_1.default.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
|
62
|
-
if (response.status === 200) {
|
|
63
|
-
const responseData = response.data;
|
|
64
|
-
if (responseData.success) {
|
|
65
|
-
return responseData;
|
|
66
|
-
}
|
|
67
|
-
else {
|
|
68
|
-
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
else {
|
|
72
|
-
this.handleError(response, "scrape URL");
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
catch (error) {
|
|
76
|
-
throw new Error(error.message);
|
|
77
|
-
}
|
|
78
|
-
return { success: false, error: "Internal server error." };
|
|
79
|
-
}
|
|
80
|
-
/**
|
|
81
|
-
* Searches for a query using the Firecrawl API.
|
|
82
|
-
* @param {string} query - The query to search for.
|
|
83
|
-
* @param {Params | null} params - Additional parameters for the search request.
|
|
84
|
-
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
85
|
-
*/
|
|
86
|
-
async search(query, params = null) {
|
|
87
|
-
const headers = {
|
|
88
|
-
"Content-Type": "application/json",
|
|
89
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
90
|
-
};
|
|
91
|
-
let jsonData = { query };
|
|
92
|
-
if (params) {
|
|
93
|
-
jsonData = { ...jsonData, ...params };
|
|
94
|
-
}
|
|
95
|
-
try {
|
|
96
|
-
const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
97
|
-
if (response.status === 200) {
|
|
98
|
-
const responseData = response.data;
|
|
99
|
-
if (responseData.success) {
|
|
100
|
-
return responseData;
|
|
101
|
-
}
|
|
102
|
-
else {
|
|
103
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
else {
|
|
107
|
-
this.handleError(response, "search");
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
catch (error) {
|
|
111
|
-
throw new Error(error.message);
|
|
112
|
-
}
|
|
113
|
-
return { success: false, error: "Internal server error." };
|
|
114
|
-
}
|
|
115
|
-
/**
|
|
116
|
-
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
117
|
-
* @param {string} url - The URL to crawl.
|
|
118
|
-
* @param {Params | null} params - Additional parameters for the crawl request.
|
|
119
|
-
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
|
120
|
-
* @param {number} pollInterval - Time in seconds for job status checks.
|
|
121
|
-
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
122
|
-
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
123
|
-
*/
|
|
124
|
-
async crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
125
|
-
const headers = this.prepareHeaders(idempotencyKey);
|
|
126
|
-
let jsonData = { url };
|
|
127
|
-
if (params) {
|
|
128
|
-
jsonData = { ...jsonData, ...params };
|
|
129
|
-
}
|
|
130
|
-
try {
|
|
131
|
-
const response = await this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
|
132
|
-
if (response.status === 200) {
|
|
133
|
-
const jobId = response.data.jobId;
|
|
134
|
-
if (waitUntilDone) {
|
|
135
|
-
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
136
|
-
}
|
|
137
|
-
else {
|
|
138
|
-
return { success: true, jobId };
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
else {
|
|
142
|
-
this.handleError(response, "start crawl job");
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
catch (error) {
|
|
146
|
-
console.log(error);
|
|
147
|
-
throw new Error(error.message);
|
|
148
|
-
}
|
|
149
|
-
return { success: false, error: "Internal server error." };
|
|
150
|
-
}
|
|
151
|
-
/**
|
|
152
|
-
* Checks the status of a crawl job using the Firecrawl API.
|
|
153
|
-
* @param {string} jobId - The job ID of the crawl operation.
|
|
154
|
-
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
155
|
-
*/
|
|
156
|
-
async checkCrawlStatus(jobId) {
|
|
157
|
-
const headers = this.prepareHeaders();
|
|
158
|
-
try {
|
|
159
|
-
const response = await this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
160
|
-
if (response.status === 200) {
|
|
161
|
-
return {
|
|
162
|
-
success: true,
|
|
163
|
-
status: response.data.status,
|
|
164
|
-
data: response.data.data,
|
|
165
|
-
partial_data: !response.data.data
|
|
166
|
-
? response.data.partial_data
|
|
167
|
-
: undefined,
|
|
168
|
-
};
|
|
169
|
-
}
|
|
170
|
-
else {
|
|
171
|
-
this.handleError(response, "check crawl status");
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
catch (error) {
|
|
175
|
-
throw new Error(error.message);
|
|
176
|
-
}
|
|
177
|
-
return {
|
|
178
|
-
success: false,
|
|
179
|
-
status: "unknown",
|
|
180
|
-
error: "Internal server error.",
|
|
181
|
-
};
|
|
182
|
-
}
|
|
183
|
-
/**
|
|
184
|
-
* Prepares the headers for an API request.
|
|
185
|
-
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
186
|
-
*/
|
|
187
|
-
prepareHeaders(idempotencyKey) {
|
|
188
|
-
return {
|
|
189
|
-
"Content-Type": "application/json",
|
|
190
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
191
|
-
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
|
192
|
-
};
|
|
193
|
-
}
|
|
194
|
-
/**
|
|
195
|
-
* Sends a POST request to the specified URL.
|
|
196
|
-
* @param {string} url - The URL to send the request to.
|
|
197
|
-
* @param {Params} data - The data to send in the request.
|
|
198
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
199
|
-
* @returns {Promise<AxiosResponse>} The response from the POST request.
|
|
200
|
-
*/
|
|
201
|
-
postRequest(url, data, headers) {
|
|
202
|
-
return axios_1.default.post(url, data, { headers });
|
|
203
|
-
}
|
|
204
|
-
/**
|
|
205
|
-
* Sends a GET request to the specified URL.
|
|
206
|
-
* @param {string} url - The URL to send the request to.
|
|
207
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
208
|
-
* @returns {Promise<AxiosResponse>} The response from the GET request.
|
|
209
|
-
*/
|
|
210
|
-
getRequest(url, headers) {
|
|
211
|
-
return axios_1.default.get(url, { headers });
|
|
212
|
-
}
|
|
213
|
-
/**
|
|
214
|
-
* Monitors the status of a crawl job until completion or failure.
|
|
215
|
-
* @param {string} jobId - The job ID of the crawl operation.
|
|
216
|
-
* @param {AxiosRequestHeaders} headers - The headers for the request.
|
|
217
|
-
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
218
|
-
* @returns {Promise<any>} The final job status or data.
|
|
219
|
-
*/
|
|
220
|
-
async monitorJobStatus(jobId, headers, checkInterval) {
|
|
221
|
-
while (true) {
|
|
222
|
-
const statusResponse = await this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
223
|
-
if (statusResponse.status === 200) {
|
|
224
|
-
const statusData = statusResponse.data;
|
|
225
|
-
if (statusData.status === "completed") {
|
|
226
|
-
if ("data" in statusData) {
|
|
227
|
-
return statusData.data;
|
|
228
|
-
}
|
|
229
|
-
else {
|
|
230
|
-
throw new Error("Crawl job completed but no data was returned");
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
234
|
-
if (checkInterval < 2) {
|
|
235
|
-
checkInterval = 2;
|
|
236
|
-
}
|
|
237
|
-
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
238
|
-
}
|
|
239
|
-
else {
|
|
240
|
-
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
else {
|
|
244
|
-
this.handleError(statusResponse, "check crawl status");
|
|
245
|
-
}
|
|
246
|
-
}
|
|
247
|
-
}
|
|
248
|
-
/**
|
|
249
|
-
* Handles errors from API responses.
|
|
250
|
-
* @param {AxiosResponse} response - The response from the API.
|
|
251
|
-
* @param {string} action - The action being performed when the error occurred.
|
|
252
|
-
*/
|
|
253
|
-
handleError(response, action) {
|
|
254
|
-
if ([402, 408, 409, 500].includes(response.status)) {
|
|
255
|
-
const errorMessage = response.data.error || "Unknown error occurred";
|
|
256
|
-
throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
|
|
257
|
-
}
|
|
258
|
-
else {
|
|
259
|
-
throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
exports.default = FirecrawlApp;
|
package/tsconfig.cjs.json
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|