@mendable/firecrawl-js 0.0.28 → 0.0.29-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +127 -136
- package/package.json +1 -1
- package/tsconfig.json +9 -3
package/build/index.js
CHANGED
|
@@ -1,12 +1,3 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
1
|
import axios from "axios";
|
|
11
2
|
import { z } from "zod";
|
|
12
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
@@ -31,42 +22,46 @@ export default class FirecrawlApp {
|
|
|
31
22
|
* @param {Params | null} params - Additional parameters for the scrape request.
|
|
32
23
|
* @returns {Promise<ScrapeResponse>} The response from the scrape operation.
|
|
33
24
|
*/
|
|
34
|
-
scrapeUrl(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
let
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if (schema instanceof z.ZodSchema) {
|
|
46
|
-
schema = zodToJsonSchema(schema);
|
|
47
|
-
}
|
|
48
|
-
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
25
|
+
async scrapeUrl(url, params = null) {
|
|
26
|
+
const headers = {
|
|
27
|
+
"Content-Type": "application/json",
|
|
28
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
29
|
+
};
|
|
30
|
+
let jsonData = { url, ...params };
|
|
31
|
+
if (params?.extractorOptions?.extractionSchema) {
|
|
32
|
+
let schema = params.extractorOptions.extractionSchema;
|
|
33
|
+
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
34
|
+
if (schema instanceof z.ZodSchema) {
|
|
35
|
+
schema = zodToJsonSchema(schema);
|
|
49
36
|
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
37
|
+
jsonData = {
|
|
38
|
+
...jsonData,
|
|
39
|
+
extractorOptions: {
|
|
40
|
+
...params.extractorOptions,
|
|
41
|
+
extractionSchema: schema,
|
|
42
|
+
mode: params.extractorOptions.mode || "llm-extraction",
|
|
43
|
+
},
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
try {
|
|
47
|
+
const response = await axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
|
|
48
|
+
if (response.status === 200) {
|
|
49
|
+
const responseData = response.data;
|
|
50
|
+
if (responseData.success) {
|
|
51
|
+
return responseData;
|
|
60
52
|
}
|
|
61
53
|
else {
|
|
62
|
-
|
|
54
|
+
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
63
55
|
}
|
|
64
56
|
}
|
|
65
|
-
|
|
66
|
-
|
|
57
|
+
else {
|
|
58
|
+
this.handleError(response, "scrape URL");
|
|
67
59
|
}
|
|
68
|
-
|
|
69
|
-
|
|
60
|
+
}
|
|
61
|
+
catch (error) {
|
|
62
|
+
throw new Error(error.message);
|
|
63
|
+
}
|
|
64
|
+
return { success: false, error: "Internal server error." };
|
|
70
65
|
}
|
|
71
66
|
/**
|
|
72
67
|
* Searches for a query using the Firecrawl API.
|
|
@@ -74,36 +69,34 @@ export default class FirecrawlApp {
|
|
|
74
69
|
* @param {Params | null} params - Additional parameters for the search request.
|
|
75
70
|
* @returns {Promise<SearchResponse>} The response from the search operation.
|
|
76
71
|
*/
|
|
77
|
-
search(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
return responseData;
|
|
93
|
-
}
|
|
94
|
-
else {
|
|
95
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
96
|
-
}
|
|
72
|
+
async search(query, params = null) {
|
|
73
|
+
const headers = {
|
|
74
|
+
"Content-Type": "application/json",
|
|
75
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
76
|
+
};
|
|
77
|
+
let jsonData = { query };
|
|
78
|
+
if (params) {
|
|
79
|
+
jsonData = { ...jsonData, ...params };
|
|
80
|
+
}
|
|
81
|
+
try {
|
|
82
|
+
const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
83
|
+
if (response.status === 200) {
|
|
84
|
+
const responseData = response.data;
|
|
85
|
+
if (responseData.success) {
|
|
86
|
+
return responseData;
|
|
97
87
|
}
|
|
98
88
|
else {
|
|
99
|
-
|
|
89
|
+
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
100
90
|
}
|
|
101
91
|
}
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
else {
|
|
93
|
+
this.handleError(response, "search");
|
|
104
94
|
}
|
|
105
|
-
|
|
106
|
-
|
|
95
|
+
}
|
|
96
|
+
catch (error) {
|
|
97
|
+
throw new Error(error.message);
|
|
98
|
+
}
|
|
99
|
+
return { success: false, error: "Internal server error." };
|
|
107
100
|
}
|
|
108
101
|
/**
|
|
109
102
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
@@ -114,75 +107,75 @@ export default class FirecrawlApp {
|
|
|
114
107
|
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
|
115
108
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
|
116
109
|
*/
|
|
117
|
-
crawlUrl(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
132
|
-
return { success: true, jobId };
|
|
133
|
-
}
|
|
110
|
+
async crawlUrl(url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
111
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
112
|
+
let jsonData = { url };
|
|
113
|
+
if (params) {
|
|
114
|
+
jsonData = { ...jsonData, ...params };
|
|
115
|
+
}
|
|
116
|
+
try {
|
|
117
|
+
const response = await this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
|
|
118
|
+
if (response.status === 200) {
|
|
119
|
+
const jobId = response.data.jobId;
|
|
120
|
+
if (waitUntilDone) {
|
|
121
|
+
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
134
122
|
}
|
|
135
123
|
else {
|
|
136
|
-
|
|
124
|
+
return { success: true, jobId };
|
|
137
125
|
}
|
|
138
126
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
throw new Error(error.message);
|
|
127
|
+
else {
|
|
128
|
+
this.handleError(response, "start crawl job");
|
|
142
129
|
}
|
|
143
|
-
|
|
144
|
-
|
|
130
|
+
}
|
|
131
|
+
catch (error) {
|
|
132
|
+
console.log(error);
|
|
133
|
+
throw new Error(error.message);
|
|
134
|
+
}
|
|
135
|
+
return { success: false, error: "Internal server error." };
|
|
145
136
|
}
|
|
146
137
|
/**
|
|
147
138
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
148
139
|
* @param {string} jobId - The job ID of the crawl operation.
|
|
149
140
|
* @returns {Promise<JobStatusResponse>} The response containing the job status.
|
|
150
141
|
*/
|
|
151
|
-
checkCrawlStatus(jobId) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
else {
|
|
167
|
-
this.handleError(response, "check crawl status");
|
|
168
|
-
}
|
|
142
|
+
async checkCrawlStatus(jobId) {
|
|
143
|
+
const headers = this.prepareHeaders();
|
|
144
|
+
try {
|
|
145
|
+
const response = await this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
146
|
+
if (response.status === 200) {
|
|
147
|
+
return {
|
|
148
|
+
success: true,
|
|
149
|
+
status: response.data.status,
|
|
150
|
+
data: response.data.data,
|
|
151
|
+
partial_data: !response.data.data
|
|
152
|
+
? response.data.partial_data
|
|
153
|
+
: undefined,
|
|
154
|
+
};
|
|
169
155
|
}
|
|
170
|
-
|
|
171
|
-
|
|
156
|
+
else {
|
|
157
|
+
this.handleError(response, "check crawl status");
|
|
172
158
|
}
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
159
|
+
}
|
|
160
|
+
catch (error) {
|
|
161
|
+
throw new Error(error.message);
|
|
162
|
+
}
|
|
163
|
+
return {
|
|
164
|
+
success: false,
|
|
165
|
+
status: "unknown",
|
|
166
|
+
error: "Internal server error.",
|
|
167
|
+
};
|
|
179
168
|
}
|
|
180
169
|
/**
|
|
181
170
|
* Prepares the headers for an API request.
|
|
182
171
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
|
183
172
|
*/
|
|
184
173
|
prepareHeaders(idempotencyKey) {
|
|
185
|
-
return
|
|
174
|
+
return {
|
|
175
|
+
"Content-Type": "application/json",
|
|
176
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
177
|
+
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
|
178
|
+
};
|
|
186
179
|
}
|
|
187
180
|
/**
|
|
188
181
|
* Sends a POST request to the specified URL.
|
|
@@ -210,35 +203,33 @@ export default class FirecrawlApp {
|
|
|
210
203
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
|
211
204
|
* @returns {Promise<any>} The final job status or data.
|
|
212
205
|
*/
|
|
213
|
-
monitorJobStatus(jobId, headers, checkInterval) {
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
if (
|
|
220
|
-
|
|
221
|
-
return statusData.data;
|
|
222
|
-
}
|
|
223
|
-
else {
|
|
224
|
-
throw new Error("Crawl job completed but no data was returned");
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
228
|
-
if (checkInterval < 2) {
|
|
229
|
-
checkInterval = 2;
|
|
230
|
-
}
|
|
231
|
-
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
206
|
+
async monitorJobStatus(jobId, headers, checkInterval) {
|
|
207
|
+
while (true) {
|
|
208
|
+
const statusResponse = await this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
|
|
209
|
+
if (statusResponse.status === 200) {
|
|
210
|
+
const statusData = statusResponse.data;
|
|
211
|
+
if (statusData.status === "completed") {
|
|
212
|
+
if ("data" in statusData) {
|
|
213
|
+
return statusData.data;
|
|
232
214
|
}
|
|
233
215
|
else {
|
|
234
|
-
throw new Error(
|
|
216
|
+
throw new Error("Crawl job completed but no data was returned");
|
|
235
217
|
}
|
|
236
218
|
}
|
|
219
|
+
else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
|
|
220
|
+
if (checkInterval < 2) {
|
|
221
|
+
checkInterval = 2;
|
|
222
|
+
}
|
|
223
|
+
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
224
|
+
}
|
|
237
225
|
else {
|
|
238
|
-
|
|
226
|
+
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
239
227
|
}
|
|
240
228
|
}
|
|
241
|
-
|
|
229
|
+
else {
|
|
230
|
+
this.handleError(statusResponse, "check crawl status");
|
|
231
|
+
}
|
|
232
|
+
}
|
|
242
233
|
}
|
|
243
234
|
/**
|
|
244
235
|
* Handles errors from API responses.
|
package/package.json
CHANGED
package/tsconfig.json
CHANGED
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
|
12
12
|
|
|
13
13
|
/* Language and Environment */
|
|
14
|
-
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
|
15
14
|
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
|
16
15
|
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
|
17
16
|
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
|
@@ -25,9 +24,16 @@
|
|
|
25
24
|
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
|
26
25
|
|
|
27
26
|
/* Modules */
|
|
28
|
-
"module": "NodeNext", /* Specify what module code is generated. */
|
|
29
27
|
"rootDir": "./src", /* Specify the root folder within your source files. */
|
|
30
|
-
|
|
28
|
+
|
|
29
|
+
"target": "ES2021",
|
|
30
|
+
"lib": [
|
|
31
|
+
"ES2021",
|
|
32
|
+
"ES2022.Object",
|
|
33
|
+
"DOM"
|
|
34
|
+
],
|
|
35
|
+
"module": "NodeNext",
|
|
36
|
+
"moduleResolution": "nodenext",/* Specify how TypeScript looks up a file from a given module specifier. */
|
|
31
37
|
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
|
32
38
|
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
|
33
39
|
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|