@mendable/firecrawl-js 0.0.36 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cjs/index.js +216 -147
- package/build/esm/index.js +216 -147
- package/package.json +2 -2
- package/src/__tests__/e2e_withAuth/index.test.ts +299 -128
- package/src/__tests__/index.test.ts +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +312 -0
- package/src/index.ts +385 -108
- package/tsconfig.json +3 -3
- package/types/index.d.ts +232 -53
package/src/index.ts
CHANGED
|
@@ -1,16 +1,22 @@
|
|
|
1
1
|
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
+
|
|
4
5
|
/**
|
|
5
6
|
* Configuration interface for FirecrawlApp.
|
|
7
|
+
* @param apiKey - Optional API key for authentication.
|
|
8
|
+
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
|
9
|
+
* @param version - API version, either 'v0' or 'v1'.
|
|
6
10
|
*/
|
|
7
11
|
export interface FirecrawlAppConfig {
|
|
8
12
|
apiKey?: string | null;
|
|
9
13
|
apiUrl?: string | null;
|
|
14
|
+
version?: "v0" | "v1";
|
|
10
15
|
}
|
|
11
16
|
|
|
12
17
|
/**
|
|
13
18
|
* Metadata for a Firecrawl document.
|
|
19
|
+
* Includes various optional properties for document metadata.
|
|
14
20
|
*/
|
|
15
21
|
export interface FirecrawlDocumentMetadata {
|
|
16
22
|
title?: string;
|
|
@@ -43,6 +49,17 @@ export interface FirecrawlDocumentMetadata {
|
|
|
43
49
|
articleTag?: string;
|
|
44
50
|
articleSection?: string;
|
|
45
51
|
sourceURL?: string;
|
|
52
|
+
statusCode?: number;
|
|
53
|
+
error?: string;
|
|
54
|
+
[key: string]: any; // Allows for additional metadata properties not explicitly defined.
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Metadata for a Firecrawl document on v0.
|
|
59
|
+
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
|
60
|
+
*/
|
|
61
|
+
export interface FirecrawlDocumentMetadataV0 {
|
|
62
|
+
// Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
|
|
46
63
|
pageStatusCode?: number;
|
|
47
64
|
pageError?: string;
|
|
48
65
|
[key: string]: any;
|
|
@@ -50,8 +67,23 @@ export interface FirecrawlDocumentMetadata {
|
|
|
50
67
|
|
|
51
68
|
/**
|
|
52
69
|
* Document interface for Firecrawl.
|
|
70
|
+
* Represents a document retrieved or processed by Firecrawl.
|
|
53
71
|
*/
|
|
54
72
|
export interface FirecrawlDocument {
|
|
73
|
+
url?: string;
|
|
74
|
+
markdown?: string;
|
|
75
|
+
html?: string;
|
|
76
|
+
rawHtml?: string;
|
|
77
|
+
links?: string[];
|
|
78
|
+
screenshot?: string;
|
|
79
|
+
metadata: FirecrawlDocumentMetadata;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Document interface for Firecrawl on v0.
|
|
84
|
+
* Represents a document specifically for API version v0 with additional properties.
|
|
85
|
+
*/
|
|
86
|
+
export interface FirecrawlDocumentV0 {
|
|
55
87
|
id?: string;
|
|
56
88
|
url?: string;
|
|
57
89
|
content: string;
|
|
@@ -61,79 +93,242 @@ export interface FirecrawlDocument {
|
|
|
61
93
|
createdAt?: Date;
|
|
62
94
|
updatedAt?: Date;
|
|
63
95
|
type?: string;
|
|
64
|
-
metadata:
|
|
96
|
+
metadata: FirecrawlDocumentMetadataV0;
|
|
65
97
|
childrenLinks?: string[];
|
|
66
98
|
provider?: string;
|
|
67
99
|
warning?: string;
|
|
68
|
-
|
|
69
100
|
index?: number;
|
|
70
101
|
}
|
|
71
102
|
|
|
103
|
+
/**
|
|
104
|
+
* Parameters for scraping operations.
|
|
105
|
+
* Defines the options and configurations available for scraping web content.
|
|
106
|
+
*/
|
|
107
|
+
export interface ScrapeParams {
|
|
108
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
|
109
|
+
headers?: Record<string, string>;
|
|
110
|
+
includeTags?: string[];
|
|
111
|
+
excludeTags?: string[];
|
|
112
|
+
onlyMainContent?: boolean;
|
|
113
|
+
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
|
114
|
+
waitFor?: number;
|
|
115
|
+
timeout?: number;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Parameters for scraping operations on v0.
|
|
120
|
+
* Includes page and extractor options specific to API version v0.
|
|
121
|
+
*/
|
|
122
|
+
export interface ScrapeParamsV0 {
|
|
123
|
+
pageOptions?: {
|
|
124
|
+
headers?: Record<string, string>;
|
|
125
|
+
includeHtml?: boolean;
|
|
126
|
+
includeRawHtml?: boolean;
|
|
127
|
+
onlyIncludeTags?: string[];
|
|
128
|
+
onlyMainContent?: boolean;
|
|
129
|
+
removeTags?: string[];
|
|
130
|
+
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
131
|
+
screenshot?: boolean;
|
|
132
|
+
fullPageScreenshot?: boolean;
|
|
133
|
+
waitFor?: number;
|
|
134
|
+
};
|
|
135
|
+
extractorOptions?: {
|
|
136
|
+
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
|
|
137
|
+
extractionPrompt?: string;
|
|
138
|
+
extractionSchema?: Record<string, any> | z.ZodSchema | any;
|
|
139
|
+
};
|
|
140
|
+
timeout?: number;
|
|
141
|
+
}
|
|
142
|
+
|
|
72
143
|
/**
|
|
73
144
|
* Response interface for scraping operations.
|
|
145
|
+
* Defines the structure of the response received after a scraping operation.
|
|
74
146
|
*/
|
|
75
|
-
export interface ScrapeResponse {
|
|
147
|
+
export interface ScrapeResponse extends FirecrawlDocument {
|
|
76
148
|
success: boolean;
|
|
77
|
-
|
|
149
|
+
warning?: string;
|
|
78
150
|
error?: string;
|
|
79
151
|
}
|
|
152
|
+
|
|
80
153
|
/**
|
|
81
|
-
* Response interface for
|
|
154
|
+
* Response interface for scraping operations on v0.
|
|
155
|
+
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
|
82
156
|
*/
|
|
83
|
-
export interface
|
|
157
|
+
export interface ScrapeResponseV0 {
|
|
84
158
|
success: boolean;
|
|
85
|
-
data?:
|
|
159
|
+
data?: FirecrawlDocumentV0;
|
|
86
160
|
error?: string;
|
|
87
161
|
}
|
|
162
|
+
|
|
163
|
+
/**
|
|
164
|
+
* Parameters for crawling operations.
|
|
165
|
+
* Includes options for both scraping and mapping during a crawl.
|
|
166
|
+
*/
|
|
167
|
+
export interface CrawlParams {
|
|
168
|
+
scrapeOptions?: ScrapeParams;
|
|
169
|
+
crawlerOptions?: {
|
|
170
|
+
includePaths?: string[]
|
|
171
|
+
excludePaths?: string[]
|
|
172
|
+
maxDepth?: number
|
|
173
|
+
limit?: number
|
|
174
|
+
allowBackwardLinks?: boolean
|
|
175
|
+
allowExternalLinks?: boolean
|
|
176
|
+
ignoreSitemap?: boolean
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Parameters for crawling operations on v0.
|
|
182
|
+
* Tailored for API version v0, includes specific options for crawling.
|
|
183
|
+
*/
|
|
184
|
+
export interface CrawlParamsV0 {
|
|
185
|
+
crawlerOptions?: {
|
|
186
|
+
includes?: string[];
|
|
187
|
+
excludes?: string[];
|
|
188
|
+
generateImgAltText?: boolean;
|
|
189
|
+
returnOnlyUrls?: boolean;
|
|
190
|
+
maxDepth?: number;
|
|
191
|
+
mode?: "default" | "fast";
|
|
192
|
+
ignoreSitemap?: boolean;
|
|
193
|
+
limit?: number;
|
|
194
|
+
allowBackwardCrawling?: boolean;
|
|
195
|
+
allowExternalContentLinks?: boolean;
|
|
196
|
+
};
|
|
197
|
+
pageOptions?: {
|
|
198
|
+
headers?: Record<string, string>;
|
|
199
|
+
includeHtml?: boolean;
|
|
200
|
+
includeRawHtml?: boolean;
|
|
201
|
+
onlyIncludeTags?: string[];
|
|
202
|
+
onlyMainContent?: boolean;
|
|
203
|
+
removeTags?: string[];
|
|
204
|
+
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
205
|
+
screenshot?: boolean;
|
|
206
|
+
fullPageScreenshot?: boolean;
|
|
207
|
+
waitFor?: number;
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
|
|
88
211
|
/**
|
|
89
212
|
* Response interface for crawling operations.
|
|
213
|
+
* Defines the structure of the response received after initiating a crawl.
|
|
90
214
|
*/
|
|
91
215
|
export interface CrawlResponse {
|
|
216
|
+
id?: string;
|
|
217
|
+
url?: string;
|
|
92
218
|
success: boolean;
|
|
219
|
+
error?: string;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
/**
|
|
223
|
+
* Response interface for crawling operations on v0.
|
|
224
|
+
* Similar to CrawlResponse but tailored for responses from API version v0.
|
|
225
|
+
*/
|
|
226
|
+
export interface CrawlResponseV0 {
|
|
93
227
|
jobId?: string;
|
|
94
|
-
|
|
228
|
+
success: boolean;
|
|
95
229
|
error?: string;
|
|
96
230
|
}
|
|
231
|
+
|
|
97
232
|
/**
|
|
98
233
|
* Response interface for job status checks.
|
|
234
|
+
* Provides detailed status of a crawl job including progress and results.
|
|
99
235
|
*/
|
|
100
|
-
export interface
|
|
236
|
+
export interface CrawlStatusResponse {
|
|
237
|
+
success: boolean;
|
|
238
|
+
total: number;
|
|
239
|
+
completed: number;
|
|
240
|
+
creditsUsed: number;
|
|
241
|
+
expiresAt: Date;
|
|
242
|
+
status: "scraping" | "completed" | "failed";
|
|
243
|
+
next: string;
|
|
244
|
+
data?: FirecrawlDocument[];
|
|
245
|
+
error?: string;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* Response interface for job status checks on v0.
|
|
250
|
+
* Tailored for API version v0, provides status and partial data of a crawl job.
|
|
251
|
+
*/
|
|
252
|
+
export interface CrawlStatusResponseV0 {
|
|
101
253
|
success: boolean;
|
|
102
254
|
status: string;
|
|
103
255
|
current?: number;
|
|
104
256
|
current_url?: string;
|
|
105
257
|
current_step?: string;
|
|
106
258
|
total?: number;
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
partial_data?: FirecrawlDocument[];
|
|
259
|
+
data?: FirecrawlDocumentV0[];
|
|
260
|
+
partial_data?: FirecrawlDocumentV0[];
|
|
110
261
|
error?: string;
|
|
111
262
|
}
|
|
263
|
+
|
|
264
|
+
|
|
112
265
|
/**
|
|
113
|
-
*
|
|
266
|
+
* Parameters for mapping operations.
|
|
267
|
+
* Defines options for mapping URLs during a crawl.
|
|
114
268
|
*/
|
|
115
|
-
export interface
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
269
|
+
export interface MapParams {
|
|
270
|
+
includePaths?: string[]
|
|
271
|
+
excludePaths?: string[]
|
|
272
|
+
maxDepth?: number
|
|
273
|
+
limit?: number
|
|
274
|
+
allowBackwardLinks?: boolean
|
|
275
|
+
allowExternalLinks?: boolean
|
|
276
|
+
ignoreSitemap?: boolean
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Response interface for mapping operations.
|
|
281
|
+
* Defines the structure of the response received after a mapping operation.
|
|
282
|
+
*/
|
|
283
|
+
export interface MapResponse {
|
|
284
|
+
success: boolean;
|
|
285
|
+
links?: string[];
|
|
286
|
+
error?: string;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
/**
|
|
290
|
+
* Parameters for searching operations on v0.
|
|
291
|
+
* Tailored for API version v0, includes specific options for searching content.
|
|
292
|
+
*/
|
|
293
|
+
export interface SearchParamsV0 {
|
|
294
|
+
pageOptions?: {
|
|
295
|
+
onlyMainContent?: boolean;
|
|
296
|
+
fetchPageContent?: boolean;
|
|
297
|
+
includeHtml?: boolean;
|
|
298
|
+
includeRawHtml?: boolean;
|
|
121
299
|
};
|
|
300
|
+
searchOptions?: {
|
|
301
|
+
limit?: number;
|
|
302
|
+
};
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Response interface for searching operations on v0.
|
|
307
|
+
* Defines the structure of the response received after a search operation on v0.
|
|
308
|
+
*/
|
|
309
|
+
export interface SearchResponseV0 {
|
|
310
|
+
success: boolean;
|
|
311
|
+
data?: FirecrawlDocumentV0[];
|
|
312
|
+
error?: string;
|
|
122
313
|
}
|
|
314
|
+
|
|
123
315
|
/**
|
|
124
316
|
* Main class for interacting with the Firecrawl API.
|
|
317
|
+
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
125
318
|
*/
|
|
126
|
-
export default class FirecrawlApp {
|
|
319
|
+
export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
127
320
|
private apiKey: string;
|
|
128
321
|
private apiUrl: string;
|
|
322
|
+
public version: T;
|
|
129
323
|
|
|
130
324
|
/**
|
|
131
325
|
* Initializes a new instance of the FirecrawlApp class.
|
|
132
|
-
* @param
|
|
326
|
+
* @param config - Configuration options for the FirecrawlApp instance.
|
|
133
327
|
*/
|
|
134
|
-
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
|
328
|
+
constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
|
|
135
329
|
this.apiKey = apiKey || "";
|
|
136
330
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
331
|
+
this.version = version as T;
|
|
137
332
|
if (!this.apiKey) {
|
|
138
333
|
throw new Error("No API key provided");
|
|
139
334
|
}
|
|
@@ -141,21 +336,21 @@ export default class FirecrawlApp {
|
|
|
141
336
|
|
|
142
337
|
/**
|
|
143
338
|
* Scrapes a URL using the Firecrawl API.
|
|
144
|
-
* @param
|
|
145
|
-
* @param
|
|
146
|
-
* @returns
|
|
339
|
+
* @param url - The URL to scrape.
|
|
340
|
+
* @param params - Additional parameters for the scrape request.
|
|
341
|
+
* @returns The response from the scrape operation.
|
|
147
342
|
*/
|
|
148
343
|
async scrapeUrl(
|
|
149
344
|
url: string,
|
|
150
|
-
params
|
|
151
|
-
): Promise<ScrapeResponse> {
|
|
345
|
+
params?: ScrapeParams | ScrapeParamsV0
|
|
346
|
+
): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
|
|
152
347
|
const headers: AxiosRequestHeaders = {
|
|
153
348
|
"Content-Type": "application/json",
|
|
154
349
|
Authorization: `Bearer ${this.apiKey}`,
|
|
155
350
|
} as AxiosRequestHeaders;
|
|
156
|
-
let jsonData:
|
|
157
|
-
if (
|
|
158
|
-
let schema =
|
|
351
|
+
let jsonData: any = { url, ...params };
|
|
352
|
+
if (jsonData?.extractorOptions?.extractionSchema) {
|
|
353
|
+
let schema = jsonData.extractorOptions.extractionSchema;
|
|
159
354
|
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
160
355
|
if (schema instanceof z.ZodSchema) {
|
|
161
356
|
schema = zodToJsonSchema(schema);
|
|
@@ -163,22 +358,27 @@ export default class FirecrawlApp {
|
|
|
163
358
|
jsonData = {
|
|
164
359
|
...jsonData,
|
|
165
360
|
extractorOptions: {
|
|
166
|
-
...
|
|
361
|
+
...jsonData.extractorOptions,
|
|
167
362
|
extractionSchema: schema,
|
|
168
|
-
mode:
|
|
363
|
+
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
169
364
|
},
|
|
170
365
|
};
|
|
171
366
|
}
|
|
172
367
|
try {
|
|
173
368
|
const response: AxiosResponse = await axios.post(
|
|
174
|
-
this.apiUrl +
|
|
369
|
+
this.apiUrl + `/${this.version}/scrape`,
|
|
175
370
|
jsonData,
|
|
176
371
|
{ headers }
|
|
177
372
|
);
|
|
178
373
|
if (response.status === 200) {
|
|
179
374
|
const responseData = response.data;
|
|
180
375
|
if (responseData.success) {
|
|
181
|
-
return responseData
|
|
376
|
+
return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
|
|
377
|
+
success: true,
|
|
378
|
+
warning: responseData.warning,
|
|
379
|
+
error: responseData.error,
|
|
380
|
+
...responseData.data
|
|
381
|
+
}) as ScrapeResponse;
|
|
182
382
|
} else {
|
|
183
383
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
184
384
|
}
|
|
@@ -188,24 +388,28 @@ export default class FirecrawlApp {
|
|
|
188
388
|
} catch (error: any) {
|
|
189
389
|
throw new Error(error.message);
|
|
190
390
|
}
|
|
191
|
-
return { success: false, error: "Internal server error." };
|
|
391
|
+
return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
|
|
192
392
|
}
|
|
193
393
|
|
|
194
394
|
/**
|
|
195
395
|
* Searches for a query using the Firecrawl API.
|
|
196
|
-
* @param
|
|
197
|
-
* @param
|
|
198
|
-
* @returns
|
|
396
|
+
* @param query - The query to search for.
|
|
397
|
+
* @param params - Additional parameters for the search request.
|
|
398
|
+
* @returns The response from the search operation.
|
|
199
399
|
*/
|
|
200
400
|
async search(
|
|
201
401
|
query: string,
|
|
202
|
-
params
|
|
203
|
-
): Promise<
|
|
402
|
+
params?: SearchParamsV0
|
|
403
|
+
): Promise<SearchResponseV0> {
|
|
404
|
+
if (this.version === "v1") {
|
|
405
|
+
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
406
|
+
}
|
|
407
|
+
|
|
204
408
|
const headers: AxiosRequestHeaders = {
|
|
205
409
|
"Content-Type": "application/json",
|
|
206
410
|
Authorization: `Bearer ${this.apiKey}`,
|
|
207
411
|
} as AxiosRequestHeaders;
|
|
208
|
-
let jsonData:
|
|
412
|
+
let jsonData: any = { query };
|
|
209
413
|
if (params) {
|
|
210
414
|
jsonData = { ...jsonData, ...params };
|
|
211
415
|
}
|
|
@@ -233,93 +437,160 @@ export default class FirecrawlApp {
|
|
|
233
437
|
|
|
234
438
|
/**
|
|
235
439
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
236
|
-
* @param
|
|
237
|
-
* @param
|
|
238
|
-
* @param
|
|
239
|
-
* @param
|
|
240
|
-
* @param
|
|
241
|
-
* @returns
|
|
440
|
+
* @param url - The URL to crawl.
|
|
441
|
+
* @param params - Additional parameters for the crawl request.
|
|
442
|
+
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
443
|
+
* @param pollInterval - Time in seconds for job status checks.
|
|
444
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
445
|
+
* @returns The response from the crawl operation.
|
|
242
446
|
*/
|
|
243
447
|
async crawlUrl(
|
|
244
448
|
url: string,
|
|
245
|
-
params
|
|
449
|
+
params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
|
|
246
450
|
waitUntilDone: boolean = true,
|
|
247
451
|
pollInterval: number = 2,
|
|
248
452
|
idempotencyKey?: string
|
|
249
|
-
): Promise<
|
|
453
|
+
): Promise<
|
|
454
|
+
this['version'] extends 'v0'
|
|
455
|
+
? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
|
|
456
|
+
: CrawlResponse | CrawlStatusResponse
|
|
457
|
+
> {
|
|
250
458
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
251
|
-
let jsonData:
|
|
252
|
-
if (params) {
|
|
253
|
-
jsonData = { ...jsonData, ...params };
|
|
254
|
-
}
|
|
459
|
+
let jsonData: any = { url, ...params };
|
|
255
460
|
try {
|
|
256
461
|
const response: AxiosResponse = await this.postRequest(
|
|
257
|
-
this.apiUrl +
|
|
462
|
+
this.apiUrl + `/${this.version}/crawl`,
|
|
258
463
|
jsonData,
|
|
259
464
|
headers
|
|
260
465
|
);
|
|
261
466
|
if (response.status === 200) {
|
|
262
|
-
const
|
|
467
|
+
const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
|
|
468
|
+
let checkUrl: string | undefined = undefined;
|
|
263
469
|
if (waitUntilDone) {
|
|
264
|
-
|
|
470
|
+
if (this.version === 'v1') { checkUrl = response.data.url }
|
|
471
|
+
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
|
265
472
|
} else {
|
|
266
|
-
|
|
473
|
+
if (this.version === 'v0') {
|
|
474
|
+
return {
|
|
475
|
+
success: true,
|
|
476
|
+
jobId: id
|
|
477
|
+
} as CrawlResponseV0;
|
|
478
|
+
} else {
|
|
479
|
+
return {
|
|
480
|
+
success: true,
|
|
481
|
+
id: id
|
|
482
|
+
} as CrawlResponse;
|
|
483
|
+
}
|
|
267
484
|
}
|
|
268
485
|
} else {
|
|
269
486
|
this.handleError(response, "start crawl job");
|
|
270
487
|
}
|
|
271
488
|
} catch (error: any) {
|
|
272
|
-
|
|
273
|
-
|
|
489
|
+
if (error.response?.data?.error) {
|
|
490
|
+
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
|
491
|
+
} else {
|
|
492
|
+
throw new Error(error.message);
|
|
493
|
+
}
|
|
274
494
|
}
|
|
275
|
-
return { success: false, error: "Internal server error." };
|
|
495
|
+
return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
|
|
276
496
|
}
|
|
277
497
|
|
|
278
498
|
/**
|
|
279
499
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
280
|
-
* @param
|
|
281
|
-
* @returns
|
|
500
|
+
* @param id - The ID of the crawl operation.
|
|
501
|
+
* @returns The response containing the job status.
|
|
282
502
|
*/
|
|
283
|
-
async checkCrawlStatus(
|
|
503
|
+
async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
|
|
504
|
+
if (!id) {
|
|
505
|
+
throw new Error("No crawl ID provided");
|
|
506
|
+
}
|
|
507
|
+
|
|
284
508
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
285
509
|
try {
|
|
286
510
|
const response: AxiosResponse = await this.getRequest(
|
|
287
|
-
this.
|
|
511
|
+
this.version === 'v1' ?
|
|
512
|
+
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
513
|
+
`${this.apiUrl}/${this.version}/crawl/status/${id}`,
|
|
288
514
|
headers
|
|
289
515
|
);
|
|
290
516
|
if (response.status === 200) {
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
517
|
+
if (this.version === 'v0') {
|
|
518
|
+
return ({
|
|
519
|
+
success: true,
|
|
520
|
+
status: response.data.status,
|
|
521
|
+
current: response.data.current,
|
|
522
|
+
current_url: response.data.current_url,
|
|
523
|
+
current_step: response.data.current_step,
|
|
524
|
+
total: response.data.total,
|
|
525
|
+
data: response.data.data,
|
|
526
|
+
partial_data: !response.data.data
|
|
527
|
+
? response.data.partial_data
|
|
528
|
+
: undefined,
|
|
529
|
+
} as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
530
|
+
} else {
|
|
531
|
+
return ({
|
|
532
|
+
success: true,
|
|
533
|
+
status: response.data.status,
|
|
534
|
+
total: response.data.total,
|
|
535
|
+
completed: response.data.completed,
|
|
536
|
+
creditsUsed: response.data.creditsUsed,
|
|
537
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
538
|
+
next: response.data.next,
|
|
539
|
+
data: response.data.data,
|
|
540
|
+
error: response.data.error
|
|
541
|
+
} as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
542
|
+
}
|
|
303
543
|
} else {
|
|
304
544
|
this.handleError(response, "check crawl status");
|
|
305
545
|
}
|
|
306
546
|
} catch (error: any) {
|
|
307
547
|
throw new Error(error.message);
|
|
308
548
|
}
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
549
|
+
|
|
550
|
+
return this.version === 'v0' ?
|
|
551
|
+
({
|
|
552
|
+
success: false,
|
|
553
|
+
status: "unknown",
|
|
554
|
+
current: 0,
|
|
555
|
+
current_url: "",
|
|
556
|
+
current_step: "",
|
|
557
|
+
total: 0,
|
|
558
|
+
error: "Internal server error.",
|
|
559
|
+
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
|
|
560
|
+
({
|
|
561
|
+
success: false,
|
|
562
|
+
error: "Internal server error.",
|
|
563
|
+
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
|
|
567
|
+
if (this.version == 'v0') {
|
|
568
|
+
throw new Error("Map is not supported in v0");
|
|
569
|
+
}
|
|
570
|
+
const headers = this.prepareHeaders();
|
|
571
|
+
let jsonData: { url: string } & MapParams = { url, ...params };
|
|
572
|
+
|
|
573
|
+
try {
|
|
574
|
+
const response: AxiosResponse = await this.postRequest(
|
|
575
|
+
this.apiUrl + `/${this.version}/map`,
|
|
576
|
+
jsonData,
|
|
577
|
+
headers
|
|
578
|
+
);
|
|
579
|
+
if (response.status === 200) {
|
|
580
|
+
return response.data as MapResponse;
|
|
581
|
+
} else {
|
|
582
|
+
this.handleError(response, "map");
|
|
583
|
+
}
|
|
584
|
+
} catch (error: any) {
|
|
585
|
+
throw new Error(error.message);
|
|
586
|
+
}
|
|
587
|
+
return { success: false, error: "Internal server error." } as MapResponse;
|
|
318
588
|
}
|
|
319
589
|
|
|
320
590
|
/**
|
|
321
591
|
* Prepares the headers for an API request.
|
|
322
|
-
* @
|
|
592
|
+
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
593
|
+
* @returns The prepared headers.
|
|
323
594
|
*/
|
|
324
595
|
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
|
325
596
|
return {
|
|
@@ -331,14 +602,14 @@ export default class FirecrawlApp {
|
|
|
331
602
|
|
|
332
603
|
/**
|
|
333
604
|
* Sends a POST request to the specified URL.
|
|
334
|
-
* @param
|
|
335
|
-
* @param
|
|
336
|
-
* @param
|
|
337
|
-
* @returns
|
|
605
|
+
* @param url - The URL to send the request to.
|
|
606
|
+
* @param data - The data to send in the request.
|
|
607
|
+
* @param headers - The headers for the request.
|
|
608
|
+
* @returns The response from the POST request.
|
|
338
609
|
*/
|
|
339
610
|
postRequest(
|
|
340
611
|
url: string,
|
|
341
|
-
data:
|
|
612
|
+
data: any,
|
|
342
613
|
headers: AxiosRequestHeaders
|
|
343
614
|
): Promise<AxiosResponse> {
|
|
344
615
|
return axios.post(url, data, { headers });
|
|
@@ -346,9 +617,9 @@ export default class FirecrawlApp {
|
|
|
346
617
|
|
|
347
618
|
/**
|
|
348
619
|
* Sends a GET request to the specified URL.
|
|
349
|
-
* @param
|
|
350
|
-
* @param
|
|
351
|
-
* @returns
|
|
620
|
+
* @param url - The URL to send the request to.
|
|
621
|
+
* @param headers - The headers for the request.
|
|
622
|
+
* @returns The response from the GET request.
|
|
352
623
|
*/
|
|
353
624
|
getRequest(
|
|
354
625
|
url: string,
|
|
@@ -359,38 +630,44 @@ export default class FirecrawlApp {
|
|
|
359
630
|
|
|
360
631
|
/**
|
|
361
632
|
* Monitors the status of a crawl job until completion or failure.
|
|
362
|
-
* @param
|
|
363
|
-
* @param
|
|
364
|
-
* @param
|
|
365
|
-
* @
|
|
633
|
+
* @param id - The ID of the crawl operation.
|
|
634
|
+
* @param headers - The headers for the request.
|
|
635
|
+
* @param checkInterval - Interval in seconds for job status checks.
|
|
636
|
+
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
637
|
+
* @returns The final job status or data.
|
|
366
638
|
*/
|
|
367
639
|
async monitorJobStatus(
|
|
368
|
-
|
|
640
|
+
id: string,
|
|
369
641
|
headers: AxiosRequestHeaders,
|
|
370
|
-
checkInterval: number
|
|
371
|
-
|
|
642
|
+
checkInterval: number,
|
|
643
|
+
checkUrl?: string
|
|
644
|
+
): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
|
|
645
|
+
let apiUrl: string = '';
|
|
372
646
|
while (true) {
|
|
647
|
+
if (this.version === 'v1') {
|
|
648
|
+
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
649
|
+
} else if (this.version === 'v0') {
|
|
650
|
+
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
651
|
+
}
|
|
373
652
|
const statusResponse: AxiosResponse = await this.getRequest(
|
|
374
|
-
|
|
653
|
+
apiUrl,
|
|
375
654
|
headers
|
|
376
655
|
);
|
|
377
656
|
if (statusResponse.status === 200) {
|
|
378
657
|
const statusData = statusResponse.data;
|
|
379
658
|
if (statusData.status === "completed") {
|
|
380
659
|
if ("data" in statusData) {
|
|
381
|
-
return statusData.data;
|
|
660
|
+
return this.version === 'v0' ? statusData.data : statusData;
|
|
382
661
|
} else {
|
|
383
662
|
throw new Error("Crawl job completed but no data was returned");
|
|
384
663
|
}
|
|
385
664
|
} else if (
|
|
386
|
-
["active", "paused", "pending", "queued"].includes(statusData.status)
|
|
665
|
+
["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
|
|
387
666
|
) {
|
|
388
|
-
|
|
389
|
-
checkInterval = 2;
|
|
390
|
-
}
|
|
667
|
+
checkInterval = Math.max(checkInterval, 2);
|
|
391
668
|
await new Promise((resolve) =>
|
|
392
669
|
setTimeout(resolve, checkInterval * 1000)
|
|
393
|
-
);
|
|
670
|
+
);
|
|
394
671
|
} else {
|
|
395
672
|
throw new Error(
|
|
396
673
|
`Crawl job failed or was stopped. Status: ${statusData.status}`
|