@mendable/firecrawl-js 1.0.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -9
- package/build/cjs/index.js +140 -133
- package/build/esm/index.js +138 -133
- package/package.json +3 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -1
- package/src/index.ts +223 -306
- package/types/index.d.ts +54 -162
package/src/index.ts
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
+
import { WebSocket } from "isows";
|
|
5
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
4
6
|
|
|
5
7
|
/**
|
|
6
8
|
* Configuration interface for FirecrawlApp.
|
|
7
9
|
* @param apiKey - Optional API key for authentication.
|
|
8
10
|
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
|
9
|
-
* @param version - API version, either 'v0' or 'v1'.
|
|
10
11
|
*/
|
|
11
12
|
export interface FirecrawlAppConfig {
|
|
12
13
|
apiKey?: string | null;
|
|
13
14
|
apiUrl?: string | null;
|
|
14
|
-
version?: "v0" | "v1";
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -54,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
|
|
|
54
54
|
[key: string]: any; // Allows for additional metadata properties not explicitly defined.
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
/**
|
|
58
|
-
* Metadata for a Firecrawl document on v0.
|
|
59
|
-
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
|
60
|
-
*/
|
|
61
|
-
export interface FirecrawlDocumentMetadataV0 {
|
|
62
|
-
// Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
|
|
63
|
-
pageStatusCode?: number;
|
|
64
|
-
pageError?: string;
|
|
65
|
-
[key: string]: any;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
57
|
/**
|
|
69
58
|
* Document interface for Firecrawl.
|
|
70
59
|
* Represents a document retrieved or processed by Firecrawl.
|
|
@@ -75,29 +64,9 @@ export interface FirecrawlDocument {
|
|
|
75
64
|
html?: string;
|
|
76
65
|
rawHtml?: string;
|
|
77
66
|
links?: string[];
|
|
67
|
+
extract?: Record<any, any>;
|
|
78
68
|
screenshot?: string;
|
|
79
|
-
metadata
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Document interface for Firecrawl on v0.
|
|
84
|
-
* Represents a document specifically for API version v0 with additional properties.
|
|
85
|
-
*/
|
|
86
|
-
export interface FirecrawlDocumentV0 {
|
|
87
|
-
id?: string;
|
|
88
|
-
url?: string;
|
|
89
|
-
content: string;
|
|
90
|
-
markdown?: string;
|
|
91
|
-
html?: string;
|
|
92
|
-
llm_extraction?: Record<string, any>;
|
|
93
|
-
createdAt?: Date;
|
|
94
|
-
updatedAt?: Date;
|
|
95
|
-
type?: string;
|
|
96
|
-
metadata: FirecrawlDocumentMetadataV0;
|
|
97
|
-
childrenLinks?: string[];
|
|
98
|
-
provider?: string;
|
|
99
|
-
warning?: string;
|
|
100
|
-
index?: number;
|
|
69
|
+
metadata?: FirecrawlDocumentMetadata;
|
|
101
70
|
}
|
|
102
71
|
|
|
103
72
|
/**
|
|
@@ -105,38 +74,17 @@ export interface FirecrawlDocumentV0 {
|
|
|
105
74
|
* Defines the options and configurations available for scraping web content.
|
|
106
75
|
*/
|
|
107
76
|
export interface ScrapeParams {
|
|
108
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
|
77
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
|
|
109
78
|
headers?: Record<string, string>;
|
|
110
79
|
includeTags?: string[];
|
|
111
80
|
excludeTags?: string[];
|
|
112
81
|
onlyMainContent?: boolean;
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
/**
|
|
119
|
-
* Parameters for scraping operations on v0.
|
|
120
|
-
* Includes page and extractor options specific to API version v0.
|
|
121
|
-
*/
|
|
122
|
-
export interface ScrapeParamsV0 {
|
|
123
|
-
pageOptions?: {
|
|
124
|
-
headers?: Record<string, string>;
|
|
125
|
-
includeHtml?: boolean;
|
|
126
|
-
includeRawHtml?: boolean;
|
|
127
|
-
onlyIncludeTags?: string[];
|
|
128
|
-
onlyMainContent?: boolean;
|
|
129
|
-
removeTags?: string[];
|
|
130
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
131
|
-
screenshot?: boolean;
|
|
132
|
-
fullPageScreenshot?: boolean;
|
|
133
|
-
waitFor?: number;
|
|
134
|
-
};
|
|
135
|
-
extractorOptions?: {
|
|
136
|
-
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
|
|
137
|
-
extractionPrompt?: string;
|
|
138
|
-
extractionSchema?: Record<string, any> | z.ZodSchema | any;
|
|
82
|
+
extract?: {
|
|
83
|
+
prompt?: string;
|
|
84
|
+
schema?: z.ZodSchema | any;
|
|
85
|
+
systemPrompt?: string;
|
|
139
86
|
};
|
|
87
|
+
waitFor?: number;
|
|
140
88
|
timeout?: number;
|
|
141
89
|
}
|
|
142
90
|
|
|
@@ -145,21 +93,11 @@ export interface ScrapeParamsV0 {
|
|
|
145
93
|
* Defines the structure of the response received after a scraping operation.
|
|
146
94
|
*/
|
|
147
95
|
export interface ScrapeResponse extends FirecrawlDocument {
|
|
148
|
-
success:
|
|
96
|
+
success: true;
|
|
149
97
|
warning?: string;
|
|
150
98
|
error?: string;
|
|
151
99
|
}
|
|
152
100
|
|
|
153
|
-
/**
|
|
154
|
-
* Response interface for scraping operations on v0.
|
|
155
|
-
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
|
156
|
-
*/
|
|
157
|
-
export interface ScrapeResponseV0 {
|
|
158
|
-
success: boolean;
|
|
159
|
-
data?: FirecrawlDocumentV0;
|
|
160
|
-
error?: string;
|
|
161
|
-
}
|
|
162
|
-
|
|
163
101
|
/**
|
|
164
102
|
* Parameters for crawling operations.
|
|
165
103
|
* Includes options for both scraping and mapping during a crawl.
|
|
@@ -175,37 +113,6 @@ export interface CrawlParams {
|
|
|
175
113
|
scrapeOptions?: ScrapeParams;
|
|
176
114
|
}
|
|
177
115
|
|
|
178
|
-
/**
|
|
179
|
-
* Parameters for crawling operations on v0.
|
|
180
|
-
* Tailored for API version v0, includes specific options for crawling.
|
|
181
|
-
*/
|
|
182
|
-
export interface CrawlParamsV0 {
|
|
183
|
-
crawlerOptions?: {
|
|
184
|
-
includes?: string[];
|
|
185
|
-
excludes?: string[];
|
|
186
|
-
generateImgAltText?: boolean;
|
|
187
|
-
returnOnlyUrls?: boolean;
|
|
188
|
-
maxDepth?: number;
|
|
189
|
-
mode?: "default" | "fast";
|
|
190
|
-
ignoreSitemap?: boolean;
|
|
191
|
-
limit?: number;
|
|
192
|
-
allowBackwardCrawling?: boolean;
|
|
193
|
-
allowExternalContentLinks?: boolean;
|
|
194
|
-
};
|
|
195
|
-
pageOptions?: {
|
|
196
|
-
headers?: Record<string, string>;
|
|
197
|
-
includeHtml?: boolean;
|
|
198
|
-
includeRawHtml?: boolean;
|
|
199
|
-
onlyIncludeTags?: string[];
|
|
200
|
-
onlyMainContent?: boolean;
|
|
201
|
-
removeTags?: string[];
|
|
202
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
203
|
-
screenshot?: boolean;
|
|
204
|
-
fullPageScreenshot?: boolean;
|
|
205
|
-
waitFor?: number;
|
|
206
|
-
};
|
|
207
|
-
}
|
|
208
|
-
|
|
209
116
|
/**
|
|
210
117
|
* Response interface for crawling operations.
|
|
211
118
|
* Defines the structure of the response received after initiating a crawl.
|
|
@@ -213,17 +120,7 @@ export interface CrawlParamsV0 {
|
|
|
213
120
|
export interface CrawlResponse {
|
|
214
121
|
id?: string;
|
|
215
122
|
url?: string;
|
|
216
|
-
success:
|
|
217
|
-
error?: string;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
/**
|
|
221
|
-
* Response interface for crawling operations on v0.
|
|
222
|
-
* Similar to CrawlResponse but tailored for responses from API version v0.
|
|
223
|
-
*/
|
|
224
|
-
export interface CrawlResponseV0 {
|
|
225
|
-
jobId?: string;
|
|
226
|
-
success: boolean;
|
|
123
|
+
success: true;
|
|
227
124
|
error?: string;
|
|
228
125
|
}
|
|
229
126
|
|
|
@@ -232,7 +129,7 @@ export interface CrawlResponseV0 {
|
|
|
232
129
|
* Provides detailed status of a crawl job including progress and results.
|
|
233
130
|
*/
|
|
234
131
|
export interface CrawlStatusResponse {
|
|
235
|
-
success:
|
|
132
|
+
success: true;
|
|
236
133
|
total: number;
|
|
237
134
|
completed: number;
|
|
238
135
|
creditsUsed: number;
|
|
@@ -243,23 +140,6 @@ export interface CrawlStatusResponse {
|
|
|
243
140
|
error?: string;
|
|
244
141
|
}
|
|
245
142
|
|
|
246
|
-
/**
|
|
247
|
-
* Response interface for job status checks on v0.
|
|
248
|
-
* Tailored for API version v0, provides status and partial data of a crawl job.
|
|
249
|
-
*/
|
|
250
|
-
export interface CrawlStatusResponseV0 {
|
|
251
|
-
success: boolean;
|
|
252
|
-
status: string;
|
|
253
|
-
current?: number;
|
|
254
|
-
current_url?: string;
|
|
255
|
-
current_step?: string;
|
|
256
|
-
total?: number;
|
|
257
|
-
data?: FirecrawlDocumentV0[];
|
|
258
|
-
partial_data?: FirecrawlDocumentV0[];
|
|
259
|
-
error?: string;
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
|
|
263
143
|
/**
|
|
264
144
|
* Parameters for mapping operations.
|
|
265
145
|
* Defines options for mapping URLs during a crawl.
|
|
@@ -276,57 +156,35 @@ export interface MapParams {
|
|
|
276
156
|
* Defines the structure of the response received after a mapping operation.
|
|
277
157
|
*/
|
|
278
158
|
export interface MapResponse {
|
|
279
|
-
success:
|
|
159
|
+
success: true;
|
|
280
160
|
links?: string[];
|
|
281
161
|
error?: string;
|
|
282
162
|
}
|
|
283
163
|
|
|
284
164
|
/**
|
|
285
|
-
*
|
|
286
|
-
*
|
|
165
|
+
* Error response interface.
|
|
166
|
+
* Defines the structure of the response received when an error occurs.
|
|
287
167
|
*/
|
|
288
|
-
export interface
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
fetchPageContent?: boolean;
|
|
292
|
-
includeHtml?: boolean;
|
|
293
|
-
includeRawHtml?: boolean;
|
|
294
|
-
};
|
|
295
|
-
searchOptions?: {
|
|
296
|
-
limit?: number;
|
|
297
|
-
};
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
/**
|
|
301
|
-
* Response interface for searching operations on v0.
|
|
302
|
-
* Defines the structure of the response received after a search operation on v0.
|
|
303
|
-
*/
|
|
304
|
-
export interface SearchResponseV0 {
|
|
305
|
-
success: boolean;
|
|
306
|
-
data?: FirecrawlDocumentV0[];
|
|
307
|
-
error?: string;
|
|
168
|
+
export interface ErrorResponse {
|
|
169
|
+
success: false;
|
|
170
|
+
error: string;
|
|
308
171
|
}
|
|
309
172
|
|
|
310
173
|
/**
|
|
311
174
|
* Main class for interacting with the Firecrawl API.
|
|
312
175
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
313
176
|
*/
|
|
314
|
-
export default class FirecrawlApp
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
public version: T;
|
|
177
|
+
export default class FirecrawlApp {
|
|
178
|
+
public apiKey: string;
|
|
179
|
+
public apiUrl: string;
|
|
318
180
|
|
|
319
181
|
/**
|
|
320
182
|
* Initializes a new instance of the FirecrawlApp class.
|
|
321
183
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
322
184
|
*/
|
|
323
|
-
constructor({ apiKey = null, apiUrl = null
|
|
185
|
+
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
|
324
186
|
this.apiKey = apiKey || "";
|
|
325
187
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
326
|
-
this.version = version as T;
|
|
327
|
-
if (!this.apiKey) {
|
|
328
|
-
throw new Error("No API key provided");
|
|
329
|
-
}
|
|
330
188
|
}
|
|
331
189
|
|
|
332
190
|
/**
|
|
@@ -337,43 +195,45 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
337
195
|
*/
|
|
338
196
|
async scrapeUrl(
|
|
339
197
|
url: string,
|
|
340
|
-
params?: ScrapeParams
|
|
341
|
-
): Promise<
|
|
198
|
+
params?: ScrapeParams
|
|
199
|
+
): Promise<ScrapeResponse | ErrorResponse> {
|
|
342
200
|
const headers: AxiosRequestHeaders = {
|
|
343
201
|
"Content-Type": "application/json",
|
|
344
202
|
Authorization: `Bearer ${this.apiKey}`,
|
|
345
203
|
} as AxiosRequestHeaders;
|
|
346
204
|
let jsonData: any = { url, ...params };
|
|
347
|
-
if (jsonData?.
|
|
348
|
-
let schema = jsonData.
|
|
349
|
-
|
|
350
|
-
|
|
205
|
+
if (jsonData?.extract?.schema) {
|
|
206
|
+
let schema = jsonData.extract.schema;
|
|
207
|
+
|
|
208
|
+
// Try parsing the schema as a Zod schema
|
|
209
|
+
try {
|
|
351
210
|
schema = zodToJsonSchema(schema);
|
|
211
|
+
} catch (error) {
|
|
212
|
+
|
|
352
213
|
}
|
|
353
214
|
jsonData = {
|
|
354
215
|
...jsonData,
|
|
355
|
-
|
|
356
|
-
...jsonData.
|
|
357
|
-
|
|
358
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
216
|
+
extract: {
|
|
217
|
+
...jsonData.extract,
|
|
218
|
+
schema: schema,
|
|
359
219
|
},
|
|
360
220
|
};
|
|
361
221
|
}
|
|
362
222
|
try {
|
|
363
223
|
const response: AxiosResponse = await axios.post(
|
|
364
|
-
this.apiUrl +
|
|
224
|
+
this.apiUrl + `/v1/scrape`,
|
|
365
225
|
jsonData,
|
|
366
226
|
{ headers }
|
|
367
227
|
);
|
|
368
228
|
if (response.status === 200) {
|
|
369
229
|
const responseData = response.data;
|
|
370
230
|
if (responseData.success) {
|
|
371
|
-
return
|
|
231
|
+
return {
|
|
372
232
|
success: true,
|
|
373
233
|
warning: responseData.warning,
|
|
374
234
|
error: responseData.error,
|
|
375
235
|
...responseData.data
|
|
376
|
-
}
|
|
236
|
+
};
|
|
377
237
|
} else {
|
|
378
238
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
379
239
|
}
|
|
@@ -383,100 +243,47 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
383
243
|
} catch (error: any) {
|
|
384
244
|
throw new Error(error.message);
|
|
385
245
|
}
|
|
386
|
-
return { success: false, error: "Internal server error." }
|
|
246
|
+
return { success: false, error: "Internal server error." };
|
|
387
247
|
}
|
|
388
248
|
|
|
389
249
|
/**
|
|
390
|
-
*
|
|
391
|
-
* @param query - The query
|
|
392
|
-
* @param params - Additional parameters for the search
|
|
393
|
-
* @returns
|
|
250
|
+
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
|
251
|
+
* @param query - The search query string.
|
|
252
|
+
* @param params - Additional parameters for the search.
|
|
253
|
+
* @returns Throws an error advising to use version 0 of the API.
|
|
394
254
|
*/
|
|
395
255
|
async search(
|
|
396
256
|
query: string,
|
|
397
|
-
params?:
|
|
398
|
-
): Promise<
|
|
399
|
-
|
|
400
|
-
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
401
|
-
}
|
|
402
|
-
|
|
403
|
-
const headers: AxiosRequestHeaders = {
|
|
404
|
-
"Content-Type": "application/json",
|
|
405
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
406
|
-
} as AxiosRequestHeaders;
|
|
407
|
-
let jsonData: any = { query };
|
|
408
|
-
if (params) {
|
|
409
|
-
jsonData = { ...jsonData, ...params };
|
|
410
|
-
}
|
|
411
|
-
try {
|
|
412
|
-
const response: AxiosResponse = await axios.post(
|
|
413
|
-
this.apiUrl + "/v0/search",
|
|
414
|
-
jsonData,
|
|
415
|
-
{ headers }
|
|
416
|
-
);
|
|
417
|
-
if (response.status === 200) {
|
|
418
|
-
const responseData = response.data;
|
|
419
|
-
if (responseData.success) {
|
|
420
|
-
return responseData;
|
|
421
|
-
} else {
|
|
422
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
423
|
-
}
|
|
424
|
-
} else {
|
|
425
|
-
this.handleError(response, "search");
|
|
426
|
-
}
|
|
427
|
-
} catch (error: any) {
|
|
428
|
-
throw new Error(error.message);
|
|
429
|
-
}
|
|
430
|
-
return { success: false, error: "Internal server error." };
|
|
257
|
+
params?: any
|
|
258
|
+
): Promise<any> {
|
|
259
|
+
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
431
260
|
}
|
|
432
261
|
|
|
433
262
|
/**
|
|
434
263
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
435
264
|
* @param url - The URL to crawl.
|
|
436
265
|
* @param params - Additional parameters for the crawl request.
|
|
437
|
-
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
438
266
|
* @param pollInterval - Time in seconds for job status checks.
|
|
439
267
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
440
268
|
* @returns The response from the crawl operation.
|
|
441
269
|
*/
|
|
442
270
|
async crawlUrl(
|
|
443
271
|
url: string,
|
|
444
|
-
params?:
|
|
445
|
-
waitUntilDone: boolean = true,
|
|
272
|
+
params?: CrawlParams,
|
|
446
273
|
pollInterval: number = 2,
|
|
447
274
|
idempotencyKey?: string
|
|
448
|
-
): Promise<
|
|
449
|
-
this['version'] extends 'v0'
|
|
450
|
-
? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
|
|
451
|
-
: CrawlResponse | CrawlStatusResponse
|
|
452
|
-
> {
|
|
275
|
+
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
453
276
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
454
277
|
let jsonData: any = { url, ...params };
|
|
455
278
|
try {
|
|
456
279
|
const response: AxiosResponse = await this.postRequest(
|
|
457
|
-
this.apiUrl +
|
|
280
|
+
this.apiUrl + `/v1/crawl`,
|
|
458
281
|
jsonData,
|
|
459
282
|
headers
|
|
460
283
|
);
|
|
461
284
|
if (response.status === 200) {
|
|
462
|
-
const id: string =
|
|
463
|
-
|
|
464
|
-
if (waitUntilDone) {
|
|
465
|
-
if (this.version === 'v1') { checkUrl = response.data.url }
|
|
466
|
-
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
|
467
|
-
} else {
|
|
468
|
-
if (this.version === 'v0') {
|
|
469
|
-
return {
|
|
470
|
-
success: true,
|
|
471
|
-
jobId: id
|
|
472
|
-
} as CrawlResponseV0;
|
|
473
|
-
} else {
|
|
474
|
-
return {
|
|
475
|
-
success: true,
|
|
476
|
-
id: id
|
|
477
|
-
} as CrawlResponse;
|
|
478
|
-
}
|
|
479
|
-
}
|
|
285
|
+
const id: string = response.data.id;
|
|
286
|
+
return this.monitorJobStatus(id, headers, pollInterval);
|
|
480
287
|
} else {
|
|
481
288
|
this.handleError(response, "start crawl job");
|
|
482
289
|
}
|
|
@@ -487,7 +294,35 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
487
294
|
throw new Error(error.message);
|
|
488
295
|
}
|
|
489
296
|
}
|
|
490
|
-
return { success: false, error: "Internal server error." }
|
|
297
|
+
return { success: false, error: "Internal server error." };
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
async asyncCrawlUrl(
|
|
301
|
+
url: string,
|
|
302
|
+
params?: CrawlParams,
|
|
303
|
+
idempotencyKey?: string
|
|
304
|
+
): Promise<CrawlResponse | ErrorResponse> {
|
|
305
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
306
|
+
let jsonData: any = { url, ...params };
|
|
307
|
+
try {
|
|
308
|
+
const response: AxiosResponse = await this.postRequest(
|
|
309
|
+
this.apiUrl + `/v1/crawl`,
|
|
310
|
+
jsonData,
|
|
311
|
+
headers
|
|
312
|
+
);
|
|
313
|
+
if (response.status === 200) {
|
|
314
|
+
return response.data;
|
|
315
|
+
} else {
|
|
316
|
+
this.handleError(response, "start crawl job");
|
|
317
|
+
}
|
|
318
|
+
} catch (error: any) {
|
|
319
|
+
if (error.response?.data?.error) {
|
|
320
|
+
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
|
321
|
+
} else {
|
|
322
|
+
throw new Error(error.message);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
return { success: false, error: "Internal server error." };
|
|
491
326
|
}
|
|
492
327
|
|
|
493
328
|
/**
|
|
@@ -495,7 +330,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
495
330
|
* @param id - The ID of the crawl operation.
|
|
496
331
|
* @returns The response containing the job status.
|
|
497
332
|
*/
|
|
498
|
-
async checkCrawlStatus(id?: string): Promise<
|
|
333
|
+
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
499
334
|
if (!id) {
|
|
500
335
|
throw new Error("No crawl ID provided");
|
|
501
336
|
}
|
|
@@ -503,71 +338,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
503
338
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
504
339
|
try {
|
|
505
340
|
const response: AxiosResponse = await this.getRequest(
|
|
506
|
-
this.
|
|
507
|
-
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
508
|
-
`${this.apiUrl}/${this.version}/crawl/status/${id}`,
|
|
341
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
509
342
|
headers
|
|
510
343
|
);
|
|
511
344
|
if (response.status === 200) {
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
: undefined,
|
|
524
|
-
} as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
525
|
-
} else {
|
|
526
|
-
return ({
|
|
527
|
-
success: true,
|
|
528
|
-
status: response.data.status,
|
|
529
|
-
total: response.data.total,
|
|
530
|
-
completed: response.data.completed,
|
|
531
|
-
creditsUsed: response.data.creditsUsed,
|
|
532
|
-
expiresAt: new Date(response.data.expiresAt),
|
|
533
|
-
next: response.data.next,
|
|
534
|
-
data: response.data.data,
|
|
535
|
-
error: response.data.error
|
|
536
|
-
} as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
537
|
-
}
|
|
345
|
+
return ({
|
|
346
|
+
success: true,
|
|
347
|
+
status: response.data.status,
|
|
348
|
+
total: response.data.total,
|
|
349
|
+
completed: response.data.completed,
|
|
350
|
+
creditsUsed: response.data.creditsUsed,
|
|
351
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
352
|
+
next: response.data.next,
|
|
353
|
+
data: response.data.data,
|
|
354
|
+
error: response.data.error
|
|
355
|
+
})
|
|
538
356
|
} else {
|
|
539
357
|
this.handleError(response, "check crawl status");
|
|
540
358
|
}
|
|
541
359
|
} catch (error: any) {
|
|
542
360
|
throw new Error(error.message);
|
|
543
361
|
}
|
|
544
|
-
|
|
545
|
-
return this.version === 'v0' ?
|
|
546
|
-
({
|
|
547
|
-
success: false,
|
|
548
|
-
status: "unknown",
|
|
549
|
-
current: 0,
|
|
550
|
-
current_url: "",
|
|
551
|
-
current_step: "",
|
|
552
|
-
total: 0,
|
|
553
|
-
error: "Internal server error.",
|
|
554
|
-
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
|
|
555
|
-
({
|
|
556
|
-
success: false,
|
|
557
|
-
error: "Internal server error.",
|
|
558
|
-
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
|
362
|
+
return { success: false, error: "Internal server error." };
|
|
559
363
|
}
|
|
560
364
|
|
|
561
|
-
async
|
|
562
|
-
|
|
563
|
-
|
|
365
|
+
async crawlUrlAndWatch(
|
|
366
|
+
url: string,
|
|
367
|
+
params?: CrawlParams,
|
|
368
|
+
idempotencyKey?: string,
|
|
369
|
+
) {
|
|
370
|
+
const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
|
|
371
|
+
|
|
372
|
+
if (crawl.success && crawl.id) {
|
|
373
|
+
const id = crawl.id;
|
|
374
|
+
return new CrawlWatcher(id, this);
|
|
564
375
|
}
|
|
376
|
+
|
|
377
|
+
throw new Error("Crawl job failed to start");
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
|
565
381
|
const headers = this.prepareHeaders();
|
|
566
382
|
let jsonData: { url: string } & MapParams = { url, ...params };
|
|
567
383
|
|
|
568
384
|
try {
|
|
569
385
|
const response: AxiosResponse = await this.postRequest(
|
|
570
|
-
this.apiUrl +
|
|
386
|
+
this.apiUrl + `/v1/map`,
|
|
571
387
|
jsonData,
|
|
572
388
|
headers
|
|
573
389
|
);
|
|
@@ -579,7 +395,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
579
395
|
} catch (error: any) {
|
|
580
396
|
throw new Error(error.message);
|
|
581
397
|
}
|
|
582
|
-
return { success: false, error: "Internal server error." }
|
|
398
|
+
return { success: false, error: "Internal server error." };
|
|
583
399
|
}
|
|
584
400
|
|
|
585
401
|
/**
|
|
@@ -634,25 +450,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
634
450
|
async monitorJobStatus(
|
|
635
451
|
id: string,
|
|
636
452
|
headers: AxiosRequestHeaders,
|
|
637
|
-
checkInterval: number
|
|
638
|
-
|
|
639
|
-
): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
|
|
640
|
-
let apiUrl: string = '';
|
|
453
|
+
checkInterval: number
|
|
454
|
+
): Promise<CrawlStatusResponse> {
|
|
641
455
|
while (true) {
|
|
642
|
-
if (this.version === 'v1') {
|
|
643
|
-
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
644
|
-
} else if (this.version === 'v0') {
|
|
645
|
-
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
646
|
-
}
|
|
647
456
|
const statusResponse: AxiosResponse = await this.getRequest(
|
|
648
|
-
apiUrl
|
|
457
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
649
458
|
headers
|
|
650
459
|
);
|
|
651
460
|
if (statusResponse.status === 200) {
|
|
652
461
|
const statusData = statusResponse.data;
|
|
653
462
|
if (statusData.status === "completed") {
|
|
654
463
|
if ("data" in statusData) {
|
|
655
|
-
return
|
|
464
|
+
return statusData;
|
|
656
465
|
} else {
|
|
657
466
|
throw new Error("Crawl job completed but no data was returned");
|
|
658
467
|
}
|
|
@@ -693,3 +502,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
693
502
|
}
|
|
694
503
|
}
|
|
695
504
|
}
|
|
505
|
+
|
|
506
|
+
interface CrawlWatcherEvents {
|
|
507
|
+
document: CustomEvent<FirecrawlDocument>,
|
|
508
|
+
done: CustomEvent<{
|
|
509
|
+
status: CrawlStatusResponse["status"];
|
|
510
|
+
data: FirecrawlDocument[];
|
|
511
|
+
}>,
|
|
512
|
+
error: CustomEvent<{
|
|
513
|
+
status: CrawlStatusResponse["status"],
|
|
514
|
+
data: FirecrawlDocument[],
|
|
515
|
+
error: string,
|
|
516
|
+
}>,
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
520
|
+
private ws: WebSocket;
|
|
521
|
+
public data: FirecrawlDocument[];
|
|
522
|
+
public status: CrawlStatusResponse["status"];
|
|
523
|
+
|
|
524
|
+
constructor(id: string, app: FirecrawlApp) {
|
|
525
|
+
super();
|
|
526
|
+
this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
|
|
527
|
+
this.status = "scraping";
|
|
528
|
+
this.data = [];
|
|
529
|
+
|
|
530
|
+
type ErrorMessage = {
|
|
531
|
+
type: "error",
|
|
532
|
+
error: string,
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
type CatchupMessage = {
|
|
536
|
+
type: "catchup",
|
|
537
|
+
data: CrawlStatusResponse,
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
type DocumentMessage = {
|
|
541
|
+
type: "document",
|
|
542
|
+
data: FirecrawlDocument,
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
type DoneMessage = { type: "done" }
|
|
546
|
+
|
|
547
|
+
type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
|
|
548
|
+
|
|
549
|
+
const messageHandler = (msg: Message) => {
|
|
550
|
+
if (msg.type === "done") {
|
|
551
|
+
this.status = "completed";
|
|
552
|
+
this.dispatchTypedEvent("done", new CustomEvent("done", {
|
|
553
|
+
detail: {
|
|
554
|
+
status: this.status,
|
|
555
|
+
data: this.data,
|
|
556
|
+
},
|
|
557
|
+
}));
|
|
558
|
+
} else if (msg.type === "error") {
|
|
559
|
+
this.status = "failed";
|
|
560
|
+
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
|
561
|
+
detail: {
|
|
562
|
+
status: this.status,
|
|
563
|
+
data: this.data,
|
|
564
|
+
error: msg.error,
|
|
565
|
+
},
|
|
566
|
+
}));
|
|
567
|
+
} else if (msg.type === "catchup") {
|
|
568
|
+
this.status = msg.data.status;
|
|
569
|
+
this.data.push(...(msg.data.data ?? []));
|
|
570
|
+
for (const doc of this.data) {
|
|
571
|
+
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
|
572
|
+
detail: doc,
|
|
573
|
+
}));
|
|
574
|
+
}
|
|
575
|
+
} else if (msg.type === "document") {
|
|
576
|
+
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
|
577
|
+
detail: msg.data,
|
|
578
|
+
}));
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
this.ws.onmessage = ((ev: MessageEvent) => {
|
|
583
|
+
if (typeof ev.data !== "string") {
|
|
584
|
+
this.ws.close();
|
|
585
|
+
return;
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
const msg = JSON.parse(ev.data) as Message;
|
|
589
|
+
messageHandler(msg);
|
|
590
|
+
}).bind(this);
|
|
591
|
+
|
|
592
|
+
this.ws.onclose = ((ev: CloseEvent) => {
|
|
593
|
+
const msg = JSON.parse(ev.reason) as Message;
|
|
594
|
+
messageHandler(msg);
|
|
595
|
+
}).bind(this);
|
|
596
|
+
|
|
597
|
+
this.ws.onerror = ((_: Event) => {
|
|
598
|
+
this.status = "failed"
|
|
599
|
+
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
|
600
|
+
detail: {
|
|
601
|
+
status: this.status,
|
|
602
|
+
data: this.data,
|
|
603
|
+
error: "WebSocket error",
|
|
604
|
+
},
|
|
605
|
+
}));
|
|
606
|
+
}).bind(this);
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
close() {
|
|
610
|
+
this.ws.close();
|
|
611
|
+
}
|
|
612
|
+
}
|