@mendable/firecrawl-js 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -9
- package/build/cjs/index.js +131 -124
- package/build/esm/index.js +129 -124
- package/package.json +3 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -1
- package/src/index.ts +211 -305
- package/types/index.d.ts +51 -169
package/src/index.ts
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
+
import { WebSocket } from "isows";
|
|
5
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
4
6
|
|
|
5
7
|
/**
|
|
6
8
|
* Configuration interface for FirecrawlApp.
|
|
7
9
|
* @param apiKey - Optional API key for authentication.
|
|
8
10
|
* @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
|
|
9
|
-
* @param version - API version, either 'v0' or 'v1'.
|
|
10
11
|
*/
|
|
11
12
|
export interface FirecrawlAppConfig {
|
|
12
13
|
apiKey?: string | null;
|
|
13
14
|
apiUrl?: string | null;
|
|
14
|
-
version?: "v0" | "v1";
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -54,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
|
|
|
54
54
|
[key: string]: any; // Allows for additional metadata properties not explicitly defined.
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
/**
|
|
58
|
-
* Metadata for a Firecrawl document on v0.
|
|
59
|
-
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
|
60
|
-
*/
|
|
61
|
-
export interface FirecrawlDocumentMetadataV0 {
|
|
62
|
-
// Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
|
|
63
|
-
pageStatusCode?: number;
|
|
64
|
-
pageError?: string;
|
|
65
|
-
[key: string]: any;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
57
|
/**
|
|
69
58
|
* Document interface for Firecrawl.
|
|
70
59
|
* Represents a document retrieved or processed by Firecrawl.
|
|
@@ -76,28 +65,7 @@ export interface FirecrawlDocument {
|
|
|
76
65
|
rawHtml?: string;
|
|
77
66
|
links?: string[];
|
|
78
67
|
screenshot?: string;
|
|
79
|
-
metadata
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Document interface for Firecrawl on v0.
|
|
84
|
-
* Represents a document specifically for API version v0 with additional properties.
|
|
85
|
-
*/
|
|
86
|
-
export interface FirecrawlDocumentV0 {
|
|
87
|
-
id?: string;
|
|
88
|
-
url?: string;
|
|
89
|
-
content: string;
|
|
90
|
-
markdown?: string;
|
|
91
|
-
html?: string;
|
|
92
|
-
llm_extraction?: Record<string, any>;
|
|
93
|
-
createdAt?: Date;
|
|
94
|
-
updatedAt?: Date;
|
|
95
|
-
type?: string;
|
|
96
|
-
metadata: FirecrawlDocumentMetadataV0;
|
|
97
|
-
childrenLinks?: string[];
|
|
98
|
-
provider?: string;
|
|
99
|
-
warning?: string;
|
|
100
|
-
index?: number;
|
|
68
|
+
metadata?: FirecrawlDocumentMetadata;
|
|
101
69
|
}
|
|
102
70
|
|
|
103
71
|
/**
|
|
@@ -105,38 +73,12 @@ export interface FirecrawlDocumentV0 {
|
|
|
105
73
|
* Defines the options and configurations available for scraping web content.
|
|
106
74
|
*/
|
|
107
75
|
export interface ScrapeParams {
|
|
108
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
|
|
76
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
|
|
109
77
|
headers?: Record<string, string>;
|
|
110
78
|
includeTags?: string[];
|
|
111
79
|
excludeTags?: string[];
|
|
112
80
|
onlyMainContent?: boolean;
|
|
113
|
-
screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
|
|
114
|
-
waitFor?: number;
|
|
115
|
-
timeout?: number;
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
/**
|
|
119
|
-
* Parameters for scraping operations on v0.
|
|
120
|
-
* Includes page and extractor options specific to API version v0.
|
|
121
|
-
*/
|
|
122
|
-
export interface ScrapeParamsV0 {
|
|
123
|
-
pageOptions?: {
|
|
124
|
-
headers?: Record<string, string>;
|
|
125
|
-
includeHtml?: boolean;
|
|
126
|
-
includeRawHtml?: boolean;
|
|
127
|
-
onlyIncludeTags?: string[];
|
|
128
|
-
onlyMainContent?: boolean;
|
|
129
|
-
removeTags?: string[];
|
|
130
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
131
|
-
screenshot?: boolean;
|
|
132
|
-
fullPageScreenshot?: boolean;
|
|
133
81
|
waitFor?: number;
|
|
134
|
-
};
|
|
135
|
-
extractorOptions?: {
|
|
136
|
-
mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
|
|
137
|
-
extractionPrompt?: string;
|
|
138
|
-
extractionSchema?: Record<string, any> | z.ZodSchema | any;
|
|
139
|
-
};
|
|
140
82
|
timeout?: number;
|
|
141
83
|
}
|
|
142
84
|
|
|
@@ -145,21 +87,11 @@ export interface ScrapeParamsV0 {
|
|
|
145
87
|
* Defines the structure of the response received after a scraping operation.
|
|
146
88
|
*/
|
|
147
89
|
export interface ScrapeResponse extends FirecrawlDocument {
|
|
148
|
-
success:
|
|
90
|
+
success: true;
|
|
149
91
|
warning?: string;
|
|
150
92
|
error?: string;
|
|
151
93
|
}
|
|
152
94
|
|
|
153
|
-
/**
|
|
154
|
-
* Response interface for scraping operations on v0.
|
|
155
|
-
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
|
156
|
-
*/
|
|
157
|
-
export interface ScrapeResponseV0 {
|
|
158
|
-
success: boolean;
|
|
159
|
-
data?: FirecrawlDocumentV0;
|
|
160
|
-
error?: string;
|
|
161
|
-
}
|
|
162
|
-
|
|
163
95
|
/**
|
|
164
96
|
* Parameters for crawling operations.
|
|
165
97
|
* Includes options for both scraping and mapping during a crawl.
|
|
@@ -175,37 +107,6 @@ export interface CrawlParams {
|
|
|
175
107
|
scrapeOptions?: ScrapeParams;
|
|
176
108
|
}
|
|
177
109
|
|
|
178
|
-
/**
|
|
179
|
-
* Parameters for crawling operations on v0.
|
|
180
|
-
* Tailored for API version v0, includes specific options for crawling.
|
|
181
|
-
*/
|
|
182
|
-
export interface CrawlParamsV0 {
|
|
183
|
-
crawlerOptions?: {
|
|
184
|
-
includes?: string[];
|
|
185
|
-
excludes?: string[];
|
|
186
|
-
generateImgAltText?: boolean;
|
|
187
|
-
returnOnlyUrls?: boolean;
|
|
188
|
-
maxDepth?: number;
|
|
189
|
-
mode?: "default" | "fast";
|
|
190
|
-
ignoreSitemap?: boolean;
|
|
191
|
-
limit?: number;
|
|
192
|
-
allowBackwardCrawling?: boolean;
|
|
193
|
-
allowExternalContentLinks?: boolean;
|
|
194
|
-
};
|
|
195
|
-
pageOptions?: {
|
|
196
|
-
headers?: Record<string, string>;
|
|
197
|
-
includeHtml?: boolean;
|
|
198
|
-
includeRawHtml?: boolean;
|
|
199
|
-
onlyIncludeTags?: string[];
|
|
200
|
-
onlyMainContent?: boolean;
|
|
201
|
-
removeTags?: string[];
|
|
202
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
203
|
-
screenshot?: boolean;
|
|
204
|
-
fullPageScreenshot?: boolean;
|
|
205
|
-
waitFor?: number;
|
|
206
|
-
};
|
|
207
|
-
}
|
|
208
|
-
|
|
209
110
|
/**
|
|
210
111
|
* Response interface for crawling operations.
|
|
211
112
|
* Defines the structure of the response received after initiating a crawl.
|
|
@@ -213,17 +114,7 @@ export interface CrawlParamsV0 {
|
|
|
213
114
|
export interface CrawlResponse {
|
|
214
115
|
id?: string;
|
|
215
116
|
url?: string;
|
|
216
|
-
success:
|
|
217
|
-
error?: string;
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
/**
|
|
221
|
-
* Response interface for crawling operations on v0.
|
|
222
|
-
* Similar to CrawlResponse but tailored for responses from API version v0.
|
|
223
|
-
*/
|
|
224
|
-
export interface CrawlResponseV0 {
|
|
225
|
-
jobId?: string;
|
|
226
|
-
success: boolean;
|
|
117
|
+
success: true;
|
|
227
118
|
error?: string;
|
|
228
119
|
}
|
|
229
120
|
|
|
@@ -232,7 +123,7 @@ export interface CrawlResponseV0 {
|
|
|
232
123
|
* Provides detailed status of a crawl job including progress and results.
|
|
233
124
|
*/
|
|
234
125
|
export interface CrawlStatusResponse {
|
|
235
|
-
success:
|
|
126
|
+
success: true;
|
|
236
127
|
total: number;
|
|
237
128
|
completed: number;
|
|
238
129
|
creditsUsed: number;
|
|
@@ -243,35 +134,15 @@ export interface CrawlStatusResponse {
|
|
|
243
134
|
error?: string;
|
|
244
135
|
}
|
|
245
136
|
|
|
246
|
-
/**
|
|
247
|
-
* Response interface for job status checks on v0.
|
|
248
|
-
* Tailored for API version v0, provides status and partial data of a crawl job.
|
|
249
|
-
*/
|
|
250
|
-
export interface CrawlStatusResponseV0 {
|
|
251
|
-
success: boolean;
|
|
252
|
-
status: string;
|
|
253
|
-
current?: number;
|
|
254
|
-
current_url?: string;
|
|
255
|
-
current_step?: string;
|
|
256
|
-
total?: number;
|
|
257
|
-
data?: FirecrawlDocumentV0[];
|
|
258
|
-
partial_data?: FirecrawlDocumentV0[];
|
|
259
|
-
error?: string;
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
|
|
263
137
|
/**
|
|
264
138
|
* Parameters for mapping operations.
|
|
265
139
|
* Defines options for mapping URLs during a crawl.
|
|
266
140
|
*/
|
|
267
141
|
export interface MapParams {
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
limit?: number
|
|
272
|
-
allowBackwardLinks?: boolean
|
|
273
|
-
allowExternalLinks?: boolean
|
|
274
|
-
ignoreSitemap?: boolean
|
|
142
|
+
search?: string;
|
|
143
|
+
ignoreSitemap?: boolean;
|
|
144
|
+
includeSubdomains?: boolean;
|
|
145
|
+
limit?: number;
|
|
275
146
|
}
|
|
276
147
|
|
|
277
148
|
/**
|
|
@@ -279,57 +150,35 @@ export interface MapParams {
|
|
|
279
150
|
* Defines the structure of the response received after a mapping operation.
|
|
280
151
|
*/
|
|
281
152
|
export interface MapResponse {
|
|
282
|
-
success:
|
|
153
|
+
success: true;
|
|
283
154
|
links?: string[];
|
|
284
155
|
error?: string;
|
|
285
156
|
}
|
|
286
157
|
|
|
287
158
|
/**
|
|
288
|
-
*
|
|
289
|
-
*
|
|
159
|
+
* Error response interface.
|
|
160
|
+
* Defines the structure of the response received when an error occurs.
|
|
290
161
|
*/
|
|
291
|
-
export interface
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
fetchPageContent?: boolean;
|
|
295
|
-
includeHtml?: boolean;
|
|
296
|
-
includeRawHtml?: boolean;
|
|
297
|
-
};
|
|
298
|
-
searchOptions?: {
|
|
299
|
-
limit?: number;
|
|
300
|
-
};
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
/**
|
|
304
|
-
* Response interface for searching operations on v0.
|
|
305
|
-
* Defines the structure of the response received after a search operation on v0.
|
|
306
|
-
*/
|
|
307
|
-
export interface SearchResponseV0 {
|
|
308
|
-
success: boolean;
|
|
309
|
-
data?: FirecrawlDocumentV0[];
|
|
310
|
-
error?: string;
|
|
162
|
+
export interface ErrorResponse {
|
|
163
|
+
success: false;
|
|
164
|
+
error: string;
|
|
311
165
|
}
|
|
312
166
|
|
|
313
167
|
/**
|
|
314
168
|
* Main class for interacting with the Firecrawl API.
|
|
315
169
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
316
170
|
*/
|
|
317
|
-
export default class FirecrawlApp
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
public version: T;
|
|
171
|
+
export default class FirecrawlApp {
|
|
172
|
+
public apiKey: string;
|
|
173
|
+
public apiUrl: string;
|
|
321
174
|
|
|
322
175
|
/**
|
|
323
176
|
* Initializes a new instance of the FirecrawlApp class.
|
|
324
177
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
325
178
|
*/
|
|
326
|
-
constructor({ apiKey = null, apiUrl = null
|
|
179
|
+
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
|
327
180
|
this.apiKey = apiKey || "";
|
|
328
181
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
329
|
-
this.version = version as T;
|
|
330
|
-
if (!this.apiKey) {
|
|
331
|
-
throw new Error("No API key provided");
|
|
332
|
-
}
|
|
333
182
|
}
|
|
334
183
|
|
|
335
184
|
/**
|
|
@@ -340,8 +189,8 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
340
189
|
*/
|
|
341
190
|
async scrapeUrl(
|
|
342
191
|
url: string,
|
|
343
|
-
params?: ScrapeParams
|
|
344
|
-
): Promise<
|
|
192
|
+
params?: ScrapeParams
|
|
193
|
+
): Promise<ScrapeResponse | ErrorResponse> {
|
|
345
194
|
const headers: AxiosRequestHeaders = {
|
|
346
195
|
"Content-Type": "application/json",
|
|
347
196
|
Authorization: `Bearer ${this.apiKey}`,
|
|
@@ -364,19 +213,19 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
364
213
|
}
|
|
365
214
|
try {
|
|
366
215
|
const response: AxiosResponse = await axios.post(
|
|
367
|
-
this.apiUrl +
|
|
216
|
+
this.apiUrl + `/v1/scrape`,
|
|
368
217
|
jsonData,
|
|
369
218
|
{ headers }
|
|
370
219
|
);
|
|
371
220
|
if (response.status === 200) {
|
|
372
221
|
const responseData = response.data;
|
|
373
222
|
if (responseData.success) {
|
|
374
|
-
return
|
|
223
|
+
return {
|
|
375
224
|
success: true,
|
|
376
225
|
warning: responseData.warning,
|
|
377
226
|
error: responseData.error,
|
|
378
227
|
...responseData.data
|
|
379
|
-
}
|
|
228
|
+
};
|
|
380
229
|
} else {
|
|
381
230
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
382
231
|
}
|
|
@@ -386,100 +235,47 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
386
235
|
} catch (error: any) {
|
|
387
236
|
throw new Error(error.message);
|
|
388
237
|
}
|
|
389
|
-
return { success: false, error: "Internal server error." }
|
|
238
|
+
return { success: false, error: "Internal server error." };
|
|
390
239
|
}
|
|
391
240
|
|
|
392
241
|
/**
|
|
393
|
-
*
|
|
394
|
-
* @param query - The query
|
|
395
|
-
* @param params - Additional parameters for the search
|
|
396
|
-
* @returns
|
|
242
|
+
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
|
243
|
+
* @param query - The search query string.
|
|
244
|
+
* @param params - Additional parameters for the search.
|
|
245
|
+
* @returns Throws an error advising to use version 0 of the API.
|
|
397
246
|
*/
|
|
398
247
|
async search(
|
|
399
248
|
query: string,
|
|
400
|
-
params?:
|
|
401
|
-
): Promise<
|
|
402
|
-
|
|
403
|
-
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
const headers: AxiosRequestHeaders = {
|
|
407
|
-
"Content-Type": "application/json",
|
|
408
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
409
|
-
} as AxiosRequestHeaders;
|
|
410
|
-
let jsonData: any = { query };
|
|
411
|
-
if (params) {
|
|
412
|
-
jsonData = { ...jsonData, ...params };
|
|
413
|
-
}
|
|
414
|
-
try {
|
|
415
|
-
const response: AxiosResponse = await axios.post(
|
|
416
|
-
this.apiUrl + "/v0/search",
|
|
417
|
-
jsonData,
|
|
418
|
-
{ headers }
|
|
419
|
-
);
|
|
420
|
-
if (response.status === 200) {
|
|
421
|
-
const responseData = response.data;
|
|
422
|
-
if (responseData.success) {
|
|
423
|
-
return responseData;
|
|
424
|
-
} else {
|
|
425
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
426
|
-
}
|
|
427
|
-
} else {
|
|
428
|
-
this.handleError(response, "search");
|
|
429
|
-
}
|
|
430
|
-
} catch (error: any) {
|
|
431
|
-
throw new Error(error.message);
|
|
432
|
-
}
|
|
433
|
-
return { success: false, error: "Internal server error." };
|
|
249
|
+
params?: any
|
|
250
|
+
): Promise<any> {
|
|
251
|
+
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
434
252
|
}
|
|
435
253
|
|
|
436
254
|
/**
|
|
437
255
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
438
256
|
* @param url - The URL to crawl.
|
|
439
257
|
* @param params - Additional parameters for the crawl request.
|
|
440
|
-
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
441
258
|
* @param pollInterval - Time in seconds for job status checks.
|
|
442
259
|
* @param idempotencyKey - Optional idempotency key for the request.
|
|
443
260
|
* @returns The response from the crawl operation.
|
|
444
261
|
*/
|
|
445
262
|
async crawlUrl(
|
|
446
263
|
url: string,
|
|
447
|
-
params?:
|
|
448
|
-
waitUntilDone: boolean = true,
|
|
264
|
+
params?: CrawlParams,
|
|
449
265
|
pollInterval: number = 2,
|
|
450
266
|
idempotencyKey?: string
|
|
451
|
-
): Promise<
|
|
452
|
-
this['version'] extends 'v0'
|
|
453
|
-
? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
|
|
454
|
-
: CrawlResponse | CrawlStatusResponse
|
|
455
|
-
> {
|
|
267
|
+
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
456
268
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
457
269
|
let jsonData: any = { url, ...params };
|
|
458
270
|
try {
|
|
459
271
|
const response: AxiosResponse = await this.postRequest(
|
|
460
|
-
this.apiUrl +
|
|
272
|
+
this.apiUrl + `/v1/crawl`,
|
|
461
273
|
jsonData,
|
|
462
274
|
headers
|
|
463
275
|
);
|
|
464
276
|
if (response.status === 200) {
|
|
465
|
-
const id: string =
|
|
466
|
-
|
|
467
|
-
if (waitUntilDone) {
|
|
468
|
-
if (this.version === 'v1') { checkUrl = response.data.url }
|
|
469
|
-
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
|
470
|
-
} else {
|
|
471
|
-
if (this.version === 'v0') {
|
|
472
|
-
return {
|
|
473
|
-
success: true,
|
|
474
|
-
jobId: id
|
|
475
|
-
} as CrawlResponseV0;
|
|
476
|
-
} else {
|
|
477
|
-
return {
|
|
478
|
-
success: true,
|
|
479
|
-
id: id
|
|
480
|
-
} as CrawlResponse;
|
|
481
|
-
}
|
|
482
|
-
}
|
|
277
|
+
const id: string = response.data.id;
|
|
278
|
+
return this.monitorJobStatus(id, headers, pollInterval);
|
|
483
279
|
} else {
|
|
484
280
|
this.handleError(response, "start crawl job");
|
|
485
281
|
}
|
|
@@ -490,7 +286,35 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
490
286
|
throw new Error(error.message);
|
|
491
287
|
}
|
|
492
288
|
}
|
|
493
|
-
return { success: false, error: "Internal server error." }
|
|
289
|
+
return { success: false, error: "Internal server error." };
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
async asyncCrawlUrl(
|
|
293
|
+
url: string,
|
|
294
|
+
params?: CrawlParams,
|
|
295
|
+
idempotencyKey?: string
|
|
296
|
+
): Promise<CrawlResponse | ErrorResponse> {
|
|
297
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
298
|
+
let jsonData: any = { url, ...params };
|
|
299
|
+
try {
|
|
300
|
+
const response: AxiosResponse = await this.postRequest(
|
|
301
|
+
this.apiUrl + `/v1/crawl`,
|
|
302
|
+
jsonData,
|
|
303
|
+
headers
|
|
304
|
+
);
|
|
305
|
+
if (response.status === 200) {
|
|
306
|
+
return response.data;
|
|
307
|
+
} else {
|
|
308
|
+
this.handleError(response, "start crawl job");
|
|
309
|
+
}
|
|
310
|
+
} catch (error: any) {
|
|
311
|
+
if (error.response?.data?.error) {
|
|
312
|
+
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
|
313
|
+
} else {
|
|
314
|
+
throw new Error(error.message);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
return { success: false, error: "Internal server error." };
|
|
494
318
|
}
|
|
495
319
|
|
|
496
320
|
/**
|
|
@@ -498,7 +322,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
498
322
|
* @param id - The ID of the crawl operation.
|
|
499
323
|
* @returns The response containing the job status.
|
|
500
324
|
*/
|
|
501
|
-
async checkCrawlStatus(id?: string): Promise<
|
|
325
|
+
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
502
326
|
if (!id) {
|
|
503
327
|
throw new Error("No crawl ID provided");
|
|
504
328
|
}
|
|
@@ -506,71 +330,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
506
330
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
507
331
|
try {
|
|
508
332
|
const response: AxiosResponse = await this.getRequest(
|
|
509
|
-
this.
|
|
510
|
-
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
511
|
-
`${this.apiUrl}/${this.version}/crawl/status/${id}`,
|
|
333
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
512
334
|
headers
|
|
513
335
|
);
|
|
514
336
|
if (response.status === 200) {
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
: undefined,
|
|
527
|
-
} as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
528
|
-
} else {
|
|
529
|
-
return ({
|
|
530
|
-
success: true,
|
|
531
|
-
status: response.data.status,
|
|
532
|
-
total: response.data.total,
|
|
533
|
-
completed: response.data.completed,
|
|
534
|
-
creditsUsed: response.data.creditsUsed,
|
|
535
|
-
expiresAt: new Date(response.data.expiresAt),
|
|
536
|
-
next: response.data.next,
|
|
537
|
-
data: response.data.data,
|
|
538
|
-
error: response.data.error
|
|
539
|
-
} as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
540
|
-
}
|
|
337
|
+
return ({
|
|
338
|
+
success: true,
|
|
339
|
+
status: response.data.status,
|
|
340
|
+
total: response.data.total,
|
|
341
|
+
completed: response.data.completed,
|
|
342
|
+
creditsUsed: response.data.creditsUsed,
|
|
343
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
344
|
+
next: response.data.next,
|
|
345
|
+
data: response.data.data,
|
|
346
|
+
error: response.data.error
|
|
347
|
+
})
|
|
541
348
|
} else {
|
|
542
349
|
this.handleError(response, "check crawl status");
|
|
543
350
|
}
|
|
544
351
|
} catch (error: any) {
|
|
545
352
|
throw new Error(error.message);
|
|
546
353
|
}
|
|
547
|
-
|
|
548
|
-
return this.version === 'v0' ?
|
|
549
|
-
({
|
|
550
|
-
success: false,
|
|
551
|
-
status: "unknown",
|
|
552
|
-
current: 0,
|
|
553
|
-
current_url: "",
|
|
554
|
-
current_step: "",
|
|
555
|
-
total: 0,
|
|
556
|
-
error: "Internal server error.",
|
|
557
|
-
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
|
|
558
|
-
({
|
|
559
|
-
success: false,
|
|
560
|
-
error: "Internal server error.",
|
|
561
|
-
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
|
354
|
+
return { success: false, error: "Internal server error." };
|
|
562
355
|
}
|
|
563
356
|
|
|
564
|
-
async
|
|
565
|
-
|
|
566
|
-
|
|
357
|
+
async crawlUrlAndWatch(
|
|
358
|
+
url: string,
|
|
359
|
+
params?: CrawlParams,
|
|
360
|
+
idempotencyKey?: string,
|
|
361
|
+
) {
|
|
362
|
+
const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
|
|
363
|
+
|
|
364
|
+
if (crawl.success && crawl.id) {
|
|
365
|
+
const id = crawl.id;
|
|
366
|
+
return new CrawlWatcher(id, this);
|
|
567
367
|
}
|
|
368
|
+
|
|
369
|
+
throw new Error("Crawl job failed to start");
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
|
568
373
|
const headers = this.prepareHeaders();
|
|
569
374
|
let jsonData: { url: string } & MapParams = { url, ...params };
|
|
570
375
|
|
|
571
376
|
try {
|
|
572
377
|
const response: AxiosResponse = await this.postRequest(
|
|
573
|
-
this.apiUrl +
|
|
378
|
+
this.apiUrl + `/v1/map`,
|
|
574
379
|
jsonData,
|
|
575
380
|
headers
|
|
576
381
|
);
|
|
@@ -582,7 +387,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
582
387
|
} catch (error: any) {
|
|
583
388
|
throw new Error(error.message);
|
|
584
389
|
}
|
|
585
|
-
return { success: false, error: "Internal server error." }
|
|
390
|
+
return { success: false, error: "Internal server error." };
|
|
586
391
|
}
|
|
587
392
|
|
|
588
393
|
/**
|
|
@@ -637,25 +442,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
637
442
|
async monitorJobStatus(
|
|
638
443
|
id: string,
|
|
639
444
|
headers: AxiosRequestHeaders,
|
|
640
|
-
checkInterval: number
|
|
641
|
-
|
|
642
|
-
): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
|
|
643
|
-
let apiUrl: string = '';
|
|
445
|
+
checkInterval: number
|
|
446
|
+
): Promise<CrawlStatusResponse> {
|
|
644
447
|
while (true) {
|
|
645
|
-
if (this.version === 'v1') {
|
|
646
|
-
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
647
|
-
} else if (this.version === 'v0') {
|
|
648
|
-
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
649
|
-
}
|
|
650
448
|
const statusResponse: AxiosResponse = await this.getRequest(
|
|
651
|
-
apiUrl
|
|
449
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
652
450
|
headers
|
|
653
451
|
);
|
|
654
452
|
if (statusResponse.status === 200) {
|
|
655
453
|
const statusData = statusResponse.data;
|
|
656
454
|
if (statusData.status === "completed") {
|
|
657
455
|
if ("data" in statusData) {
|
|
658
|
-
return
|
|
456
|
+
return statusData;
|
|
659
457
|
} else {
|
|
660
458
|
throw new Error("Crawl job completed but no data was returned");
|
|
661
459
|
}
|
|
@@ -696,3 +494,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
696
494
|
}
|
|
697
495
|
}
|
|
698
496
|
}
|
|
497
|
+
|
|
498
|
+
interface CrawlWatcherEvents {
|
|
499
|
+
document: CustomEvent<FirecrawlDocument>,
|
|
500
|
+
done: CustomEvent<{
|
|
501
|
+
status: CrawlStatusResponse["status"];
|
|
502
|
+
data: FirecrawlDocument[];
|
|
503
|
+
}>,
|
|
504
|
+
error: CustomEvent<{
|
|
505
|
+
status: CrawlStatusResponse["status"],
|
|
506
|
+
data: FirecrawlDocument[],
|
|
507
|
+
error: string,
|
|
508
|
+
}>,
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
512
|
+
private ws: WebSocket;
|
|
513
|
+
public data: FirecrawlDocument[];
|
|
514
|
+
public status: CrawlStatusResponse["status"];
|
|
515
|
+
|
|
516
|
+
constructor(id: string, app: FirecrawlApp) {
|
|
517
|
+
super();
|
|
518
|
+
this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
|
|
519
|
+
this.status = "scraping";
|
|
520
|
+
this.data = [];
|
|
521
|
+
|
|
522
|
+
type ErrorMessage = {
|
|
523
|
+
type: "error",
|
|
524
|
+
error: string,
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
type CatchupMessage = {
|
|
528
|
+
type: "catchup",
|
|
529
|
+
data: CrawlStatusResponse,
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
type DocumentMessage = {
|
|
533
|
+
type: "document",
|
|
534
|
+
data: FirecrawlDocument,
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
type DoneMessage = { type: "done" }
|
|
538
|
+
|
|
539
|
+
type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
|
|
540
|
+
|
|
541
|
+
const messageHandler = (msg: Message) => {
|
|
542
|
+
if (msg.type === "done") {
|
|
543
|
+
this.status = "completed";
|
|
544
|
+
this.dispatchTypedEvent("done", new CustomEvent("done", {
|
|
545
|
+
detail: {
|
|
546
|
+
status: this.status,
|
|
547
|
+
data: this.data,
|
|
548
|
+
},
|
|
549
|
+
}));
|
|
550
|
+
} else if (msg.type === "error") {
|
|
551
|
+
this.status = "failed";
|
|
552
|
+
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
|
553
|
+
detail: {
|
|
554
|
+
status: this.status,
|
|
555
|
+
data: this.data,
|
|
556
|
+
error: msg.error,
|
|
557
|
+
},
|
|
558
|
+
}));
|
|
559
|
+
} else if (msg.type === "catchup") {
|
|
560
|
+
this.status = msg.data.status;
|
|
561
|
+
this.data.push(...(msg.data.data ?? []));
|
|
562
|
+
for (const doc of this.data) {
|
|
563
|
+
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
|
564
|
+
detail: doc,
|
|
565
|
+
}));
|
|
566
|
+
}
|
|
567
|
+
} else if (msg.type === "document") {
|
|
568
|
+
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
|
569
|
+
detail: msg.data,
|
|
570
|
+
}));
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
this.ws.onmessage = ((ev: MessageEvent) => {
|
|
575
|
+
if (typeof ev.data !== "string") {
|
|
576
|
+
this.ws.close();
|
|
577
|
+
return;
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
const msg = JSON.parse(ev.data) as Message;
|
|
581
|
+
messageHandler(msg);
|
|
582
|
+
}).bind(this);
|
|
583
|
+
|
|
584
|
+
this.ws.onclose = ((ev: CloseEvent) => {
|
|
585
|
+
const msg = JSON.parse(ev.reason) as Message;
|
|
586
|
+
messageHandler(msg);
|
|
587
|
+
}).bind(this);
|
|
588
|
+
|
|
589
|
+
this.ws.onerror = ((_: Event) => {
|
|
590
|
+
this.status = "failed"
|
|
591
|
+
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
|
592
|
+
detail: {
|
|
593
|
+
status: this.status,
|
|
594
|
+
data: this.data,
|
|
595
|
+
error: "WebSocket error",
|
|
596
|
+
},
|
|
597
|
+
}));
|
|
598
|
+
}).bind(this);
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
close() {
|
|
602
|
+
this.ws.close();
|
|
603
|
+
}
|
|
604
|
+
}
|