@mendable/firecrawl-js 1.0.4 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -9
- package/build/cjs/index.js +131 -124
- package/build/esm/index.js +129 -124
- package/package.json +3 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -1
- package/src/index.ts +207 -298
- package/types/index.d.ts +48 -163
package/src/index.ts
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
+
import { WebSocket } from "isows";
|
|
5
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
4
6
|
|
|
5
7
|
/**
 * Options accepted when constructing a FirecrawlApp.
 * Every field is optional and may also be passed explicitly as `null`.
 */
export interface FirecrawlAppConfig {
  /** Optional API key for authentication. */
  apiKey?: string | null;
  /** Optional base URL of the API; defaults to 'https://api.firecrawl.dev'. */
  apiUrl?: string | null;
}
|
|
16
16
|
|
|
17
17
|
/**
|
|
@@ -54,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
|
|
|
54
54
|
[key: string]: any; // Allows for additional metadata properties not explicitly defined.
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
-
/**
|
|
58
|
-
* Metadata for a Firecrawl document on v0.
|
|
59
|
-
* Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
|
|
60
|
-
*/
|
|
61
|
-
export interface FirecrawlDocumentMetadataV0 {
|
|
62
|
-
// Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
|
|
63
|
-
pageStatusCode?: number;
|
|
64
|
-
pageError?: string;
|
|
65
|
-
[key: string]: any;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
57
|
/**
|
|
69
58
|
* Document interface for Firecrawl.
|
|
70
59
|
* Represents a document retrieved or processed by Firecrawl.
|
|
@@ -76,28 +65,7 @@ export interface FirecrawlDocument {
|
|
|
76
65
|
rawHtml?: string;
|
|
77
66
|
links?: string[];
|
|
78
67
|
screenshot?: string;
|
|
79
|
-
metadata
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Document interface for Firecrawl on v0.
|
|
84
|
-
* Represents a document specifically for API version v0 with additional properties.
|
|
85
|
-
*/
|
|
86
|
-
export interface FirecrawlDocumentV0 {
|
|
87
|
-
id?: string;
|
|
88
|
-
url?: string;
|
|
89
|
-
content: string;
|
|
90
|
-
markdown?: string;
|
|
91
|
-
html?: string;
|
|
92
|
-
llm_extraction?: Record<string, any>;
|
|
93
|
-
createdAt?: Date;
|
|
94
|
-
updatedAt?: Date;
|
|
95
|
-
type?: string;
|
|
96
|
-
metadata: FirecrawlDocumentMetadataV0;
|
|
97
|
-
childrenLinks?: string[];
|
|
98
|
-
provider?: string;
|
|
99
|
-
warning?: string;
|
|
100
|
-
index?: number;
|
|
68
|
+
metadata?: FirecrawlDocumentMetadata;
|
|
101
69
|
}
|
|
102
70
|
|
|
103
71
|
/**
|
|
@@ -105,38 +73,12 @@ export interface FirecrawlDocumentV0 {
|
|
|
105
73
|
* Defines the options and configurations available for scraping web content.
|
|
106
74
|
*/
|
|
107
75
|
export interface ScrapeParams {
  /**
   * Output formats requested for the scraped page.
   *
   * "screenshot@fullPage" is the full-page screenshot format.
   * "full@scrennshot" is a misspelled literal that shipped in this union; it is
   * kept only so existing callers keep compiling and should not be used in new code.
   */
  formats: (
    | "markdown"
    | "html"
    | "rawHtml"
    | "content"
    | "links"
    | "screenshot"
    | "screenshot@fullPage"
    | "full@scrennshot"
  )[];
  /** Extra HTTP headers, keyed by header name. */
  headers?: Record<string, string>;
  /** Tags to include in the output. */
  includeTags?: string[];
  /** Tags to exclude from the output. */
  excludeTags?: string[];
  /** When set, restricts the output to the page's main content. */
  onlyMainContent?: boolean;
  /** Delay before scraping — presumably milliseconds; confirm against the API reference. */
  waitFor?: number;
  /** Request timeout — presumably milliseconds; confirm against the API reference. */
  timeout?: number;
}
|
|
142
84
|
|
|
@@ -145,21 +87,11 @@ export interface ScrapeParamsV0 {
|
|
|
145
87
|
* Defines the structure of the response received after a scraping operation.
|
|
146
88
|
*/
|
|
147
89
|
export interface ScrapeResponse extends FirecrawlDocument {
  /** Always the literal `true`; failures are represented by ErrorResponse (`success: false`). */
  success: true;
  /** Optional warning text copied from the API response. */
  warning?: string;
  /** Optional error text copied from the API response. */
  error?: string;
}
|
|
152
94
|
|
|
153
|
-
/**
|
|
154
|
-
* Response interface for scraping operations on v0.
|
|
155
|
-
* Similar to ScrapeResponse but tailored for responses from API version v0.
|
|
156
|
-
*/
|
|
157
|
-
export interface ScrapeResponseV0 {
|
|
158
|
-
success: boolean;
|
|
159
|
-
data?: FirecrawlDocumentV0;
|
|
160
|
-
error?: string;
|
|
161
|
-
}
|
|
162
|
-
|
|
163
95
|
/**
|
|
164
96
|
* Parameters for crawling operations.
|
|
165
97
|
* Includes options for both scraping and mapping during a crawl.
|
|
@@ -175,37 +107,6 @@ export interface CrawlParams {
|
|
|
175
107
|
scrapeOptions?: ScrapeParams;
|
|
176
108
|
}
|
|
177
109
|
|
|
178
|
-
/**
|
|
179
|
-
* Parameters for crawling operations on v0.
|
|
180
|
-
* Tailored for API version v0, includes specific options for crawling.
|
|
181
|
-
*/
|
|
182
|
-
export interface CrawlParamsV0 {
|
|
183
|
-
crawlerOptions?: {
|
|
184
|
-
includes?: string[];
|
|
185
|
-
excludes?: string[];
|
|
186
|
-
generateImgAltText?: boolean;
|
|
187
|
-
returnOnlyUrls?: boolean;
|
|
188
|
-
maxDepth?: number;
|
|
189
|
-
mode?: "default" | "fast";
|
|
190
|
-
ignoreSitemap?: boolean;
|
|
191
|
-
limit?: number;
|
|
192
|
-
allowBackwardCrawling?: boolean;
|
|
193
|
-
allowExternalContentLinks?: boolean;
|
|
194
|
-
};
|
|
195
|
-
pageOptions?: {
|
|
196
|
-
headers?: Record<string, string>;
|
|
197
|
-
includeHtml?: boolean;
|
|
198
|
-
includeRawHtml?: boolean;
|
|
199
|
-
onlyIncludeTags?: string[];
|
|
200
|
-
onlyMainContent?: boolean;
|
|
201
|
-
removeTags?: string[];
|
|
202
|
-
replaceAllPathsWithAbsolutePaths?: boolean;
|
|
203
|
-
screenshot?: boolean;
|
|
204
|
-
fullPageScreenshot?: boolean;
|
|
205
|
-
waitFor?: number;
|
|
206
|
-
};
|
|
207
|
-
}
|
|
208
|
-
|
|
209
110
|
/**
|
|
210
111
|
* Response interface for crawling operations.
|
|
211
112
|
* Defines the structure of the response received after initiating a crawl.
|
|
@@ -213,17 +114,7 @@ export interface CrawlParamsV0 {
|
|
|
213
114
|
export interface CrawlResponse {
  /** Identifier of the crawl job; used to build the `/v1/crawl/{id}` status and watch URLs. */
  id?: string;
  /** URL returned with the job — presumably the status endpoint; confirm against the API reference. */
  url?: string;
  /** Always the literal `true`; failures are represented by ErrorResponse (`success: false`). */
  success: true;
  /** Optional error text supplied by the API. */
  error?: string;
}
|
|
229
120
|
|
|
@@ -232,7 +123,7 @@ export interface CrawlResponseV0 {
|
|
|
232
123
|
* Provides detailed status of a crawl job including progress and results.
|
|
233
124
|
*/
|
|
234
125
|
export interface CrawlStatusResponse {
|
|
235
|
-
success:
|
|
126
|
+
success: true;
|
|
236
127
|
total: number;
|
|
237
128
|
completed: number;
|
|
238
129
|
creditsUsed: number;
|
|
@@ -243,23 +134,6 @@ export interface CrawlStatusResponse {
|
|
|
243
134
|
error?: string;
|
|
244
135
|
}
|
|
245
136
|
|
|
246
|
-
/**
|
|
247
|
-
* Response interface for job status checks on v0.
|
|
248
|
-
* Tailored for API version v0, provides status and partial data of a crawl job.
|
|
249
|
-
*/
|
|
250
|
-
export interface CrawlStatusResponseV0 {
|
|
251
|
-
success: boolean;
|
|
252
|
-
status: string;
|
|
253
|
-
current?: number;
|
|
254
|
-
current_url?: string;
|
|
255
|
-
current_step?: string;
|
|
256
|
-
total?: number;
|
|
257
|
-
data?: FirecrawlDocumentV0[];
|
|
258
|
-
partial_data?: FirecrawlDocumentV0[];
|
|
259
|
-
error?: string;
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
|
|
263
137
|
/**
|
|
264
138
|
* Parameters for mapping operations.
|
|
265
139
|
* Defines options for mapping URLs during a crawl.
|
|
@@ -276,57 +150,35 @@ export interface MapParams {
|
|
|
276
150
|
* Defines the structure of the response received after a mapping operation.
|
|
277
151
|
*/
|
|
278
152
|
export interface MapResponse {
  /** Always the literal `true`; failures are represented by ErrorResponse (`success: false`). */
  success: true;
  /** Links produced by the mapping operation, when any were returned. */
  links?: string[];
  /** Optional error text supplied by the API. */
  error?: string;
}
|
|
283
157
|
|
|
284
158
|
/**
|
|
285
|
-
*
|
|
286
|
-
*
|
|
287
|
-
*/
|
|
288
|
-
export interface SearchParamsV0 {
|
|
289
|
-
pageOptions?: {
|
|
290
|
-
onlyMainContent?: boolean;
|
|
291
|
-
fetchPageContent?: boolean;
|
|
292
|
-
includeHtml?: boolean;
|
|
293
|
-
includeRawHtml?: boolean;
|
|
294
|
-
};
|
|
295
|
-
searchOptions?: {
|
|
296
|
-
limit?: number;
|
|
297
|
-
};
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
/**
|
|
301
|
-
* Response interface for searching operations on v0.
|
|
302
|
-
* Defines the structure of the response received after a search operation on v0.
|
|
159
|
+
* Error response interface.
|
|
160
|
+
* Defines the structure of the response received when an error occurs.
|
|
303
161
|
*/
|
|
304
|
-
export interface
|
|
305
|
-
success:
|
|
306
|
-
|
|
307
|
-
error?: string;
|
|
162
|
+
export interface ErrorResponse {
  /** Always the literal `false`; lets callers narrow a `XResponse | ErrorResponse` union on `success`. */
  success: false;
  /** Human-readable description of what went wrong. */
  error: string;
}
|
|
309
166
|
|
|
310
167
|
/**
|
|
311
168
|
* Main class for interacting with the Firecrawl API.
|
|
312
169
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
313
170
|
*/
|
|
314
|
-
export default class FirecrawlApp
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
public version: T;
|
|
171
|
+
export default class FirecrawlApp {
|
|
172
|
+
public apiKey: string;
|
|
173
|
+
public apiUrl: string;
|
|
318
174
|
|
|
319
175
|
/**
 * Creates a FirecrawlApp client.
 * @param config - Configuration options for the FirecrawlApp instance. A missing,
 *   `null` or empty `apiKey` is stored as ""; a missing, `null` or empty `apiUrl`
 *   falls back to "https://api.firecrawl.dev".
 */
constructor(config: FirecrawlAppConfig) {
  const { apiKey, apiUrl } = config;
  // Truthiness (not nullish) checks on purpose: an empty string also selects the fallback.
  this.apiKey = apiKey ? apiKey : "";
  this.apiUrl = apiUrl ? apiUrl : "https://api.firecrawl.dev";
}
|
|
331
183
|
|
|
332
184
|
/**
|
|
@@ -337,8 +189,8 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
337
189
|
*/
|
|
338
190
|
async scrapeUrl(
|
|
339
191
|
url: string,
|
|
340
|
-
params?: ScrapeParams
|
|
341
|
-
): Promise<
|
|
192
|
+
params?: ScrapeParams
|
|
193
|
+
): Promise<ScrapeResponse | ErrorResponse> {
|
|
342
194
|
const headers: AxiosRequestHeaders = {
|
|
343
195
|
"Content-Type": "application/json",
|
|
344
196
|
Authorization: `Bearer ${this.apiKey}`,
|
|
@@ -361,19 +213,19 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
361
213
|
}
|
|
362
214
|
try {
|
|
363
215
|
const response: AxiosResponse = await axios.post(
|
|
364
|
-
this.apiUrl +
|
|
216
|
+
this.apiUrl + `/v1/scrape`,
|
|
365
217
|
jsonData,
|
|
366
218
|
{ headers }
|
|
367
219
|
);
|
|
368
220
|
if (response.status === 200) {
|
|
369
221
|
const responseData = response.data;
|
|
370
222
|
if (responseData.success) {
|
|
371
|
-
return
|
|
223
|
+
return {
|
|
372
224
|
success: true,
|
|
373
225
|
warning: responseData.warning,
|
|
374
226
|
error: responseData.error,
|
|
375
227
|
...responseData.data
|
|
376
|
-
}
|
|
228
|
+
};
|
|
377
229
|
} else {
|
|
378
230
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
379
231
|
}
|
|
@@ -383,100 +235,75 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
383
235
|
} catch (error: any) {
|
|
384
236
|
throw new Error(error.message);
|
|
385
237
|
}
|
|
386
|
-
return { success: false, error: "Internal server error." }
|
|
238
|
+
return { success: false, error: "Internal server error." };
|
|
387
239
|
}
|
|
388
240
|
|
|
389
241
|
/**
 * Placeholder for the search operation, which is not supported in version 1 of the API.
 *
 * The constructor no longer accepts a `version` option, so this client cannot be
 * switched to v0; the error message therefore points callers at an earlier SDK
 * release instead of at the constructor.
 *
 * @param query - The search query string (unused).
 * @param params - Additional parameters for the search (unused).
 * @returns Never resolves with a value; the returned promise always rejects.
 * @throws Error always, explaining that search is unavailable.
 */
async search(
  query: string,
  params?: any
): Promise<any> {
  throw new Error("Search is not supported in v1, and this SDK version no longer supports v0; use an earlier SDK release that targets v0 to search.");
}
|
|
432
253
|
|
|
433
254
|
/**
 * Initiates a crawl job for a URL using the Firecrawl API and waits for it to finish.
 * @param url - The URL to crawl.
 * @param params - Additional parameters for the crawl request.
 * @param pollInterval - Time in seconds for job status checks.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The final job status as produced by monitorJobStatus.
 * @throws Error when the start request fails, when the API reports an error, or
 *   when a 200 response carries no job id.
 */
async crawlUrl(
  url: string,
  params?: CrawlParams,
  pollInterval: number = 2,
  idempotencyKey?: string
): Promise<CrawlStatusResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);
  const jsonData: any = { url, ...params };
  try {
    const response: AxiosResponse = await this.postRequest(
      this.apiUrl + `/v1/crawl`,
      jsonData,
      headers
    );
    if (response.status === 200) {
      const id: string = response.data?.id;
      // Without an id the poller would request `/v1/crawl/undefined`; fail here with a clear message instead.
      if (!id) {
        throw new Error("Crawl job was started but no job id was returned");
      }
      // Returned without `await`: errors raised while polling propagate unchanged
      // rather than passing through the catch block below.
      return this.monitorJobStatus(id, headers, pollInterval);
    } else {
      this.handleError(response, "start crawl job");
    }
  } catch (error: any) {
    // Prefer the API's own error payload when the HTTP client attached one.
    if (error.response?.data?.error) {
      throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
    } else {
      throw new Error(error.message);
    }
  }
  // Only reached if handleError returns instead of throwing.
  return { success: false, error: "Internal server error." };
}
|
|
291
|
+
|
|
292
|
+
async asyncCrawlUrl(
|
|
293
|
+
url: string,
|
|
294
|
+
params?: CrawlParams,
|
|
295
|
+
idempotencyKey?: string
|
|
296
|
+
): Promise<CrawlResponse | ErrorResponse> {
|
|
297
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
298
|
+
let jsonData: any = { url, ...params };
|
|
299
|
+
try {
|
|
300
|
+
const response: AxiosResponse = await this.postRequest(
|
|
301
|
+
this.apiUrl + `/v1/crawl`,
|
|
302
|
+
jsonData,
|
|
303
|
+
headers
|
|
304
|
+
);
|
|
305
|
+
if (response.status === 200) {
|
|
306
|
+
return response.data;
|
|
480
307
|
} else {
|
|
481
308
|
this.handleError(response, "start crawl job");
|
|
482
309
|
}
|
|
@@ -487,7 +314,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
487
314
|
throw new Error(error.message);
|
|
488
315
|
}
|
|
489
316
|
}
|
|
490
|
-
return { success: false, error: "Internal server error." }
|
|
317
|
+
return { success: false, error: "Internal server error." };
|
|
491
318
|
}
|
|
492
319
|
|
|
493
320
|
/**
|
|
@@ -495,7 +322,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
495
322
|
* @param id - The ID of the crawl operation.
|
|
496
323
|
* @returns The response containing the job status.
|
|
497
324
|
*/
|
|
498
|
-
async checkCrawlStatus(id?: string): Promise<
|
|
325
|
+
async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
499
326
|
if (!id) {
|
|
500
327
|
throw new Error("No crawl ID provided");
|
|
501
328
|
}
|
|
@@ -503,71 +330,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
503
330
|
const headers: AxiosRequestHeaders = this.prepareHeaders();
|
|
504
331
|
try {
|
|
505
332
|
const response: AxiosResponse = await this.getRequest(
|
|
506
|
-
this.
|
|
507
|
-
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
508
|
-
`${this.apiUrl}/${this.version}/crawl/status/${id}`,
|
|
333
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
509
334
|
headers
|
|
510
335
|
);
|
|
511
336
|
if (response.status === 200) {
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
: undefined,
|
|
524
|
-
} as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
525
|
-
} else {
|
|
526
|
-
return ({
|
|
527
|
-
success: true,
|
|
528
|
-
status: response.data.status,
|
|
529
|
-
total: response.data.total,
|
|
530
|
-
completed: response.data.completed,
|
|
531
|
-
creditsUsed: response.data.creditsUsed,
|
|
532
|
-
expiresAt: new Date(response.data.expiresAt),
|
|
533
|
-
next: response.data.next,
|
|
534
|
-
data: response.data.data,
|
|
535
|
-
error: response.data.error
|
|
536
|
-
} as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
|
|
537
|
-
}
|
|
337
|
+
return ({
|
|
338
|
+
success: true,
|
|
339
|
+
status: response.data.status,
|
|
340
|
+
total: response.data.total,
|
|
341
|
+
completed: response.data.completed,
|
|
342
|
+
creditsUsed: response.data.creditsUsed,
|
|
343
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
344
|
+
next: response.data.next,
|
|
345
|
+
data: response.data.data,
|
|
346
|
+
error: response.data.error
|
|
347
|
+
})
|
|
538
348
|
} else {
|
|
539
349
|
this.handleError(response, "check crawl status");
|
|
540
350
|
}
|
|
541
351
|
} catch (error: any) {
|
|
542
352
|
throw new Error(error.message);
|
|
543
353
|
}
|
|
544
|
-
|
|
545
|
-
return this.version === 'v0' ?
|
|
546
|
-
({
|
|
547
|
-
success: false,
|
|
548
|
-
status: "unknown",
|
|
549
|
-
current: 0,
|
|
550
|
-
current_url: "",
|
|
551
|
-
current_step: "",
|
|
552
|
-
total: 0,
|
|
553
|
-
error: "Internal server error.",
|
|
554
|
-
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
|
|
555
|
-
({
|
|
556
|
-
success: false,
|
|
557
|
-
error: "Internal server error.",
|
|
558
|
-
} as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
|
|
354
|
+
return { success: false, error: "Internal server error." };
|
|
559
355
|
}
|
|
560
356
|
|
|
561
|
-
async
|
|
562
|
-
|
|
563
|
-
|
|
357
|
+
/**
 * Starts a crawl job via asyncCrawlUrl and returns a CrawlWatcher bound to it.
 * @param url - The URL to crawl.
 * @param params - Additional parameters for the crawl request.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns A CrawlWatcher constructed with the new job's id and this client.
 * @throws Error when the start response is unsuccessful or carries no job id.
 */
async crawlUrlAndWatch(
  url: string,
  params?: CrawlParams,
  idempotencyKey?: string,
) {
  const started = await this.asyncCrawlUrl(url, params, idempotencyKey);

  // Guard clause: anything other than a successful response with an id is a failure.
  if (!started.success || !started.id) {
    throw new Error("Crawl job failed to start");
  }

  return new CrawlWatcher(started.id, this);
}
|
|
371
|
+
|
|
372
|
+
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
|
565
373
|
const headers = this.prepareHeaders();
|
|
566
374
|
let jsonData: { url: string } & MapParams = { url, ...params };
|
|
567
375
|
|
|
568
376
|
try {
|
|
569
377
|
const response: AxiosResponse = await this.postRequest(
|
|
570
|
-
this.apiUrl +
|
|
378
|
+
this.apiUrl + `/v1/map`,
|
|
571
379
|
jsonData,
|
|
572
380
|
headers
|
|
573
381
|
);
|
|
@@ -579,7 +387,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
579
387
|
} catch (error: any) {
|
|
580
388
|
throw new Error(error.message);
|
|
581
389
|
}
|
|
582
|
-
return { success: false, error: "Internal server error." }
|
|
390
|
+
return { success: false, error: "Internal server error." };
|
|
583
391
|
}
|
|
584
392
|
|
|
585
393
|
/**
|
|
@@ -634,25 +442,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
634
442
|
async monitorJobStatus(
|
|
635
443
|
id: string,
|
|
636
444
|
headers: AxiosRequestHeaders,
|
|
637
|
-
checkInterval: number
|
|
638
|
-
|
|
639
|
-
): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
|
|
640
|
-
let apiUrl: string = '';
|
|
445
|
+
checkInterval: number
|
|
446
|
+
): Promise<CrawlStatusResponse> {
|
|
641
447
|
while (true) {
|
|
642
|
-
if (this.version === 'v1') {
|
|
643
|
-
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
644
|
-
} else if (this.version === 'v0') {
|
|
645
|
-
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
646
|
-
}
|
|
647
448
|
const statusResponse: AxiosResponse = await this.getRequest(
|
|
648
|
-
apiUrl
|
|
449
|
+
`${this.apiUrl}/v1/crawl/${id}`,
|
|
649
450
|
headers
|
|
650
451
|
);
|
|
651
452
|
if (statusResponse.status === 200) {
|
|
652
453
|
const statusData = statusResponse.data;
|
|
653
454
|
if (statusData.status === "completed") {
|
|
654
455
|
if ("data" in statusData) {
|
|
655
|
-
return
|
|
456
|
+
return statusData;
|
|
656
457
|
} else {
|
|
657
458
|
throw new Error("Crawl job completed but no data was returned");
|
|
658
459
|
}
|
|
@@ -693,3 +494,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
|
|
|
693
494
|
}
|
|
694
495
|
}
|
|
695
496
|
}
|
|
497
|
+
|
|
498
|
+
/**
 * Event map for CrawlWatcher: each key is an event name and each value is the
 * event object delivered to listeners of that name.
 */
interface CrawlWatcherEvents {
  /** Carries a single document in `detail`. */
  document: CustomEvent<FirecrawlDocument>;
  /** Dispatched when the watcher receives a "done" message. */
  done: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
  }>;
  /** Dispatched on an "error" message or a WebSocket error. */
  error: CustomEvent<{
    status: CrawlStatusResponse["status"];
    data: FirecrawlDocument[];
    error: string;
  }>;
}
|
|
510
|
+
|
|
511
|
+
/**
 * Follows a crawl job over a WebSocket and re-emits its progress as typed events.
 *
 * Emits "document" for each document received, "done" when the server reports
 * completion, and "error" when the server reports an error or the socket fails.
 * `status` and `data` hold the watcher's current view of the job.
 */
export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
  private ws: WebSocket;
  public data: FirecrawlDocument[];
  public status: CrawlStatusResponse["status"];

  /**
   * Opens the WebSocket for the given crawl job and wires up its handlers.
   * @param id - Identifier of the crawl job to watch.
   * @param app - Client whose `apiUrl` and `apiKey` are used for the connection.
   */
  constructor(id: string, app: FirecrawlApp) {
    super();
    // The API base URL is http(s); a WebSocket needs the matching ws(s) scheme
    // (`http://` -> `ws://`, `https://` -> `wss://`). Other schemes are left untouched.
    const wsBaseUrl = app.apiUrl.replace(/^http/i, "ws");
    // The API key is passed as the WebSocket sub-protocol argument.
    this.ws = new WebSocket(`${wsBaseUrl}/v1/crawl/${id}`, app.apiKey);
    this.status = "scraping";
    this.data = [];

    type ErrorMessage = {
      type: "error",
      error: string,
    }

    type CatchupMessage = {
      type: "catchup",
      data: CrawlStatusResponse,
    }

    type DocumentMessage = {
      type: "document",
      data: FirecrawlDocument,
    }

    type DoneMessage = { type: "done" }

    type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;

    // Marks the job as failed and notifies "error" listeners.
    const emitError = (error: string) => {
      this.status = "failed";
      this.dispatchTypedEvent("error", new CustomEvent("error", {
        detail: {
          status: this.status,
          data: this.data,
          error,
        },
      }));
    };

    // Notifies "document" listeners about one document.
    const emitDocument = (doc: FirecrawlDocument) => {
      this.dispatchTypedEvent("document", new CustomEvent("document", {
        detail: doc,
      }));
    };

    // Applies one decoded server message to the watcher state and emits the matching event.
    const messageHandler = (msg: Message) => {
      switch (msg.type) {
        case "done":
          this.status = "completed";
          this.dispatchTypedEvent("done", new CustomEvent("done", {
            detail: {
              status: this.status,
              data: this.data,
            },
          }));
          break;
        case "error":
          emitError(msg.error);
          break;
        case "catchup": {
          this.status = msg.data.status;
          const caughtUp: FirecrawlDocument[] = msg.data.data ?? [];
          this.data.push(...caughtUp);
          // Emit only the documents delivered by this message so that a repeated
          // catchup does not re-announce documents listeners have already seen.
          for (const doc of caughtUp) {
            emitDocument(doc);
          }
          break;
        }
        case "document":
          // NOTE(review): streamed documents are emitted but not appended to `this.data`,
          // so the "done" payload only contains catchup documents — confirm this is intended.
          emitDocument(msg.data);
          break;
        default:
          // Unknown message types are ignored.
          break;
      }
    };

    // Decodes a JSON payload and dispatches it; malformed payloads are logged, not thrown,
    // because an exception escaping a socket event handler cannot be caught by the caller.
    const handleRaw = (raw: string, source: string) => {
      let msg: Message;
      try {
        msg = JSON.parse(raw) as Message;
      } catch (error) {
        console.error(`CrawlWatcher: ignoring malformed ${source} payload`, error);
        return;
      }
      messageHandler(msg);
    };

    this.ws.onmessage = (ev: MessageEvent) => {
      // Only text frames are expected; anything else ends the session.
      if (typeof ev.data !== "string") {
        this.ws.close();
        return;
      }

      handleRaw(ev.data, "message");
    };

    this.ws.onclose = (ev: CloseEvent) => {
      // The close reason is empty on an ordinary close (including one triggered by
      // `close()`), so only decode it when the server actually supplied something.
      if (ev.reason) {
        handleRaw(ev.reason, "close reason");
      }
    };

    this.ws.onerror = (_: Event) => {
      emitError("WebSocket error");
    };
  }

  /** Closes the underlying WebSocket. */
  close() {
    this.ws.close();
  }
}
|