@mendable/firecrawl-js 1.0.4 → 1.2.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,17 +1,17 @@
1
1
  import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
2
2
  import { z } from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
+ import { WebSocket } from "isows";
5
+ import { TypedEventTarget } from "typescript-event-target";
4
6
 
5
7
  /**
6
8
  * Configuration interface for FirecrawlApp.
7
9
  * @param apiKey - Optional API key for authentication.
8
10
  * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
9
- * @param version - API version, either 'v0' or 'v1'.
10
11
  */
11
12
  export interface FirecrawlAppConfig {
12
13
  apiKey?: string | null;
13
14
  apiUrl?: string | null;
14
- version?: "v0" | "v1";
15
15
  }
16
16
 
17
17
  /**
@@ -54,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
54
54
  [key: string]: any; // Allows for additional metadata properties not explicitly defined.
55
55
  }
56
56
 
57
- /**
58
- * Metadata for a Firecrawl document on v0.
59
- * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
60
- */
61
- export interface FirecrawlDocumentMetadataV0 {
62
- // Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
63
- pageStatusCode?: number;
64
- pageError?: string;
65
- [key: string]: any;
66
- }
67
-
68
57
  /**
69
58
  * Document interface for Firecrawl.
70
59
  * Represents a document retrieved or processed by Firecrawl.
@@ -75,29 +64,9 @@ export interface FirecrawlDocument {
75
64
  html?: string;
76
65
  rawHtml?: string;
77
66
  links?: string[];
67
+ extract?: Record<any, any>;
78
68
  screenshot?: string;
79
- metadata: FirecrawlDocumentMetadata;
80
- }
81
-
82
- /**
83
- * Document interface for Firecrawl on v0.
84
- * Represents a document specifically for API version v0 with additional properties.
85
- */
86
- export interface FirecrawlDocumentV0 {
87
- id?: string;
88
- url?: string;
89
- content: string;
90
- markdown?: string;
91
- html?: string;
92
- llm_extraction?: Record<string, any>;
93
- createdAt?: Date;
94
- updatedAt?: Date;
95
- type?: string;
96
- metadata: FirecrawlDocumentMetadataV0;
97
- childrenLinks?: string[];
98
- provider?: string;
99
- warning?: string;
100
- index?: number;
69
+ metadata?: FirecrawlDocumentMetadata;
101
70
  }
102
71
 
103
72
  /**
@@ -105,38 +74,17 @@ export interface FirecrawlDocumentV0 {
105
74
  * Defines the options and configurations available for scraping web content.
106
75
  */
107
76
  export interface ScrapeParams {
108
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
77
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
109
78
  headers?: Record<string, string>;
110
79
  includeTags?: string[];
111
80
  excludeTags?: string[];
112
81
  onlyMainContent?: boolean;
113
- screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
114
- waitFor?: number;
115
- timeout?: number;
116
- }
117
-
118
- /**
119
- * Parameters for scraping operations on v0.
120
- * Includes page and extractor options specific to API version v0.
121
- */
122
- export interface ScrapeParamsV0 {
123
- pageOptions?: {
124
- headers?: Record<string, string>;
125
- includeHtml?: boolean;
126
- includeRawHtml?: boolean;
127
- onlyIncludeTags?: string[];
128
- onlyMainContent?: boolean;
129
- removeTags?: string[];
130
- replaceAllPathsWithAbsolutePaths?: boolean;
131
- screenshot?: boolean;
132
- fullPageScreenshot?: boolean;
133
- waitFor?: number;
134
- };
135
- extractorOptions?: {
136
- mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
137
- extractionPrompt?: string;
138
- extractionSchema?: Record<string, any> | z.ZodSchema | any;
82
+ extract?: {
83
+ prompt?: string;
84
+ schema?: z.ZodSchema | any;
85
+ systemPrompt?: string;
139
86
  };
87
+ waitFor?: number;
140
88
  timeout?: number;
141
89
  }
142
90
 
@@ -145,21 +93,11 @@ export interface ScrapeParamsV0 {
145
93
  * Defines the structure of the response received after a scraping operation.
146
94
  */
147
95
  export interface ScrapeResponse extends FirecrawlDocument {
148
- success: boolean;
96
+ success: true;
149
97
  warning?: string;
150
98
  error?: string;
151
99
  }
152
100
 
153
- /**
154
- * Response interface for scraping operations on v0.
155
- * Similar to ScrapeResponse but tailored for responses from API version v0.
156
- */
157
- export interface ScrapeResponseV0 {
158
- success: boolean;
159
- data?: FirecrawlDocumentV0;
160
- error?: string;
161
- }
162
-
163
101
  /**
164
102
  * Parameters for crawling operations.
165
103
  * Includes options for both scraping and mapping during a crawl.
@@ -175,37 +113,6 @@ export interface CrawlParams {
175
113
  scrapeOptions?: ScrapeParams;
176
114
  }
177
115
 
178
- /**
179
- * Parameters for crawling operations on v0.
180
- * Tailored for API version v0, includes specific options for crawling.
181
- */
182
- export interface CrawlParamsV0 {
183
- crawlerOptions?: {
184
- includes?: string[];
185
- excludes?: string[];
186
- generateImgAltText?: boolean;
187
- returnOnlyUrls?: boolean;
188
- maxDepth?: number;
189
- mode?: "default" | "fast";
190
- ignoreSitemap?: boolean;
191
- limit?: number;
192
- allowBackwardCrawling?: boolean;
193
- allowExternalContentLinks?: boolean;
194
- };
195
- pageOptions?: {
196
- headers?: Record<string, string>;
197
- includeHtml?: boolean;
198
- includeRawHtml?: boolean;
199
- onlyIncludeTags?: string[];
200
- onlyMainContent?: boolean;
201
- removeTags?: string[];
202
- replaceAllPathsWithAbsolutePaths?: boolean;
203
- screenshot?: boolean;
204
- fullPageScreenshot?: boolean;
205
- waitFor?: number;
206
- };
207
- }
208
-
209
116
  /**
210
117
  * Response interface for crawling operations.
211
118
  * Defines the structure of the response received after initiating a crawl.
@@ -213,17 +120,7 @@ export interface CrawlParamsV0 {
213
120
  export interface CrawlResponse {
214
121
  id?: string;
215
122
  url?: string;
216
- success: boolean;
217
- error?: string;
218
- }
219
-
220
- /**
221
- * Response interface for crawling operations on v0.
222
- * Similar to CrawlResponse but tailored for responses from API version v0.
223
- */
224
- export interface CrawlResponseV0 {
225
- jobId?: string;
226
- success: boolean;
123
+ success: true;
227
124
  error?: string;
228
125
  }
229
126
 
@@ -232,7 +129,7 @@ export interface CrawlResponseV0 {
232
129
  * Provides detailed status of a crawl job including progress and results.
233
130
  */
234
131
  export interface CrawlStatusResponse {
235
- success: boolean;
132
+ success: true;
236
133
  total: number;
237
134
  completed: number;
238
135
  creditsUsed: number;
@@ -243,23 +140,6 @@ export interface CrawlStatusResponse {
243
140
  error?: string;
244
141
  }
245
142
 
246
- /**
247
- * Response interface for job status checks on v0.
248
- * Tailored for API version v0, provides status and partial data of a crawl job.
249
- */
250
- export interface CrawlStatusResponseV0 {
251
- success: boolean;
252
- status: string;
253
- current?: number;
254
- current_url?: string;
255
- current_step?: string;
256
- total?: number;
257
- data?: FirecrawlDocumentV0[];
258
- partial_data?: FirecrawlDocumentV0[];
259
- error?: string;
260
- }
261
-
262
-
263
143
  /**
264
144
  * Parameters for mapping operations.
265
145
  * Defines options for mapping URLs during a crawl.
@@ -276,57 +156,35 @@ export interface MapParams {
276
156
  * Defines the structure of the response received after a mapping operation.
277
157
  */
278
158
  export interface MapResponse {
279
- success: boolean;
159
+ success: true;
280
160
  links?: string[];
281
161
  error?: string;
282
162
  }
283
163
 
284
164
  /**
285
- * Parameters for searching operations on v0.
286
- * Tailored for API version v0, includes specific options for searching content.
165
+ * Error response interface.
166
+ * Defines the structure of the response received when an error occurs.
287
167
  */
288
- export interface SearchParamsV0 {
289
- pageOptions?: {
290
- onlyMainContent?: boolean;
291
- fetchPageContent?: boolean;
292
- includeHtml?: boolean;
293
- includeRawHtml?: boolean;
294
- };
295
- searchOptions?: {
296
- limit?: number;
297
- };
298
- }
299
-
300
- /**
301
- * Response interface for searching operations on v0.
302
- * Defines the structure of the response received after a search operation on v0.
303
- */
304
- export interface SearchResponseV0 {
305
- success: boolean;
306
- data?: FirecrawlDocumentV0[];
307
- error?: string;
168
+ export interface ErrorResponse {
169
+ success: false;
170
+ error: string;
308
171
  }
309
172
 
310
173
  /**
311
174
  * Main class for interacting with the Firecrawl API.
312
175
  * Provides methods for scraping, searching, crawling, and mapping web content.
313
176
  */
314
- export default class FirecrawlApp<T extends "v0" | "v1"> {
315
- private apiKey: string;
316
- private apiUrl: string;
317
- public version: T;
177
+ export default class FirecrawlApp {
178
+ public apiKey: string;
179
+ public apiUrl: string;
318
180
 
319
181
  /**
320
182
  * Initializes a new instance of the FirecrawlApp class.
321
183
  * @param config - Configuration options for the FirecrawlApp instance.
322
184
  */
323
- constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
185
+ constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
324
186
  this.apiKey = apiKey || "";
325
187
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
326
- this.version = version as T;
327
- if (!this.apiKey) {
328
- throw new Error("No API key provided");
329
- }
330
188
  }
331
189
 
332
190
  /**
@@ -337,43 +195,45 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
337
195
  */
338
196
  async scrapeUrl(
339
197
  url: string,
340
- params?: ScrapeParams | ScrapeParamsV0
341
- ): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
198
+ params?: ScrapeParams
199
+ ): Promise<ScrapeResponse | ErrorResponse> {
342
200
  const headers: AxiosRequestHeaders = {
343
201
  "Content-Type": "application/json",
344
202
  Authorization: `Bearer ${this.apiKey}`,
345
203
  } as AxiosRequestHeaders;
346
204
  let jsonData: any = { url, ...params };
347
- if (jsonData?.extractorOptions?.extractionSchema) {
348
- let schema = jsonData.extractorOptions.extractionSchema;
349
- // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
350
- if (schema instanceof z.ZodSchema) {
205
+ if (jsonData?.extract?.schema) {
206
+ let schema = jsonData.extract.schema;
207
+
208
+ // Try parsing the schema as a Zod schema
209
+ try {
351
210
  schema = zodToJsonSchema(schema);
211
+ } catch (error) {
212
+
352
213
  }
353
214
  jsonData = {
354
215
  ...jsonData,
355
- extractorOptions: {
356
- ...jsonData.extractorOptions,
357
- extractionSchema: schema,
358
- mode: jsonData.extractorOptions.mode || "llm-extraction",
216
+ extract: {
217
+ ...jsonData.extract,
218
+ schema: schema,
359
219
  },
360
220
  };
361
221
  }
362
222
  try {
363
223
  const response: AxiosResponse = await axios.post(
364
- this.apiUrl + `/${this.version}/scrape`,
224
+ this.apiUrl + `/v1/scrape`,
365
225
  jsonData,
366
226
  { headers }
367
227
  );
368
228
  if (response.status === 200) {
369
229
  const responseData = response.data;
370
230
  if (responseData.success) {
371
- return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
231
+ return {
372
232
  success: true,
373
233
  warning: responseData.warning,
374
234
  error: responseData.error,
375
235
  ...responseData.data
376
- }) as ScrapeResponse;
236
+ };
377
237
  } else {
378
238
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
379
239
  }
@@ -383,100 +243,47 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
383
243
  } catch (error: any) {
384
244
  throw new Error(error.message);
385
245
  }
386
- return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
246
+ return { success: false, error: "Internal server error." };
387
247
  }
388
248
 
389
249
  /**
390
- * Searches for a query using the Firecrawl API.
391
- * @param query - The query to search for.
392
- * @param params - Additional parameters for the search request.
393
- * @returns The response from the search operation.
250
+ * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
251
+ * @param query - The search query string.
252
+ * @param params - Additional parameters for the search.
253
+ * @returns Throws an error advising to use version 0 of the API.
394
254
  */
395
255
  async search(
396
256
  query: string,
397
- params?: SearchParamsV0
398
- ): Promise<SearchResponseV0> {
399
- if (this.version === "v1") {
400
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
401
- }
402
-
403
- const headers: AxiosRequestHeaders = {
404
- "Content-Type": "application/json",
405
- Authorization: `Bearer ${this.apiKey}`,
406
- } as AxiosRequestHeaders;
407
- let jsonData: any = { query };
408
- if (params) {
409
- jsonData = { ...jsonData, ...params };
410
- }
411
- try {
412
- const response: AxiosResponse = await axios.post(
413
- this.apiUrl + "/v0/search",
414
- jsonData,
415
- { headers }
416
- );
417
- if (response.status === 200) {
418
- const responseData = response.data;
419
- if (responseData.success) {
420
- return responseData;
421
- } else {
422
- throw new Error(`Failed to search. Error: ${responseData.error}`);
423
- }
424
- } else {
425
- this.handleError(response, "search");
426
- }
427
- } catch (error: any) {
428
- throw new Error(error.message);
429
- }
430
- return { success: false, error: "Internal server error." };
257
+ params?: any
258
+ ): Promise<any> {
259
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
431
260
  }
432
261
 
433
262
  /**
434
263
  * Initiates a crawl job for a URL using the Firecrawl API.
435
264
  * @param url - The URL to crawl.
436
265
  * @param params - Additional parameters for the crawl request.
437
- * @param waitUntilDone - Whether to wait for the crawl job to complete.
438
266
  * @param pollInterval - Time in seconds for job status checks.
439
267
  * @param idempotencyKey - Optional idempotency key for the request.
440
268
  * @returns The response from the crawl operation.
441
269
  */
442
270
  async crawlUrl(
443
271
  url: string,
444
- params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
445
- waitUntilDone: boolean = true,
272
+ params?: CrawlParams,
446
273
  pollInterval: number = 2,
447
274
  idempotencyKey?: string
448
- ): Promise<
449
- this['version'] extends 'v0'
450
- ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
451
- : CrawlResponse | CrawlStatusResponse
452
- > {
275
+ ): Promise<CrawlStatusResponse | ErrorResponse> {
453
276
  const headers = this.prepareHeaders(idempotencyKey);
454
277
  let jsonData: any = { url, ...params };
455
278
  try {
456
279
  const response: AxiosResponse = await this.postRequest(
457
- this.apiUrl + `/${this.version}/crawl`,
280
+ this.apiUrl + `/v1/crawl`,
458
281
  jsonData,
459
282
  headers
460
283
  );
461
284
  if (response.status === 200) {
462
- const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
463
- let checkUrl: string | undefined = undefined;
464
- if (waitUntilDone) {
465
- if (this.version === 'v1') { checkUrl = response.data.url }
466
- return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
467
- } else {
468
- if (this.version === 'v0') {
469
- return {
470
- success: true,
471
- jobId: id
472
- } as CrawlResponseV0;
473
- } else {
474
- return {
475
- success: true,
476
- id: id
477
- } as CrawlResponse;
478
- }
479
- }
285
+ const id: string = response.data.id;
286
+ return this.monitorJobStatus(id, headers, pollInterval);
480
287
  } else {
481
288
  this.handleError(response, "start crawl job");
482
289
  }
@@ -487,7 +294,35 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
487
294
  throw new Error(error.message);
488
295
  }
489
296
  }
490
- return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
297
+ return { success: false, error: "Internal server error." };
298
+ }
299
+
300
+ async asyncCrawlUrl(
301
+ url: string,
302
+ params?: CrawlParams,
303
+ idempotencyKey?: string
304
+ ): Promise<CrawlResponse | ErrorResponse> {
305
+ const headers = this.prepareHeaders(idempotencyKey);
306
+ let jsonData: any = { url, ...params };
307
+ try {
308
+ const response: AxiosResponse = await this.postRequest(
309
+ this.apiUrl + `/v1/crawl`,
310
+ jsonData,
311
+ headers
312
+ );
313
+ if (response.status === 200) {
314
+ return response.data;
315
+ } else {
316
+ this.handleError(response, "start crawl job");
317
+ }
318
+ } catch (error: any) {
319
+ if (error.response?.data?.error) {
320
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
321
+ } else {
322
+ throw new Error(error.message);
323
+ }
324
+ }
325
+ return { success: false, error: "Internal server error." };
491
326
  }
492
327
 
493
328
  /**
@@ -495,7 +330,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
495
330
  * @param id - The ID of the crawl operation.
496
331
  * @returns The response containing the job status.
497
332
  */
498
- async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
333
+ async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
499
334
  if (!id) {
500
335
  throw new Error("No crawl ID provided");
501
336
  }
@@ -503,71 +338,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
503
338
  const headers: AxiosRequestHeaders = this.prepareHeaders();
504
339
  try {
505
340
  const response: AxiosResponse = await this.getRequest(
506
- this.version === 'v1' ?
507
- `${this.apiUrl}/${this.version}/crawl/${id}` :
508
- `${this.apiUrl}/${this.version}/crawl/status/${id}`,
341
+ `${this.apiUrl}/v1/crawl/${id}`,
509
342
  headers
510
343
  );
511
344
  if (response.status === 200) {
512
- if (this.version === 'v0') {
513
- return ({
514
- success: true,
515
- status: response.data.status,
516
- current: response.data.current,
517
- current_url: response.data.current_url,
518
- current_step: response.data.current_step,
519
- total: response.data.total,
520
- data: response.data.data,
521
- partial_data: !response.data.data
522
- ? response.data.partial_data
523
- : undefined,
524
- } as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
525
- } else {
526
- return ({
527
- success: true,
528
- status: response.data.status,
529
- total: response.data.total,
530
- completed: response.data.completed,
531
- creditsUsed: response.data.creditsUsed,
532
- expiresAt: new Date(response.data.expiresAt),
533
- next: response.data.next,
534
- data: response.data.data,
535
- error: response.data.error
536
- } as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
537
- }
345
+ return ({
346
+ success: true,
347
+ status: response.data.status,
348
+ total: response.data.total,
349
+ completed: response.data.completed,
350
+ creditsUsed: response.data.creditsUsed,
351
+ expiresAt: new Date(response.data.expiresAt),
352
+ next: response.data.next,
353
+ data: response.data.data,
354
+ error: response.data.error
355
+ })
538
356
  } else {
539
357
  this.handleError(response, "check crawl status");
540
358
  }
541
359
  } catch (error: any) {
542
360
  throw new Error(error.message);
543
361
  }
544
-
545
- return this.version === 'v0' ?
546
- ({
547
- success: false,
548
- status: "unknown",
549
- current: 0,
550
- current_url: "",
551
- current_step: "",
552
- total: 0,
553
- error: "Internal server error.",
554
- } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
555
- ({
556
- success: false,
557
- error: "Internal server error.",
558
- } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
362
+ return { success: false, error: "Internal server error." };
559
363
  }
560
364
 
561
- async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
562
- if (this.version == 'v0') {
563
- throw new Error("Map is not supported in v0");
365
+ async crawlUrlAndWatch(
366
+ url: string,
367
+ params?: CrawlParams,
368
+ idempotencyKey?: string,
369
+ ) {
370
+ const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
371
+
372
+ if (crawl.success && crawl.id) {
373
+ const id = crawl.id;
374
+ return new CrawlWatcher(id, this);
564
375
  }
376
+
377
+ throw new Error("Crawl job failed to start");
378
+ }
379
+
380
+ async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
565
381
  const headers = this.prepareHeaders();
566
382
  let jsonData: { url: string } & MapParams = { url, ...params };
567
383
 
568
384
  try {
569
385
  const response: AxiosResponse = await this.postRequest(
570
- this.apiUrl + `/${this.version}/map`,
386
+ this.apiUrl + `/v1/map`,
571
387
  jsonData,
572
388
  headers
573
389
  );
@@ -579,7 +395,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
579
395
  } catch (error: any) {
580
396
  throw new Error(error.message);
581
397
  }
582
- return { success: false, error: "Internal server error." } as MapResponse;
398
+ return { success: false, error: "Internal server error." };
583
399
  }
584
400
 
585
401
  /**
@@ -634,25 +450,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
634
450
  async monitorJobStatus(
635
451
  id: string,
636
452
  headers: AxiosRequestHeaders,
637
- checkInterval: number,
638
- checkUrl?: string
639
- ): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
640
- let apiUrl: string = '';
453
+ checkInterval: number
454
+ ): Promise<CrawlStatusResponse> {
641
455
  while (true) {
642
- if (this.version === 'v1') {
643
- apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
644
- } else if (this.version === 'v0') {
645
- apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
646
- }
647
456
  const statusResponse: AxiosResponse = await this.getRequest(
648
- apiUrl,
457
+ `${this.apiUrl}/v1/crawl/${id}`,
649
458
  headers
650
459
  );
651
460
  if (statusResponse.status === 200) {
652
461
  const statusData = statusResponse.data;
653
462
  if (statusData.status === "completed") {
654
463
  if ("data" in statusData) {
655
- return this.version === 'v0' ? statusData.data : statusData;
464
+ return statusData;
656
465
  } else {
657
466
  throw new Error("Crawl job completed but no data was returned");
658
467
  }
@@ -693,3 +502,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
693
502
  }
694
503
  }
695
504
  }
505
+
506
+ interface CrawlWatcherEvents {
507
+ document: CustomEvent<FirecrawlDocument>,
508
+ done: CustomEvent<{
509
+ status: CrawlStatusResponse["status"];
510
+ data: FirecrawlDocument[];
511
+ }>,
512
+ error: CustomEvent<{
513
+ status: CrawlStatusResponse["status"],
514
+ data: FirecrawlDocument[],
515
+ error: string,
516
+ }>,
517
+ }
518
+
519
+ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
520
+ private ws: WebSocket;
521
+ public data: FirecrawlDocument[];
522
+ public status: CrawlStatusResponse["status"];
523
+
524
+ constructor(id: string, app: FirecrawlApp) {
525
+ super();
526
+ this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
527
+ this.status = "scraping";
528
+ this.data = [];
529
+
530
+ type ErrorMessage = {
531
+ type: "error",
532
+ error: string,
533
+ }
534
+
535
+ type CatchupMessage = {
536
+ type: "catchup",
537
+ data: CrawlStatusResponse,
538
+ }
539
+
540
+ type DocumentMessage = {
541
+ type: "document",
542
+ data: FirecrawlDocument,
543
+ }
544
+
545
+ type DoneMessage = { type: "done" }
546
+
547
+ type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
548
+
549
+ const messageHandler = (msg: Message) => {
550
+ if (msg.type === "done") {
551
+ this.status = "completed";
552
+ this.dispatchTypedEvent("done", new CustomEvent("done", {
553
+ detail: {
554
+ status: this.status,
555
+ data: this.data,
556
+ },
557
+ }));
558
+ } else if (msg.type === "error") {
559
+ this.status = "failed";
560
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
561
+ detail: {
562
+ status: this.status,
563
+ data: this.data,
564
+ error: msg.error,
565
+ },
566
+ }));
567
+ } else if (msg.type === "catchup") {
568
+ this.status = msg.data.status;
569
+ this.data.push(...(msg.data.data ?? []));
570
+ for (const doc of this.data) {
571
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
572
+ detail: doc,
573
+ }));
574
+ }
575
+ } else if (msg.type === "document") {
576
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
577
+ detail: msg.data,
578
+ }));
579
+ }
580
+ }
581
+
582
+ this.ws.onmessage = ((ev: MessageEvent) => {
583
+ if (typeof ev.data !== "string") {
584
+ this.ws.close();
585
+ return;
586
+ }
587
+
588
+ const msg = JSON.parse(ev.data) as Message;
589
+ messageHandler(msg);
590
+ }).bind(this);
591
+
592
+ this.ws.onclose = ((ev: CloseEvent) => {
593
+ const msg = JSON.parse(ev.reason) as Message;
594
+ messageHandler(msg);
595
+ }).bind(this);
596
+
597
+ this.ws.onerror = ((_: Event) => {
598
+ this.status = "failed"
599
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
600
+ detail: {
601
+ status: this.status,
602
+ data: this.data,
603
+ error: "WebSocket error",
604
+ },
605
+ }));
606
+ }).bind(this);
607
+ }
608
+
609
+ close() {
610
+ this.ws.close();
611
+ }
612
+ }