@mendable/firecrawl-js 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,17 +1,17 @@
1
1
  import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
2
2
  import { z } from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
+ import { WebSocket } from "isows";
5
+ import { TypedEventTarget } from "typescript-event-target";
4
6
 
5
7
  /**
6
8
  * Configuration interface for FirecrawlApp.
7
9
  * @param apiKey - Optional API key for authentication.
8
10
  * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
9
- * @param version - API version, either 'v0' or 'v1'.
10
11
  */
11
12
  export interface FirecrawlAppConfig {
12
13
  apiKey?: string | null;
13
14
  apiUrl?: string | null;
14
- version?: "v0" | "v1";
15
15
  }
16
16
 
17
17
  /**
@@ -54,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
54
54
  [key: string]: any; // Allows for additional metadata properties not explicitly defined.
55
55
  }
56
56
 
57
- /**
58
- * Metadata for a Firecrawl document on v0.
59
- * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
60
- */
61
- export interface FirecrawlDocumentMetadataV0 {
62
- // Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
63
- pageStatusCode?: number;
64
- pageError?: string;
65
- [key: string]: any;
66
- }
67
-
68
57
  /**
69
58
  * Document interface for Firecrawl.
70
59
  * Represents a document retrieved or processed by Firecrawl.
@@ -76,28 +65,7 @@ export interface FirecrawlDocument {
76
65
  rawHtml?: string;
77
66
  links?: string[];
78
67
  screenshot?: string;
79
- metadata: FirecrawlDocumentMetadata;
80
- }
81
-
82
- /**
83
- * Document interface for Firecrawl on v0.
84
- * Represents a document specifically for API version v0 with additional properties.
85
- */
86
- export interface FirecrawlDocumentV0 {
87
- id?: string;
88
- url?: string;
89
- content: string;
90
- markdown?: string;
91
- html?: string;
92
- llm_extraction?: Record<string, any>;
93
- createdAt?: Date;
94
- updatedAt?: Date;
95
- type?: string;
96
- metadata: FirecrawlDocumentMetadataV0;
97
- childrenLinks?: string[];
98
- provider?: string;
99
- warning?: string;
100
- index?: number;
68
+ metadata?: FirecrawlDocumentMetadata;
101
69
  }
102
70
 
103
71
  /**
@@ -105,38 +73,12 @@ export interface FirecrawlDocumentV0 {
105
73
  * Defines the options and configurations available for scraping web content.
106
74
  */
107
75
  export interface ScrapeParams {
108
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
76
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
109
77
  headers?: Record<string, string>;
110
78
  includeTags?: string[];
111
79
  excludeTags?: string[];
112
80
  onlyMainContent?: boolean;
113
- screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
114
- waitFor?: number;
115
- timeout?: number;
116
- }
117
-
118
- /**
119
- * Parameters for scraping operations on v0.
120
- * Includes page and extractor options specific to API version v0.
121
- */
122
- export interface ScrapeParamsV0 {
123
- pageOptions?: {
124
- headers?: Record<string, string>;
125
- includeHtml?: boolean;
126
- includeRawHtml?: boolean;
127
- onlyIncludeTags?: string[];
128
- onlyMainContent?: boolean;
129
- removeTags?: string[];
130
- replaceAllPathsWithAbsolutePaths?: boolean;
131
- screenshot?: boolean;
132
- fullPageScreenshot?: boolean;
133
81
  waitFor?: number;
134
- };
135
- extractorOptions?: {
136
- mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
137
- extractionPrompt?: string;
138
- extractionSchema?: Record<string, any> | z.ZodSchema | any;
139
- };
140
82
  timeout?: number;
141
83
  }
142
84
 
@@ -145,21 +87,11 @@ export interface ScrapeParamsV0 {
145
87
  * Defines the structure of the response received after a scraping operation.
146
88
  */
147
89
  export interface ScrapeResponse extends FirecrawlDocument {
148
- success: boolean;
90
+ success: true;
149
91
  warning?: string;
150
92
  error?: string;
151
93
  }
152
94
 
153
- /**
154
- * Response interface for scraping operations on v0.
155
- * Similar to ScrapeResponse but tailored for responses from API version v0.
156
- */
157
- export interface ScrapeResponseV0 {
158
- success: boolean;
159
- data?: FirecrawlDocumentV0;
160
- error?: string;
161
- }
162
-
163
95
  /**
164
96
  * Parameters for crawling operations.
165
97
  * Includes options for both scraping and mapping during a crawl.
@@ -175,37 +107,6 @@ export interface CrawlParams {
175
107
  scrapeOptions?: ScrapeParams;
176
108
  }
177
109
 
178
- /**
179
- * Parameters for crawling operations on v0.
180
- * Tailored for API version v0, includes specific options for crawling.
181
- */
182
- export interface CrawlParamsV0 {
183
- crawlerOptions?: {
184
- includes?: string[];
185
- excludes?: string[];
186
- generateImgAltText?: boolean;
187
- returnOnlyUrls?: boolean;
188
- maxDepth?: number;
189
- mode?: "default" | "fast";
190
- ignoreSitemap?: boolean;
191
- limit?: number;
192
- allowBackwardCrawling?: boolean;
193
- allowExternalContentLinks?: boolean;
194
- };
195
- pageOptions?: {
196
- headers?: Record<string, string>;
197
- includeHtml?: boolean;
198
- includeRawHtml?: boolean;
199
- onlyIncludeTags?: string[];
200
- onlyMainContent?: boolean;
201
- removeTags?: string[];
202
- replaceAllPathsWithAbsolutePaths?: boolean;
203
- screenshot?: boolean;
204
- fullPageScreenshot?: boolean;
205
- waitFor?: number;
206
- };
207
- }
208
-
209
110
  /**
210
111
  * Response interface for crawling operations.
211
112
  * Defines the structure of the response received after initiating a crawl.
@@ -213,17 +114,7 @@ export interface CrawlParamsV0 {
213
114
  export interface CrawlResponse {
214
115
  id?: string;
215
116
  url?: string;
216
- success: boolean;
217
- error?: string;
218
- }
219
-
220
- /**
221
- * Response interface for crawling operations on v0.
222
- * Similar to CrawlResponse but tailored for responses from API version v0.
223
- */
224
- export interface CrawlResponseV0 {
225
- jobId?: string;
226
- success: boolean;
117
+ success: true;
227
118
  error?: string;
228
119
  }
229
120
 
@@ -232,7 +123,7 @@ export interface CrawlResponseV0 {
232
123
  * Provides detailed status of a crawl job including progress and results.
233
124
  */
234
125
  export interface CrawlStatusResponse {
235
- success: boolean;
126
+ success: true;
236
127
  total: number;
237
128
  completed: number;
238
129
  creditsUsed: number;
@@ -243,35 +134,15 @@ export interface CrawlStatusResponse {
243
134
  error?: string;
244
135
  }
245
136
 
246
- /**
247
- * Response interface for job status checks on v0.
248
- * Tailored for API version v0, provides status and partial data of a crawl job.
249
- */
250
- export interface CrawlStatusResponseV0 {
251
- success: boolean;
252
- status: string;
253
- current?: number;
254
- current_url?: string;
255
- current_step?: string;
256
- total?: number;
257
- data?: FirecrawlDocumentV0[];
258
- partial_data?: FirecrawlDocumentV0[];
259
- error?: string;
260
- }
261
-
262
-
263
137
  /**
264
138
  * Parameters for mapping operations.
265
139
  * Defines options for mapping URLs during a crawl.
266
140
  */
267
141
  export interface MapParams {
268
- includePaths?: string[]
269
- excludePaths?: string[]
270
- maxDepth?: number
271
- limit?: number
272
- allowBackwardLinks?: boolean
273
- allowExternalLinks?: boolean
274
- ignoreSitemap?: boolean
142
+ search?: string;
143
+ ignoreSitemap?: boolean;
144
+ includeSubdomains?: boolean;
145
+ limit?: number;
275
146
  }
276
147
 
277
148
  /**
@@ -279,57 +150,35 @@ export interface MapParams {
279
150
  * Defines the structure of the response received after a mapping operation.
280
151
  */
281
152
  export interface MapResponse {
282
- success: boolean;
153
+ success: true;
283
154
  links?: string[];
284
155
  error?: string;
285
156
  }
286
157
 
287
158
  /**
288
- * Parameters for searching operations on v0.
289
- * Tailored for API version v0, includes specific options for searching content.
159
+ * Error response interface.
160
+ * Defines the structure of the response received when an error occurs.
290
161
  */
291
- export interface SearchParamsV0 {
292
- pageOptions?: {
293
- onlyMainContent?: boolean;
294
- fetchPageContent?: boolean;
295
- includeHtml?: boolean;
296
- includeRawHtml?: boolean;
297
- };
298
- searchOptions?: {
299
- limit?: number;
300
- };
301
- }
302
-
303
- /**
304
- * Response interface for searching operations on v0.
305
- * Defines the structure of the response received after a search operation on v0.
306
- */
307
- export interface SearchResponseV0 {
308
- success: boolean;
309
- data?: FirecrawlDocumentV0[];
310
- error?: string;
162
+ export interface ErrorResponse {
163
+ success: false;
164
+ error: string;
311
165
  }
312
166
 
313
167
  /**
314
168
  * Main class for interacting with the Firecrawl API.
315
169
  * Provides methods for scraping, crawling, and mapping web content; search is not supported in v1 and always throws.
316
170
  */
317
- export default class FirecrawlApp<T extends "v0" | "v1"> {
318
- private apiKey: string;
319
- private apiUrl: string;
320
- public version: T;
171
+ export default class FirecrawlApp {
172
+ public apiKey: string;
173
+ public apiUrl: string;
321
174
 
322
175
  /**
323
176
  * Initializes a new instance of the FirecrawlApp class.
324
177
  * @param config - Configuration options for the FirecrawlApp instance.
325
178
  */
326
- constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
179
+ constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
327
180
  this.apiKey = apiKey || "";
328
181
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
329
- this.version = version as T;
330
- if (!this.apiKey) {
331
- throw new Error("No API key provided");
332
- }
333
182
  }
334
183
 
335
184
  /**
@@ -340,8 +189,8 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
340
189
  */
341
190
  async scrapeUrl(
342
191
  url: string,
343
- params?: ScrapeParams | ScrapeParamsV0
344
- ): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
192
+ params?: ScrapeParams
193
+ ): Promise<ScrapeResponse | ErrorResponse> {
345
194
  const headers: AxiosRequestHeaders = {
346
195
  "Content-Type": "application/json",
347
196
  Authorization: `Bearer ${this.apiKey}`,
@@ -364,19 +213,19 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
364
213
  }
365
214
  try {
366
215
  const response: AxiosResponse = await axios.post(
367
- this.apiUrl + `/${this.version}/scrape`,
216
+ this.apiUrl + `/v1/scrape`,
368
217
  jsonData,
369
218
  { headers }
370
219
  );
371
220
  if (response.status === 200) {
372
221
  const responseData = response.data;
373
222
  if (responseData.success) {
374
- return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
223
+ return {
375
224
  success: true,
376
225
  warning: responseData.warning,
377
226
  error: responseData.error,
378
227
  ...responseData.data
379
- }) as ScrapeResponse;
228
+ };
380
229
  } else {
381
230
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
382
231
  }
@@ -386,100 +235,47 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
386
235
  } catch (error: any) {
387
236
  throw new Error(error.message);
388
237
  }
389
- return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
238
+ return { success: false, error: "Internal server error." };
390
239
  }
391
240
 
392
241
  /**
393
- * Searches for a query using the Firecrawl API.
394
- * @param query - The query to search for.
395
- * @param params - Additional parameters for the search request.
396
- * @returns The response from the search operation.
242
+ * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
243
+ * @param query - The search query string.
244
+ * @param params - Additional parameters for the search.
245
+ * @throws Always throws an error advising the caller to use version 0 of the API.
397
246
  */
398
247
  async search(
399
248
  query: string,
400
- params?: SearchParamsV0
401
- ): Promise<SearchResponseV0> {
402
- if (this.version === "v1") {
403
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
404
- }
405
-
406
- const headers: AxiosRequestHeaders = {
407
- "Content-Type": "application/json",
408
- Authorization: `Bearer ${this.apiKey}`,
409
- } as AxiosRequestHeaders;
410
- let jsonData: any = { query };
411
- if (params) {
412
- jsonData = { ...jsonData, ...params };
413
- }
414
- try {
415
- const response: AxiosResponse = await axios.post(
416
- this.apiUrl + "/v0/search",
417
- jsonData,
418
- { headers }
419
- );
420
- if (response.status === 200) {
421
- const responseData = response.data;
422
- if (responseData.success) {
423
- return responseData;
424
- } else {
425
- throw new Error(`Failed to search. Error: ${responseData.error}`);
426
- }
427
- } else {
428
- this.handleError(response, "search");
429
- }
430
- } catch (error: any) {
431
- throw new Error(error.message);
432
- }
433
- return { success: false, error: "Internal server error." };
249
+ params?: any
250
+ ): Promise<any> {
251
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
434
252
  }
435
253
 
436
254
  /**
437
255
  * Initiates a crawl job for a URL using the Firecrawl API.
438
256
  * @param url - The URL to crawl.
439
257
  * @param params - Additional parameters for the crawl request.
440
- * @param waitUntilDone - Whether to wait for the crawl job to complete.
441
258
  * @param pollInterval - Time in seconds for job status checks.
442
259
  * @param idempotencyKey - Optional idempotency key for the request.
443
260
  * @returns The response from the crawl operation.
444
261
  */
445
262
  async crawlUrl(
446
263
  url: string,
447
- params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
448
- waitUntilDone: boolean = true,
264
+ params?: CrawlParams,
449
265
  pollInterval: number = 2,
450
266
  idempotencyKey?: string
451
- ): Promise<
452
- this['version'] extends 'v0'
453
- ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
454
- : CrawlResponse | CrawlStatusResponse
455
- > {
267
+ ): Promise<CrawlStatusResponse | ErrorResponse> {
456
268
  const headers = this.prepareHeaders(idempotencyKey);
457
269
  let jsonData: any = { url, ...params };
458
270
  try {
459
271
  const response: AxiosResponse = await this.postRequest(
460
- this.apiUrl + `/${this.version}/crawl`,
272
+ this.apiUrl + `/v1/crawl`,
461
273
  jsonData,
462
274
  headers
463
275
  );
464
276
  if (response.status === 200) {
465
- const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
466
- let checkUrl: string | undefined = undefined;
467
- if (waitUntilDone) {
468
- if (this.version === 'v1') { checkUrl = response.data.url }
469
- return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
470
- } else {
471
- if (this.version === 'v0') {
472
- return {
473
- success: true,
474
- jobId: id
475
- } as CrawlResponseV0;
476
- } else {
477
- return {
478
- success: true,
479
- id: id
480
- } as CrawlResponse;
481
- }
482
- }
277
+ const id: string = response.data.id;
278
+ return this.monitorJobStatus(id, headers, pollInterval);
483
279
  } else {
484
280
  this.handleError(response, "start crawl job");
485
281
  }
@@ -490,7 +286,35 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
490
286
  throw new Error(error.message);
491
287
  }
492
288
  }
493
- return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
289
+ return { success: false, error: "Internal server error." };
290
+ }
291
+
292
+ async asyncCrawlUrl(
293
+ url: string,
294
+ params?: CrawlParams,
295
+ idempotencyKey?: string
296
+ ): Promise<CrawlResponse | ErrorResponse> {
297
+ const headers = this.prepareHeaders(idempotencyKey);
298
+ let jsonData: any = { url, ...params };
299
+ try {
300
+ const response: AxiosResponse = await this.postRequest(
301
+ this.apiUrl + `/v1/crawl`,
302
+ jsonData,
303
+ headers
304
+ );
305
+ if (response.status === 200) {
306
+ return response.data;
307
+ } else {
308
+ this.handleError(response, "start crawl job");
309
+ }
310
+ } catch (error: any) {
311
+ if (error.response?.data?.error) {
312
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
313
+ } else {
314
+ throw new Error(error.message);
315
+ }
316
+ }
317
+ return { success: false, error: "Internal server error." };
494
318
  }
495
319
 
496
320
  /**
@@ -498,7 +322,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
498
322
  * @param id - The ID of the crawl operation.
499
323
  * @returns The response containing the job status.
500
324
  */
501
- async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
325
+ async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
502
326
  if (!id) {
503
327
  throw new Error("No crawl ID provided");
504
328
  }
@@ -506,71 +330,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
506
330
  const headers: AxiosRequestHeaders = this.prepareHeaders();
507
331
  try {
508
332
  const response: AxiosResponse = await this.getRequest(
509
- this.version === 'v1' ?
510
- `${this.apiUrl}/${this.version}/crawl/${id}` :
511
- `${this.apiUrl}/${this.version}/crawl/status/${id}`,
333
+ `${this.apiUrl}/v1/crawl/${id}`,
512
334
  headers
513
335
  );
514
336
  if (response.status === 200) {
515
- if (this.version === 'v0') {
516
- return ({
517
- success: true,
518
- status: response.data.status,
519
- current: response.data.current,
520
- current_url: response.data.current_url,
521
- current_step: response.data.current_step,
522
- total: response.data.total,
523
- data: response.data.data,
524
- partial_data: !response.data.data
525
- ? response.data.partial_data
526
- : undefined,
527
- } as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
528
- } else {
529
- return ({
530
- success: true,
531
- status: response.data.status,
532
- total: response.data.total,
533
- completed: response.data.completed,
534
- creditsUsed: response.data.creditsUsed,
535
- expiresAt: new Date(response.data.expiresAt),
536
- next: response.data.next,
537
- data: response.data.data,
538
- error: response.data.error
539
- } as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
540
- }
337
+ return ({
338
+ success: true,
339
+ status: response.data.status,
340
+ total: response.data.total,
341
+ completed: response.data.completed,
342
+ creditsUsed: response.data.creditsUsed,
343
+ expiresAt: new Date(response.data.expiresAt),
344
+ next: response.data.next,
345
+ data: response.data.data,
346
+ error: response.data.error
347
+ })
541
348
  } else {
542
349
  this.handleError(response, "check crawl status");
543
350
  }
544
351
  } catch (error: any) {
545
352
  throw new Error(error.message);
546
353
  }
547
-
548
- return this.version === 'v0' ?
549
- ({
550
- success: false,
551
- status: "unknown",
552
- current: 0,
553
- current_url: "",
554
- current_step: "",
555
- total: 0,
556
- error: "Internal server error.",
557
- } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
558
- ({
559
- success: false,
560
- error: "Internal server error.",
561
- } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
354
+ return { success: false, error: "Internal server error." };
562
355
  }
563
356
 
564
- async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
565
- if (this.version == 'v0') {
566
- throw new Error("Map is not supported in v0");
357
+ async crawlUrlAndWatch(
358
+ url: string,
359
+ params?: CrawlParams,
360
+ idempotencyKey?: string,
361
+ ) {
362
+ const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
363
+
364
+ if (crawl.success && crawl.id) {
365
+ const id = crawl.id;
366
+ return new CrawlWatcher(id, this);
567
367
  }
368
+
369
+ throw new Error("Crawl job failed to start");
370
+ }
371
+
372
+ async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
568
373
  const headers = this.prepareHeaders();
569
374
  let jsonData: { url: string } & MapParams = { url, ...params };
570
375
 
571
376
  try {
572
377
  const response: AxiosResponse = await this.postRequest(
573
- this.apiUrl + `/${this.version}/map`,
378
+ this.apiUrl + `/v1/map`,
574
379
  jsonData,
575
380
  headers
576
381
  );
@@ -582,7 +387,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
582
387
  } catch (error: any) {
583
388
  throw new Error(error.message);
584
389
  }
585
- return { success: false, error: "Internal server error." } as MapResponse;
390
+ return { success: false, error: "Internal server error." };
586
391
  }
587
392
 
588
393
  /**
@@ -637,25 +442,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
637
442
  async monitorJobStatus(
638
443
  id: string,
639
444
  headers: AxiosRequestHeaders,
640
- checkInterval: number,
641
- checkUrl?: string
642
- ): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
643
- let apiUrl: string = '';
445
+ checkInterval: number
446
+ ): Promise<CrawlStatusResponse> {
644
447
  while (true) {
645
- if (this.version === 'v1') {
646
- apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
647
- } else if (this.version === 'v0') {
648
- apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
649
- }
650
448
  const statusResponse: AxiosResponse = await this.getRequest(
651
- apiUrl,
449
+ `${this.apiUrl}/v1/crawl/${id}`,
652
450
  headers
653
451
  );
654
452
  if (statusResponse.status === 200) {
655
453
  const statusData = statusResponse.data;
656
454
  if (statusData.status === "completed") {
657
455
  if ("data" in statusData) {
658
- return this.version === 'v0' ? statusData.data : statusData;
456
+ return statusData;
659
457
  } else {
660
458
  throw new Error("Crawl job completed but no data was returned");
661
459
  }
@@ -696,3 +494,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
696
494
  }
697
495
  }
698
496
  }
497
+
498
+ interface CrawlWatcherEvents {
499
+ document: CustomEvent<FirecrawlDocument>,
500
+ done: CustomEvent<{
501
+ status: CrawlStatusResponse["status"];
502
+ data: FirecrawlDocument[];
503
+ }>,
504
+ error: CustomEvent<{
505
+ status: CrawlStatusResponse["status"],
506
+ data: FirecrawlDocument[],
507
+ error: string,
508
+ }>,
509
+ }
510
+
511
+ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
512
+ private ws: WebSocket;
513
+ public data: FirecrawlDocument[];
514
+ public status: CrawlStatusResponse["status"];
515
+
516
+ constructor(id: string, app: FirecrawlApp) {
517
+ super();
518
+ this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
519
+ this.status = "scraping";
520
+ this.data = [];
521
+
522
+ type ErrorMessage = {
523
+ type: "error",
524
+ error: string,
525
+ }
526
+
527
+ type CatchupMessage = {
528
+ type: "catchup",
529
+ data: CrawlStatusResponse,
530
+ }
531
+
532
+ type DocumentMessage = {
533
+ type: "document",
534
+ data: FirecrawlDocument,
535
+ }
536
+
537
+ type DoneMessage = { type: "done" }
538
+
539
+ type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
540
+
541
+ const messageHandler = (msg: Message) => {
542
+ if (msg.type === "done") {
543
+ this.status = "completed";
544
+ this.dispatchTypedEvent("done", new CustomEvent("done", {
545
+ detail: {
546
+ status: this.status,
547
+ data: this.data,
548
+ },
549
+ }));
550
+ } else if (msg.type === "error") {
551
+ this.status = "failed";
552
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
553
+ detail: {
554
+ status: this.status,
555
+ data: this.data,
556
+ error: msg.error,
557
+ },
558
+ }));
559
+ } else if (msg.type === "catchup") {
560
+ this.status = msg.data.status;
561
+ this.data.push(...(msg.data.data ?? []));
562
+ for (const doc of this.data) {
563
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
564
+ detail: doc,
565
+ }));
566
+ }
567
+ } else if (msg.type === "document") {
568
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
569
+ detail: msg.data,
570
+ }));
571
+ }
572
+ }
573
+
574
+ this.ws.onmessage = ((ev: MessageEvent) => {
575
+ if (typeof ev.data !== "string") {
576
+ this.ws.close();
577
+ return;
578
+ }
579
+
580
+ const msg = JSON.parse(ev.data) as Message;
581
+ messageHandler(msg);
582
+ }).bind(this);
583
+
584
+ this.ws.onclose = ((ev: CloseEvent) => {
585
+ const msg = JSON.parse(ev.reason) as Message;
586
+ messageHandler(msg);
587
+ }).bind(this);
588
+
589
+ this.ws.onerror = ((_: Event) => {
590
+ this.status = "failed"
591
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
592
+ detail: {
593
+ status: this.status,
594
+ data: this.data,
595
+ error: "WebSocket error",
596
+ },
597
+ }));
598
+ }).bind(this);
599
+ }
600
+
601
+ close() {
602
+ this.ws.close();
603
+ }
604
+ }