@mendable/firecrawl-js 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,17 +1,17 @@
1
1
  import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
2
2
  import { z } from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
+ import { WebSocket } from "isows";
5
+ import { TypedEventTarget } from "typescript-event-target";
4
6
 
5
7
  /**
6
8
  * Configuration interface for FirecrawlApp.
7
9
  * @param apiKey - Optional API key for authentication.
8
10
  * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
9
- * @param version - API version, either 'v0' or 'v1'.
10
11
  */
11
12
  export interface FirecrawlAppConfig {
12
13
  apiKey?: string | null;
13
14
  apiUrl?: string | null;
14
- version?: "v0" | "v1";
15
15
  }
16
16
 
17
17
  /**
@@ -54,17 +54,6 @@ export interface FirecrawlDocumentMetadata {
54
54
  [key: string]: any; // Allows for additional metadata properties not explicitly defined.
55
55
  }
56
56
 
57
- /**
58
- * Metadata for a Firecrawl document on v0.
59
- * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
60
- */
61
- export interface FirecrawlDocumentMetadataV0 {
62
- // Similar properties as FirecrawlDocumentMetadata with additional v0 specific adjustments
63
- pageStatusCode?: number;
64
- pageError?: string;
65
- [key: string]: any;
66
- }
67
-
68
57
  /**
69
58
  * Document interface for Firecrawl.
70
59
  * Represents a document retrieved or processed by Firecrawl.
@@ -76,28 +65,7 @@ export interface FirecrawlDocument {
76
65
  rawHtml?: string;
77
66
  links?: string[];
78
67
  screenshot?: string;
79
- metadata: FirecrawlDocumentMetadata;
80
- }
81
-
82
- /**
83
- * Document interface for Firecrawl on v0.
84
- * Represents a document specifically for API version v0 with additional properties.
85
- */
86
- export interface FirecrawlDocumentV0 {
87
- id?: string;
88
- url?: string;
89
- content: string;
90
- markdown?: string;
91
- html?: string;
92
- llm_extraction?: Record<string, any>;
93
- createdAt?: Date;
94
- updatedAt?: Date;
95
- type?: string;
96
- metadata: FirecrawlDocumentMetadataV0;
97
- childrenLinks?: string[];
98
- provider?: string;
99
- warning?: string;
100
- index?: number;
68
+ metadata?: FirecrawlDocumentMetadata;
101
69
  }
102
70
 
103
71
  /**
@@ -105,38 +73,12 @@ export interface FirecrawlDocumentV0 {
105
73
  * Defines the options and configurations available for scraping web content.
106
74
  */
107
75
  export interface ScrapeParams {
108
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
76
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "full@scrennshot")[];
109
77
  headers?: Record<string, string>;
110
78
  includeTags?: string[];
111
79
  excludeTags?: string[];
112
80
  onlyMainContent?: boolean;
113
- screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
114
- waitFor?: number;
115
- timeout?: number;
116
- }
117
-
118
- /**
119
- * Parameters for scraping operations on v0.
120
- * Includes page and extractor options specific to API version v0.
121
- */
122
- export interface ScrapeParamsV0 {
123
- pageOptions?: {
124
- headers?: Record<string, string>;
125
- includeHtml?: boolean;
126
- includeRawHtml?: boolean;
127
- onlyIncludeTags?: string[];
128
- onlyMainContent?: boolean;
129
- removeTags?: string[];
130
- replaceAllPathsWithAbsolutePaths?: boolean;
131
- screenshot?: boolean;
132
- fullPageScreenshot?: boolean;
133
81
  waitFor?: number;
134
- };
135
- extractorOptions?: {
136
- mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
137
- extractionPrompt?: string;
138
- extractionSchema?: Record<string, any> | z.ZodSchema | any;
139
- };
140
82
  timeout?: number;
141
83
  }
142
84
 
@@ -145,21 +87,11 @@ export interface ScrapeParamsV0 {
145
87
  * Defines the structure of the response received after a scraping operation.
146
88
  */
147
89
  export interface ScrapeResponse extends FirecrawlDocument {
148
- success: boolean;
90
+ success: true;
149
91
  warning?: string;
150
92
  error?: string;
151
93
  }
152
94
 
153
- /**
154
- * Response interface for scraping operations on v0.
155
- * Similar to ScrapeResponse but tailored for responses from API version v0.
156
- */
157
- export interface ScrapeResponseV0 {
158
- success: boolean;
159
- data?: FirecrawlDocumentV0;
160
- error?: string;
161
- }
162
-
163
95
  /**
164
96
  * Parameters for crawling operations.
165
97
  * Includes options for both scraping and mapping during a crawl.
@@ -175,37 +107,6 @@ export interface CrawlParams {
175
107
  scrapeOptions?: ScrapeParams;
176
108
  }
177
109
 
178
- /**
179
- * Parameters for crawling operations on v0.
180
- * Tailored for API version v0, includes specific options for crawling.
181
- */
182
- export interface CrawlParamsV0 {
183
- crawlerOptions?: {
184
- includes?: string[];
185
- excludes?: string[];
186
- generateImgAltText?: boolean;
187
- returnOnlyUrls?: boolean;
188
- maxDepth?: number;
189
- mode?: "default" | "fast";
190
- ignoreSitemap?: boolean;
191
- limit?: number;
192
- allowBackwardCrawling?: boolean;
193
- allowExternalContentLinks?: boolean;
194
- };
195
- pageOptions?: {
196
- headers?: Record<string, string>;
197
- includeHtml?: boolean;
198
- includeRawHtml?: boolean;
199
- onlyIncludeTags?: string[];
200
- onlyMainContent?: boolean;
201
- removeTags?: string[];
202
- replaceAllPathsWithAbsolutePaths?: boolean;
203
- screenshot?: boolean;
204
- fullPageScreenshot?: boolean;
205
- waitFor?: number;
206
- };
207
- }
208
-
209
110
  /**
210
111
  * Response interface for crawling operations.
211
112
  * Defines the structure of the response received after initiating a crawl.
@@ -213,17 +114,7 @@ export interface CrawlParamsV0 {
213
114
  export interface CrawlResponse {
214
115
  id?: string;
215
116
  url?: string;
216
- success: boolean;
217
- error?: string;
218
- }
219
-
220
- /**
221
- * Response interface for crawling operations on v0.
222
- * Similar to CrawlResponse but tailored for responses from API version v0.
223
- */
224
- export interface CrawlResponseV0 {
225
- jobId?: string;
226
- success: boolean;
117
+ success: true;
227
118
  error?: string;
228
119
  }
229
120
 
@@ -232,7 +123,7 @@ export interface CrawlResponseV0 {
232
123
  * Provides detailed status of a crawl job including progress and results.
233
124
  */
234
125
  export interface CrawlStatusResponse {
235
- success: boolean;
126
+ success: true;
236
127
  total: number;
237
128
  completed: number;
238
129
  creditsUsed: number;
@@ -243,23 +134,6 @@ export interface CrawlStatusResponse {
243
134
  error?: string;
244
135
  }
245
136
 
246
- /**
247
- * Response interface for job status checks on v0.
248
- * Tailored for API version v0, provides status and partial data of a crawl job.
249
- */
250
- export interface CrawlStatusResponseV0 {
251
- success: boolean;
252
- status: string;
253
- current?: number;
254
- current_url?: string;
255
- current_step?: string;
256
- total?: number;
257
- data?: FirecrawlDocumentV0[];
258
- partial_data?: FirecrawlDocumentV0[];
259
- error?: string;
260
- }
261
-
262
-
263
137
  /**
264
138
  * Parameters for mapping operations.
265
139
  * Defines options for mapping URLs during a crawl.
@@ -276,57 +150,35 @@ export interface MapParams {
276
150
  * Defines the structure of the response received after a mapping operation.
277
151
  */
278
152
  export interface MapResponse {
279
- success: boolean;
153
+ success: true;
280
154
  links?: string[];
281
155
  error?: string;
282
156
  }
283
157
 
284
158
  /**
285
- * Parameters for searching operations on v0.
286
- * Tailored for API version v0, includes specific options for searching content.
287
- */
288
- export interface SearchParamsV0 {
289
- pageOptions?: {
290
- onlyMainContent?: boolean;
291
- fetchPageContent?: boolean;
292
- includeHtml?: boolean;
293
- includeRawHtml?: boolean;
294
- };
295
- searchOptions?: {
296
- limit?: number;
297
- };
298
- }
299
-
300
- /**
301
- * Response interface for searching operations on v0.
302
- * Defines the structure of the response received after a search operation on v0.
159
+ * Error response interface.
160
+ * Defines the structure of the response received when an error occurs.
303
161
  */
304
- export interface SearchResponseV0 {
305
- success: boolean;
306
- data?: FirecrawlDocumentV0[];
307
- error?: string;
162
+ export interface ErrorResponse {
163
+ success: false;
164
+ error: string;
308
165
  }
309
166
 
310
167
  /**
311
168
  * Main class for interacting with the Firecrawl API.
312
169
  * Provides methods for scraping, searching, crawling, and mapping web content.
313
170
  */
314
- export default class FirecrawlApp<T extends "v0" | "v1"> {
315
- private apiKey: string;
316
- private apiUrl: string;
317
- public version: T;
171
+ export default class FirecrawlApp {
172
+ public apiKey: string;
173
+ public apiUrl: string;
318
174
 
319
175
  /**
320
176
  * Initializes a new instance of the FirecrawlApp class.
321
177
  * @param config - Configuration options for the FirecrawlApp instance.
322
178
  */
323
- constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
179
+ constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
324
180
  this.apiKey = apiKey || "";
325
181
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
326
- this.version = version as T;
327
- if (!this.apiKey) {
328
- throw new Error("No API key provided");
329
- }
330
182
  }
331
183
 
332
184
  /**
@@ -337,8 +189,8 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
337
189
  */
338
190
  async scrapeUrl(
339
191
  url: string,
340
- params?: ScrapeParams | ScrapeParamsV0
341
- ): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
192
+ params?: ScrapeParams
193
+ ): Promise<ScrapeResponse | ErrorResponse> {
342
194
  const headers: AxiosRequestHeaders = {
343
195
  "Content-Type": "application/json",
344
196
  Authorization: `Bearer ${this.apiKey}`,
@@ -361,19 +213,19 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
361
213
  }
362
214
  try {
363
215
  const response: AxiosResponse = await axios.post(
364
- this.apiUrl + `/${this.version}/scrape`,
216
+ this.apiUrl + `/v1/scrape`,
365
217
  jsonData,
366
218
  { headers }
367
219
  );
368
220
  if (response.status === 200) {
369
221
  const responseData = response.data;
370
222
  if (responseData.success) {
371
- return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
223
+ return {
372
224
  success: true,
373
225
  warning: responseData.warning,
374
226
  error: responseData.error,
375
227
  ...responseData.data
376
- }) as ScrapeResponse;
228
+ };
377
229
  } else {
378
230
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
379
231
  }
@@ -383,100 +235,75 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
383
235
  } catch (error: any) {
384
236
  throw new Error(error.message);
385
237
  }
386
- return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
238
+ return { success: false, error: "Internal server error." };
387
239
  }
388
240
 
389
241
  /**
390
- * Searches for a query using the Firecrawl API.
391
- * @param query - The query to search for.
392
- * @param params - Additional parameters for the search request.
393
- * @returns The response from the search operation.
242
+ * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
243
+ * @param query - The search query string.
244
+ * @param params - Additional parameters for the search.
245
+ * @returns Never returns a value; always throws an error advising callers to use version 0 of the API.
394
246
  */
395
247
  async search(
396
248
  query: string,
397
- params?: SearchParamsV0
398
- ): Promise<SearchResponseV0> {
399
- if (this.version === "v1") {
400
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
401
- }
402
-
403
- const headers: AxiosRequestHeaders = {
404
- "Content-Type": "application/json",
405
- Authorization: `Bearer ${this.apiKey}`,
406
- } as AxiosRequestHeaders;
407
- let jsonData: any = { query };
408
- if (params) {
409
- jsonData = { ...jsonData, ...params };
410
- }
411
- try {
412
- const response: AxiosResponse = await axios.post(
413
- this.apiUrl + "/v0/search",
414
- jsonData,
415
- { headers }
416
- );
417
- if (response.status === 200) {
418
- const responseData = response.data;
419
- if (responseData.success) {
420
- return responseData;
421
- } else {
422
- throw new Error(`Failed to search. Error: ${responseData.error}`);
423
- }
424
- } else {
425
- this.handleError(response, "search");
426
- }
427
- } catch (error: any) {
428
- throw new Error(error.message);
429
- }
430
- return { success: false, error: "Internal server error." };
249
+ params?: any
250
+ ): Promise<any> {
251
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
431
252
  }
432
253
 
433
254
  /**
434
255
  * Initiates a crawl job for a URL using the Firecrawl API.
435
256
  * @param url - The URL to crawl.
436
257
  * @param params - Additional parameters for the crawl request.
437
- * @param waitUntilDone - Whether to wait for the crawl job to complete.
438
258
  * @param pollInterval - Time in seconds for job status checks.
439
259
  * @param idempotencyKey - Optional idempotency key for the request.
440
260
  * @returns The response from the crawl operation.
441
261
  */
442
262
  async crawlUrl(
443
263
  url: string,
444
- params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
445
- waitUntilDone: boolean = true,
264
+ params?: CrawlParams,
446
265
  pollInterval: number = 2,
447
266
  idempotencyKey?: string
448
- ): Promise<
449
- this['version'] extends 'v0'
450
- ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
451
- : CrawlResponse | CrawlStatusResponse
452
- > {
267
+ ): Promise<CrawlStatusResponse | ErrorResponse> {
453
268
  const headers = this.prepareHeaders(idempotencyKey);
454
269
  let jsonData: any = { url, ...params };
455
270
  try {
456
271
  const response: AxiosResponse = await this.postRequest(
457
- this.apiUrl + `/${this.version}/crawl`,
272
+ this.apiUrl + `/v1/crawl`,
458
273
  jsonData,
459
274
  headers
460
275
  );
461
276
  if (response.status === 200) {
462
- const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
463
- let checkUrl: string | undefined = undefined;
464
- if (waitUntilDone) {
465
- if (this.version === 'v1') { checkUrl = response.data.url }
466
- return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
467
- } else {
468
- if (this.version === 'v0') {
469
- return {
470
- success: true,
471
- jobId: id
472
- } as CrawlResponseV0;
473
- } else {
474
- return {
475
- success: true,
476
- id: id
477
- } as CrawlResponse;
478
- }
479
- }
277
+ const id: string = response.data.id;
278
+ return this.monitorJobStatus(id, headers, pollInterval);
279
+ } else {
280
+ this.handleError(response, "start crawl job");
281
+ }
282
+ } catch (error: any) {
283
+ if (error.response?.data?.error) {
284
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
285
+ } else {
286
+ throw new Error(error.message);
287
+ }
288
+ }
289
+ return { success: false, error: "Internal server error." };
290
+ }
291
+
292
+ async asyncCrawlUrl(
293
+ url: string,
294
+ params?: CrawlParams,
295
+ idempotencyKey?: string
296
+ ): Promise<CrawlResponse | ErrorResponse> {
297
+ const headers = this.prepareHeaders(idempotencyKey);
298
+ let jsonData: any = { url, ...params };
299
+ try {
300
+ const response: AxiosResponse = await this.postRequest(
301
+ this.apiUrl + `/v1/crawl`,
302
+ jsonData,
303
+ headers
304
+ );
305
+ if (response.status === 200) {
306
+ return response.data;
480
307
  } else {
481
308
  this.handleError(response, "start crawl job");
482
309
  }
@@ -487,7 +314,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
487
314
  throw new Error(error.message);
488
315
  }
489
316
  }
490
- return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
317
+ return { success: false, error: "Internal server error." };
491
318
  }
492
319
 
493
320
  /**
@@ -495,7 +322,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
495
322
  * @param id - The ID of the crawl operation.
496
323
  * @returns The response containing the job status.
497
324
  */
498
- async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
325
+ async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
499
326
  if (!id) {
500
327
  throw new Error("No crawl ID provided");
501
328
  }
@@ -503,71 +330,52 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
503
330
  const headers: AxiosRequestHeaders = this.prepareHeaders();
504
331
  try {
505
332
  const response: AxiosResponse = await this.getRequest(
506
- this.version === 'v1' ?
507
- `${this.apiUrl}/${this.version}/crawl/${id}` :
508
- `${this.apiUrl}/${this.version}/crawl/status/${id}`,
333
+ `${this.apiUrl}/v1/crawl/${id}`,
509
334
  headers
510
335
  );
511
336
  if (response.status === 200) {
512
- if (this.version === 'v0') {
513
- return ({
514
- success: true,
515
- status: response.data.status,
516
- current: response.data.current,
517
- current_url: response.data.current_url,
518
- current_step: response.data.current_step,
519
- total: response.data.total,
520
- data: response.data.data,
521
- partial_data: !response.data.data
522
- ? response.data.partial_data
523
- : undefined,
524
- } as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
525
- } else {
526
- return ({
527
- success: true,
528
- status: response.data.status,
529
- total: response.data.total,
530
- completed: response.data.completed,
531
- creditsUsed: response.data.creditsUsed,
532
- expiresAt: new Date(response.data.expiresAt),
533
- next: response.data.next,
534
- data: response.data.data,
535
- error: response.data.error
536
- } as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
537
- }
337
+ return ({
338
+ success: true,
339
+ status: response.data.status,
340
+ total: response.data.total,
341
+ completed: response.data.completed,
342
+ creditsUsed: response.data.creditsUsed,
343
+ expiresAt: new Date(response.data.expiresAt),
344
+ next: response.data.next,
345
+ data: response.data.data,
346
+ error: response.data.error
347
+ })
538
348
  } else {
539
349
  this.handleError(response, "check crawl status");
540
350
  }
541
351
  } catch (error: any) {
542
352
  throw new Error(error.message);
543
353
  }
544
-
545
- return this.version === 'v0' ?
546
- ({
547
- success: false,
548
- status: "unknown",
549
- current: 0,
550
- current_url: "",
551
- current_step: "",
552
- total: 0,
553
- error: "Internal server error.",
554
- } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
555
- ({
556
- success: false,
557
- error: "Internal server error.",
558
- } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
354
+ return { success: false, error: "Internal server error." };
559
355
  }
560
356
 
561
- async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
562
- if (this.version == 'v0') {
563
- throw new Error("Map is not supported in v0");
357
+ async crawlUrlAndWatch(
358
+ url: string,
359
+ params?: CrawlParams,
360
+ idempotencyKey?: string,
361
+ ) {
362
+ const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
363
+
364
+ if (crawl.success && crawl.id) {
365
+ const id = crawl.id;
366
+ return new CrawlWatcher(id, this);
564
367
  }
368
+
369
+ throw new Error("Crawl job failed to start");
370
+ }
371
+
372
+ async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
565
373
  const headers = this.prepareHeaders();
566
374
  let jsonData: { url: string } & MapParams = { url, ...params };
567
375
 
568
376
  try {
569
377
  const response: AxiosResponse = await this.postRequest(
570
- this.apiUrl + `/${this.version}/map`,
378
+ this.apiUrl + `/v1/map`,
571
379
  jsonData,
572
380
  headers
573
381
  );
@@ -579,7 +387,7 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
579
387
  } catch (error: any) {
580
388
  throw new Error(error.message);
581
389
  }
582
- return { success: false, error: "Internal server error." } as MapResponse;
390
+ return { success: false, error: "Internal server error." };
583
391
  }
584
392
 
585
393
  /**
@@ -634,25 +442,18 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
634
442
  async monitorJobStatus(
635
443
  id: string,
636
444
  headers: AxiosRequestHeaders,
637
- checkInterval: number,
638
- checkUrl?: string
639
- ): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
640
- let apiUrl: string = '';
445
+ checkInterval: number
446
+ ): Promise<CrawlStatusResponse> {
641
447
  while (true) {
642
- if (this.version === 'v1') {
643
- apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
644
- } else if (this.version === 'v0') {
645
- apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
646
- }
647
448
  const statusResponse: AxiosResponse = await this.getRequest(
648
- apiUrl,
449
+ `${this.apiUrl}/v1/crawl/${id}`,
649
450
  headers
650
451
  );
651
452
  if (statusResponse.status === 200) {
652
453
  const statusData = statusResponse.data;
653
454
  if (statusData.status === "completed") {
654
455
  if ("data" in statusData) {
655
- return this.version === 'v0' ? statusData.data : statusData;
456
+ return statusData;
656
457
  } else {
657
458
  throw new Error("Crawl job completed but no data was returned");
658
459
  }
@@ -693,3 +494,111 @@ export default class FirecrawlApp<T extends "v0" | "v1"> {
693
494
  }
694
495
  }
695
496
  }
497
+
498
+ interface CrawlWatcherEvents {
499
+ document: CustomEvent<FirecrawlDocument>,
500
+ done: CustomEvent<{
501
+ status: CrawlStatusResponse["status"];
502
+ data: FirecrawlDocument[];
503
+ }>,
504
+ error: CustomEvent<{
505
+ status: CrawlStatusResponse["status"],
506
+ data: FirecrawlDocument[],
507
+ error: string,
508
+ }>,
509
+ }
510
+
511
+ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
512
+ private ws: WebSocket;
513
+ public data: FirecrawlDocument[];
514
+ public status: CrawlStatusResponse["status"];
515
+
516
+ constructor(id: string, app: FirecrawlApp) {
517
+ super();
518
+ this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
519
+ this.status = "scraping";
520
+ this.data = [];
521
+
522
+ type ErrorMessage = {
523
+ type: "error",
524
+ error: string,
525
+ }
526
+
527
+ type CatchupMessage = {
528
+ type: "catchup",
529
+ data: CrawlStatusResponse,
530
+ }
531
+
532
+ type DocumentMessage = {
533
+ type: "document",
534
+ data: FirecrawlDocument,
535
+ }
536
+
537
+ type DoneMessage = { type: "done" }
538
+
539
+ type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
540
+
541
+ const messageHandler = (msg: Message) => {
542
+ if (msg.type === "done") {
543
+ this.status = "completed";
544
+ this.dispatchTypedEvent("done", new CustomEvent("done", {
545
+ detail: {
546
+ status: this.status,
547
+ data: this.data,
548
+ },
549
+ }));
550
+ } else if (msg.type === "error") {
551
+ this.status = "failed";
552
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
553
+ detail: {
554
+ status: this.status,
555
+ data: this.data,
556
+ error: msg.error,
557
+ },
558
+ }));
559
+ } else if (msg.type === "catchup") {
560
+ this.status = msg.data.status;
561
+ this.data.push(...(msg.data.data ?? []));
562
+ for (const doc of this.data) {
563
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
564
+ detail: doc,
565
+ }));
566
+ }
567
+ } else if (msg.type === "document") {
568
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
569
+ detail: msg.data,
570
+ }));
571
+ }
572
+ }
573
+
574
+ this.ws.onmessage = ((ev: MessageEvent) => {
575
+ if (typeof ev.data !== "string") {
576
+ this.ws.close();
577
+ return;
578
+ }
579
+
580
+ const msg = JSON.parse(ev.data) as Message;
581
+ messageHandler(msg);
582
+ }).bind(this);
583
+
584
+ this.ws.onclose = ((ev: CloseEvent) => {
585
+ const msg = JSON.parse(ev.reason) as Message;
586
+ messageHandler(msg);
587
+ }).bind(this);
588
+
589
+ this.ws.onerror = ((_: Event) => {
590
+ this.status = "failed"
591
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
592
+ detail: {
593
+ status: this.status,
594
+ data: this.data,
595
+ error: "WebSocket error",
596
+ },
597
+ }));
598
+ }).bind(this);
599
+ }
600
+
601
+ close() {
602
+ this.ws.close();
603
+ }
604
+ }