@mendable/firecrawl 1.18.0 → 1.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs DELETED
@@ -1,778 +0,0 @@
1
- "use strict";
2
- var __create = Object.create;
3
- var __defProp = Object.defineProperty;
4
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
- var __getOwnPropNames = Object.getOwnPropertyNames;
6
- var __getProtoOf = Object.getPrototypeOf;
7
- var __hasOwnProp = Object.prototype.hasOwnProperty;
8
- var __export = (target, all) => {
9
- for (var name in all)
10
- __defProp(target, name, { get: all[name], enumerable: true });
11
- };
12
- var __copyProps = (to, from, except, desc) => {
13
- if (from && typeof from === "object" || typeof from === "function") {
14
- for (let key of __getOwnPropNames(from))
15
- if (!__hasOwnProp.call(to, key) && key !== except)
16
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
- }
18
- return to;
19
- };
20
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
- // If the importer is in node compatibility mode or this is not an ESM
22
- // file that has been converted to a CommonJS file using a Babel-
23
- // compatible transform (i.e. "__esModule" has not been set), then set
24
- // "default" to the CommonJS "module.exports" for node compatibility.
25
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
- mod
27
- ));
28
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
-
30
- // src/index.ts
31
- var src_exports = {};
32
- __export(src_exports, {
33
- CrawlWatcher: () => CrawlWatcher,
34
- FirecrawlError: () => FirecrawlError,
35
- default: () => FirecrawlApp
36
- });
37
- module.exports = __toCommonJS(src_exports);
38
- var import_axios = __toESM(require("axios"), 1);
39
- var zt = __toESM(require("zod"), 1);
40
- var import_zod_to_json_schema = require("zod-to-json-schema");
41
- var import_isows = require("isows");
42
- var import_typescript_event_target = require("typescript-event-target");
43
- var FirecrawlError = class extends Error {
44
- statusCode;
45
- constructor(message, statusCode) {
46
- super(message);
47
- this.statusCode = statusCode;
48
- }
49
- };
50
- var FirecrawlApp = class {
51
- apiKey;
52
- apiUrl;
53
- isCloudService(url) {
54
- return url.includes("api.firecrawl.dev");
55
- }
56
- /**
57
- * Initializes a new instance of the FirecrawlApp class.
58
- * @param config - Configuration options for the FirecrawlApp instance.
59
- */
60
- constructor({ apiKey = null, apiUrl = null }) {
61
- const baseUrl = apiUrl || "https://api.firecrawl.dev";
62
- if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
63
- throw new FirecrawlError("No API key provided", 401);
64
- }
65
- this.apiKey = apiKey || "";
66
- this.apiUrl = baseUrl;
67
- }
68
- /**
69
- * Scrapes a URL using the Firecrawl API.
70
- * @param url - The URL to scrape.
71
- * @param params - Additional parameters for the scrape request.
72
- * @returns The response from the scrape operation.
73
- */
74
- async scrapeUrl(url, params) {
75
- const headers = {
76
- "Content-Type": "application/json",
77
- Authorization: `Bearer ${this.apiKey}`
78
- };
79
- let jsonData = { url, ...params };
80
- if (jsonData?.extract?.schema) {
81
- let schema = jsonData.extract.schema;
82
- try {
83
- schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
84
- } catch (error) {
85
- }
86
- jsonData = {
87
- ...jsonData,
88
- extract: {
89
- ...jsonData.extract,
90
- schema
91
- }
92
- };
93
- }
94
- try {
95
- const response = await import_axios.default.post(
96
- this.apiUrl + `/v1/scrape`,
97
- jsonData,
98
- { headers }
99
- );
100
- if (response.status === 200) {
101
- const responseData = response.data;
102
- if (responseData.success) {
103
- return {
104
- success: true,
105
- warning: responseData.warning,
106
- error: responseData.error,
107
- ...responseData.data
108
- };
109
- } else {
110
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
111
- }
112
- } else {
113
- this.handleError(response, "scrape URL");
114
- }
115
- } catch (error) {
116
- this.handleError(error.response, "scrape URL");
117
- }
118
- return { success: false, error: "Internal server error." };
119
- }
120
- /**
121
- * Searches using the Firecrawl API and optionally scrapes the results.
122
- * @param query - The search query string.
123
- * @param params - Optional parameters for the search request.
124
- * @returns The response from the search operation.
125
- */
126
- async search(query, params) {
127
- const headers = {
128
- "Content-Type": "application/json",
129
- Authorization: `Bearer ${this.apiKey}`
130
- };
131
- let jsonData = {
132
- query,
133
- limit: params?.limit ?? 5,
134
- tbs: params?.tbs,
135
- filter: params?.filter,
136
- lang: params?.lang ?? "en",
137
- country: params?.country ?? "us",
138
- location: params?.location,
139
- origin: params?.origin ?? "api",
140
- timeout: params?.timeout ?? 6e4,
141
- scrapeOptions: params?.scrapeOptions ?? { formats: [] }
142
- };
143
- if (jsonData?.scrapeOptions?.extract?.schema) {
144
- let schema = jsonData.scrapeOptions.extract.schema;
145
- try {
146
- schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
147
- } catch (error) {
148
- }
149
- jsonData = {
150
- ...jsonData,
151
- scrapeOptions: {
152
- ...jsonData.scrapeOptions,
153
- extract: {
154
- ...jsonData.scrapeOptions.extract,
155
- schema
156
- }
157
- }
158
- };
159
- }
160
- try {
161
- const response = await this.postRequest(
162
- this.apiUrl + `/v1/search`,
163
- jsonData,
164
- headers
165
- );
166
- if (response.status === 200) {
167
- const responseData = response.data;
168
- if (responseData.success) {
169
- return {
170
- success: true,
171
- data: responseData.data,
172
- warning: responseData.warning
173
- };
174
- } else {
175
- throw new FirecrawlError(`Failed to search. Error: ${responseData.error}`, response.status);
176
- }
177
- } else {
178
- this.handleError(response, "search");
179
- }
180
- } catch (error) {
181
- if (error.response?.data?.error) {
182
- throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
183
- } else {
184
- throw new FirecrawlError(error.message, 500);
185
- }
186
- }
187
- return { success: false, error: "Internal server error.", data: [] };
188
- }
189
- /**
190
- * Initiates a crawl job for a URL using the Firecrawl API.
191
- * @param url - The URL to crawl.
192
- * @param params - Additional parameters for the crawl request.
193
- * @param pollInterval - Time in seconds for job status checks.
194
- * @param idempotencyKey - Optional idempotency key for the request.
195
- * @returns The response from the crawl operation.
196
- */
197
- async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
198
- const headers = this.prepareHeaders(idempotencyKey);
199
- let jsonData = { url, ...params };
200
- try {
201
- const response = await this.postRequest(
202
- this.apiUrl + `/v1/crawl`,
203
- jsonData,
204
- headers
205
- );
206
- if (response.status === 200) {
207
- const id = response.data.id;
208
- return this.monitorJobStatus(id, headers, pollInterval);
209
- } else {
210
- this.handleError(response, "start crawl job");
211
- }
212
- } catch (error) {
213
- if (error.response?.data?.error) {
214
- throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
215
- } else {
216
- throw new FirecrawlError(error.message, 500);
217
- }
218
- }
219
- return { success: false, error: "Internal server error." };
220
- }
221
- async asyncCrawlUrl(url, params, idempotencyKey) {
222
- const headers = this.prepareHeaders(idempotencyKey);
223
- let jsonData = { url, ...params };
224
- try {
225
- const response = await this.postRequest(
226
- this.apiUrl + `/v1/crawl`,
227
- jsonData,
228
- headers
229
- );
230
- if (response.status === 200) {
231
- return response.data;
232
- } else {
233
- this.handleError(response, "start crawl job");
234
- }
235
- } catch (error) {
236
- if (error.response?.data?.error) {
237
- throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
238
- } else {
239
- throw new FirecrawlError(error.message, 500);
240
- }
241
- }
242
- return { success: false, error: "Internal server error." };
243
- }
244
- /**
245
- * Checks the status of a crawl job using the Firecrawl API.
246
- * @param id - The ID of the crawl operation.
247
- * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
248
- * @returns The response containing the job status.
249
- */
250
- async checkCrawlStatus(id, getAllData = false) {
251
- if (!id) {
252
- throw new FirecrawlError("No crawl ID provided", 400);
253
- }
254
- const headers = this.prepareHeaders();
255
- try {
256
- const response = await this.getRequest(
257
- `${this.apiUrl}/v1/crawl/${id}`,
258
- headers
259
- );
260
- if (response.status === 200) {
261
- let allData = response.data.data;
262
- if (getAllData && response.data.status === "completed") {
263
- let statusData = response.data;
264
- if ("data" in statusData) {
265
- let data = statusData.data;
266
- while (typeof statusData === "object" && "next" in statusData) {
267
- if (data.length === 0) {
268
- break;
269
- }
270
- statusData = (await this.getRequest(statusData.next, headers)).data;
271
- data = data.concat(statusData.data);
272
- }
273
- allData = data;
274
- }
275
- }
276
- let resp = {
277
- success: response.data.success,
278
- status: response.data.status,
279
- total: response.data.total,
280
- completed: response.data.completed,
281
- creditsUsed: response.data.creditsUsed,
282
- expiresAt: new Date(response.data.expiresAt),
283
- data: allData
284
- };
285
- if (!response.data.success && response.data.error) {
286
- resp = {
287
- ...resp,
288
- success: false,
289
- error: response.data.error
290
- };
291
- }
292
- if (response.data.next) {
293
- resp.next = response.data.next;
294
- }
295
- return resp;
296
- } else {
297
- this.handleError(response, "check crawl status");
298
- }
299
- } catch (error) {
300
- throw new FirecrawlError(error.message, 500);
301
- }
302
- return { success: false, error: "Internal server error." };
303
- }
304
- /**
305
- * Cancels a crawl job using the Firecrawl API.
306
- * @param id - The ID of the crawl operation.
307
- * @returns The response from the cancel crawl operation.
308
- */
309
- async cancelCrawl(id) {
310
- const headers = this.prepareHeaders();
311
- try {
312
- const response = await this.deleteRequest(
313
- `${this.apiUrl}/v1/crawl/${id}`,
314
- headers
315
- );
316
- if (response.status === 200) {
317
- return response.data;
318
- } else {
319
- this.handleError(response, "cancel crawl job");
320
- }
321
- } catch (error) {
322
- throw new FirecrawlError(error.message, 500);
323
- }
324
- return { success: false, error: "Internal server error." };
325
- }
326
- /**
327
- * Initiates a crawl job and returns a CrawlWatcher to monitor the job via WebSocket.
328
- * @param url - The URL to crawl.
329
- * @param params - Additional parameters for the crawl request.
330
- * @param idempotencyKey - Optional idempotency key for the request.
331
- * @returns A CrawlWatcher instance to monitor the crawl job.
332
- */
333
- async crawlUrlAndWatch(url, params, idempotencyKey) {
334
- const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
335
- if (crawl.success && crawl.id) {
336
- const id = crawl.id;
337
- return new CrawlWatcher(id, this);
338
- }
339
- throw new FirecrawlError("Crawl job failed to start", 400);
340
- }
341
- /**
342
- * Maps a URL using the Firecrawl API.
343
- * @param url - The URL to map.
344
- * @param params - Additional parameters for the map request.
345
- * @returns The response from the map operation.
346
- */
347
- async mapUrl(url, params) {
348
- const headers = this.prepareHeaders();
349
- let jsonData = { url, ...params };
350
- try {
351
- const response = await this.postRequest(
352
- this.apiUrl + `/v1/map`,
353
- jsonData,
354
- headers
355
- );
356
- if (response.status === 200) {
357
- return response.data;
358
- } else {
359
- this.handleError(response, "map");
360
- }
361
- } catch (error) {
362
- throw new FirecrawlError(error.message, 500);
363
- }
364
- return { success: false, error: "Internal server error." };
365
- }
366
- /**
367
- * Initiates a batch scrape job for multiple URLs using the Firecrawl API.
368
- * @param url - The URLs to scrape.
369
- * @param params - Additional parameters for the scrape request.
370
- * @param pollInterval - Time in seconds for job status checks.
371
- * @param idempotencyKey - Optional idempotency key for the request.
372
- * @param webhook - Optional webhook for the batch scrape.
373
- * @returns The response from the crawl operation.
374
- */
375
- async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
376
- const headers = this.prepareHeaders(idempotencyKey);
377
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
378
- if (jsonData?.extract?.schema) {
379
- let schema = jsonData.extract.schema;
380
- try {
381
- schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
382
- } catch (error) {
383
- }
384
- jsonData = {
385
- ...jsonData,
386
- extract: {
387
- ...jsonData.extract,
388
- schema
389
- }
390
- };
391
- }
392
- try {
393
- const response = await this.postRequest(
394
- this.apiUrl + `/v1/batch/scrape`,
395
- jsonData,
396
- headers
397
- );
398
- if (response.status === 200) {
399
- const id = response.data.id;
400
- return this.monitorJobStatus(id, headers, pollInterval);
401
- } else {
402
- this.handleError(response, "start batch scrape job");
403
- }
404
- } catch (error) {
405
- if (error.response?.data?.error) {
406
- throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
407
- } else {
408
- throw new FirecrawlError(error.message, 500);
409
- }
410
- }
411
- return { success: false, error: "Internal server error." };
412
- }
413
- async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
414
- const headers = this.prepareHeaders(idempotencyKey);
415
- let jsonData = { urls, webhook, ignoreInvalidURLs, ...params ?? {} };
416
- try {
417
- const response = await this.postRequest(
418
- this.apiUrl + `/v1/batch/scrape`,
419
- jsonData,
420
- headers
421
- );
422
- if (response.status === 200) {
423
- return response.data;
424
- } else {
425
- this.handleError(response, "start batch scrape job");
426
- }
427
- } catch (error) {
428
- if (error.response?.data?.error) {
429
- throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
430
- } else {
431
- throw new FirecrawlError(error.message, 500);
432
- }
433
- }
434
- return { success: false, error: "Internal server error." };
435
- }
436
- /**
437
- * Initiates a batch scrape job and returns a CrawlWatcher to monitor the job via WebSocket.
438
- * @param urls - The URL to scrape.
439
- * @param params - Additional parameters for the scrape request.
440
- * @param idempotencyKey - Optional idempotency key for the request.
441
- * @returns A CrawlWatcher instance to monitor the crawl job.
442
- */
443
- async batchScrapeUrlsAndWatch(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
444
- const crawl = await this.asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs);
445
- if (crawl.success && crawl.id) {
446
- const id = crawl.id;
447
- return new CrawlWatcher(id, this);
448
- }
449
- throw new FirecrawlError("Batch scrape job failed to start", 400);
450
- }
451
- /**
452
- * Checks the status of a batch scrape job using the Firecrawl API.
453
- * @param id - The ID of the batch scrape operation.
454
- * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
455
- * @returns The response containing the job status.
456
- */
457
- async checkBatchScrapeStatus(id, getAllData = false) {
458
- if (!id) {
459
- throw new FirecrawlError("No batch scrape ID provided", 400);
460
- }
461
- const headers = this.prepareHeaders();
462
- try {
463
- const response = await this.getRequest(
464
- `${this.apiUrl}/v1/batch/scrape/${id}`,
465
- headers
466
- );
467
- if (response.status === 200) {
468
- let allData = response.data.data;
469
- if (getAllData && response.data.status === "completed") {
470
- let statusData = response.data;
471
- if ("data" in statusData) {
472
- let data = statusData.data;
473
- while (typeof statusData === "object" && "next" in statusData) {
474
- if (data.length === 0) {
475
- break;
476
- }
477
- statusData = (await this.getRequest(statusData.next, headers)).data;
478
- data = data.concat(statusData.data);
479
- }
480
- allData = data;
481
- }
482
- }
483
- let resp = {
484
- success: response.data.success,
485
- status: response.data.status,
486
- total: response.data.total,
487
- completed: response.data.completed,
488
- creditsUsed: response.data.creditsUsed,
489
- expiresAt: new Date(response.data.expiresAt),
490
- data: allData
491
- };
492
- if (!response.data.success && response.data.error) {
493
- resp = {
494
- ...resp,
495
- success: false,
496
- error: response.data.error
497
- };
498
- }
499
- if (response.data.next) {
500
- resp.next = response.data.next;
501
- }
502
- return resp;
503
- } else {
504
- this.handleError(response, "check batch scrape status");
505
- }
506
- } catch (error) {
507
- throw new FirecrawlError(error.message, 500);
508
- }
509
- return { success: false, error: "Internal server error." };
510
- }
511
- /**
512
- * Extracts information from URLs using the Firecrawl API.
513
- * Currently in Beta. Expect breaking changes on future minor versions.
514
- * @param url - The URL to extract information from.
515
- * @param params - Additional parameters for the extract request.
516
- * @returns The response from the extract operation.
517
- */
518
- async extract(urls, params) {
519
- const headers = this.prepareHeaders();
520
- let jsonData = { urls, ...params };
521
- let jsonSchema;
522
- try {
523
- if (!params?.schema) {
524
- jsonSchema = void 0;
525
- } else if (params.schema instanceof zt.ZodType) {
526
- jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
527
- } else {
528
- jsonSchema = params.schema;
529
- }
530
- } catch (error) {
531
- throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
532
- }
533
- try {
534
- const response = await this.postRequest(
535
- this.apiUrl + `/v1/extract`,
536
- { ...jsonData, schema: jsonSchema },
537
- headers
538
- );
539
- if (response.status === 200) {
540
- const responseData = response.data;
541
- if (responseData.success) {
542
- return {
543
- success: true,
544
- data: responseData.data,
545
- warning: responseData.warning,
546
- error: responseData.error
547
- };
548
- } else {
549
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
550
- }
551
- } else {
552
- this.handleError(response, "extract");
553
- }
554
- } catch (error) {
555
- throw new FirecrawlError(error.message, 500);
556
- }
557
- return { success: false, error: "Internal server error." };
558
- }
559
- /**
560
- * Prepares the headers for an API request.
561
- * @param idempotencyKey - Optional key to ensure idempotency.
562
- * @returns The prepared headers.
563
- */
564
- prepareHeaders(idempotencyKey) {
565
- return {
566
- "Content-Type": "application/json",
567
- Authorization: `Bearer ${this.apiKey}`,
568
- ...idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}
569
- };
570
- }
571
- /**
572
- * Sends a POST request to the specified URL.
573
- * @param url - The URL to send the request to.
574
- * @param data - The data to send in the request.
575
- * @param headers - The headers for the request.
576
- * @returns The response from the POST request.
577
- */
578
- postRequest(url, data, headers) {
579
- return import_axios.default.post(url, data, { headers });
580
- }
581
- /**
582
- * Sends a GET request to the specified URL.
583
- * @param url - The URL to send the request to.
584
- * @param headers - The headers for the request.
585
- * @returns The response from the GET request.
586
- */
587
- async getRequest(url, headers) {
588
- try {
589
- return await import_axios.default.get(url, { headers });
590
- } catch (error) {
591
- if (error instanceof import_axios.AxiosError && error.response) {
592
- return error.response;
593
- } else {
594
- throw error;
595
- }
596
- }
597
- }
598
- /**
599
- * Sends a DELETE request to the specified URL.
600
- * @param url - The URL to send the request to.
601
- * @param headers - The headers for the request.
602
- * @returns The response from the DELETE request.
603
- */
604
- async deleteRequest(url, headers) {
605
- try {
606
- return await import_axios.default.delete(url, { headers });
607
- } catch (error) {
608
- if (error instanceof import_axios.AxiosError && error.response) {
609
- return error.response;
610
- } else {
611
- throw error;
612
- }
613
- }
614
- }
615
- /**
616
- * Monitors the status of a crawl job until completion or failure.
617
- * @param id - The ID of the crawl operation.
618
- * @param headers - The headers for the request.
619
- * @param checkInterval - Interval in seconds for job status checks.
620
- * @param checkUrl - Optional URL to check the status (used for v1 API)
621
- * @returns The final job status or data.
622
- */
623
- async monitorJobStatus(id, headers, checkInterval) {
624
- try {
625
- while (true) {
626
- let statusResponse = await this.getRequest(
627
- `${this.apiUrl}/v1/crawl/${id}`,
628
- headers
629
- );
630
- if (statusResponse.status === 200) {
631
- let statusData = statusResponse.data;
632
- if (statusData.status === "completed") {
633
- if ("data" in statusData) {
634
- let data = statusData.data;
635
- while (typeof statusData === "object" && "next" in statusData) {
636
- if (data.length === 0) {
637
- break;
638
- }
639
- statusResponse = await this.getRequest(statusData.next, headers);
640
- statusData = statusResponse.data;
641
- data = data.concat(statusData.data);
642
- }
643
- statusData.data = data;
644
- return statusData;
645
- } else {
646
- throw new FirecrawlError("Crawl job completed but no data was returned", 500);
647
- }
648
- } else if (["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)) {
649
- checkInterval = Math.max(checkInterval, 2);
650
- await new Promise(
651
- (resolve) => setTimeout(resolve, checkInterval * 1e3)
652
- );
653
- } else {
654
- throw new FirecrawlError(
655
- `Crawl job failed or was stopped. Status: ${statusData.status}`,
656
- 500
657
- );
658
- }
659
- } else {
660
- this.handleError(statusResponse, "check crawl status");
661
- }
662
- }
663
- } catch (error) {
664
- throw new FirecrawlError(error, 500);
665
- }
666
- }
667
- /**
668
- * Handles errors from API responses.
669
- * @param {AxiosResponse} response - The response from the API.
670
- * @param {string} action - The action being performed when the error occurred.
671
- */
672
- handleError(response, action) {
673
- if ([402, 408, 409, 500].includes(response.status)) {
674
- const errorMessage = response.data.error || "Unknown error occurred";
675
- throw new FirecrawlError(
676
- `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
677
- response.status
678
- );
679
- } else {
680
- throw new FirecrawlError(
681
- `Unexpected error occurred while trying to ${action}. Status code: ${response.status}`,
682
- response.status
683
- );
684
- }
685
- }
686
- };
687
- var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget {
688
- ws;
689
- data;
690
- status;
691
- id;
692
- constructor(id, app) {
693
- super();
694
- this.id = id;
695
- this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
696
- this.status = "scraping";
697
- this.data = [];
698
- const messageHandler = (msg) => {
699
- if (msg.type === "done") {
700
- this.status = "completed";
701
- this.dispatchTypedEvent("done", new CustomEvent("done", {
702
- detail: {
703
- status: this.status,
704
- data: this.data,
705
- id: this.id
706
- }
707
- }));
708
- } else if (msg.type === "error") {
709
- this.status = "failed";
710
- this.dispatchTypedEvent("error", new CustomEvent("error", {
711
- detail: {
712
- status: this.status,
713
- data: this.data,
714
- error: msg.error,
715
- id: this.id
716
- }
717
- }));
718
- } else if (msg.type === "catchup") {
719
- this.status = msg.data.status;
720
- this.data.push(...msg.data.data ?? []);
721
- for (const doc of this.data) {
722
- this.dispatchTypedEvent("document", new CustomEvent("document", {
723
- detail: {
724
- ...doc,
725
- id: this.id
726
- }
727
- }));
728
- }
729
- } else if (msg.type === "document") {
730
- this.dispatchTypedEvent("document", new CustomEvent("document", {
731
- detail: {
732
- ...msg.data,
733
- id: this.id
734
- }
735
- }));
736
- }
737
- };
738
- this.ws.onmessage = ((ev) => {
739
- if (typeof ev.data !== "string") {
740
- this.ws.close();
741
- return;
742
- }
743
- try {
744
- const msg = JSON.parse(ev.data);
745
- messageHandler(msg);
746
- } catch (error) {
747
- console.error("Error on message", error);
748
- }
749
- }).bind(this);
750
- this.ws.onclose = ((ev) => {
751
- try {
752
- const msg = JSON.parse(ev.reason);
753
- messageHandler(msg);
754
- } catch (error) {
755
- console.error("Error on close", error);
756
- }
757
- }).bind(this);
758
- this.ws.onerror = ((_) => {
759
- this.status = "failed";
760
- this.dispatchTypedEvent("error", new CustomEvent("error", {
761
- detail: {
762
- status: this.status,
763
- data: this.data,
764
- error: "WebSocket error",
765
- id: this.id
766
- }
767
- }));
768
- }).bind(this);
769
- }
770
- close() {
771
- this.ws.close();
772
- }
773
- };
774
- // Annotate the CommonJS export names for ESM import in node:
775
- 0 && (module.exports = {
776
- CrawlWatcher,
777
- FirecrawlError
778
- });