@mendable/firecrawl 4.5.0 → 4.6.1

@@ -8,7 +8,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.5.0",
+      version: "4.6.1",
      description: "JavaScript SDK for Firecrawl API",
      main: "dist/index.js",
      types: "dist/index.d.ts",
@@ -36,6 +36,7 @@ var require_package = __commonJS({
      dependencies: {
        axios: "^1.12.2",
        "typescript-event-target": "^1.1.1",
+        ws: "^8.18.3",
        zod: "^3.23.8",
        "zod-to-json-schema": "^3.23.0"
      },
@@ -50,6 +51,7 @@ var require_package = __commonJS({
        "@types/mocha": "^10.0.6",
        "@types/node": "^20.12.12",
        "@types/uuid": "^9.0.8",
+        "@types/ws": "^8.18.1",
        dotenv: "^16.4.5",
        jest: "^30.0.5",
        "ts-jest": "^29.4.0",
package/dist/index.cjs CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
       name: "@mendable/firecrawl-js",
-      version: "4.5.0",
+      version: "4.6.1",
      description: "JavaScript SDK for Firecrawl API",
      main: "dist/index.js",
      types: "dist/index.d.ts",
@@ -63,6 +63,7 @@ var require_package = __commonJS({
      dependencies: {
        axios: "^1.12.2",
        "typescript-event-target": "^1.1.1",
+        ws: "^8.18.3",
        zod: "^3.23.8",
        "zod-to-json-schema": "^3.23.0"
      },
@@ -77,6 +78,7 @@ var require_package = __commonJS({
        "@types/mocha": "^10.0.6",
        "@types/node": "^20.12.12",
        "@types/uuid": "^9.0.8",
+        "@types/ws": "^8.18.1",
        dotenv: "^16.4.5",
        jest: "^30.0.5",
        "ts-jest": "^29.4.0",
@@ -493,6 +495,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -504,6 +507,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -622,6 +626,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -633,6 +638,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -823,6 +829,7 @@ async function getTokenUsageHistorical(http, byApiKey) {
 
 // src/v2/watcher.ts
 var import_events = require("events");
+var import_ws = require("ws");
 var Watcher = class extends import_events.EventEmitter {
   http;
   jobId;
@@ -848,9 +855,15 @@ var Watcher = class extends import_events.EventEmitter {
   async start() {
     try {
       const url = this.buildWsUrl();
-      this.ws = new WebSocket(url, this.http.getApiKey());
-      this.attachWsHandlers(this.ws);
-    } catch {
+      if (typeof WebSocket !== "undefined") {
+        this.ws = new WebSocket(url, this.http.getApiKey());
+      } else {
+        this.ws = new import_ws.WebSocket(url, this.http.getApiKey());
+      }
+      if (this.ws) {
+        this.attachWsHandlers(this.ws);
+      }
+    } catch (err) {
       this.pollLoop();
     }
   }
@@ -878,7 +891,9 @@ var Watcher = class extends import_events.EventEmitter {
       return;
     }
     if (type === "done") {
-      this.emit("done", { status: "completed", data: [], id: this.jobId });
+      const payload2 = body.data || body;
+      const data = payload2.data || [];
+      this.emit("done", { status: "completed", data, id: this.jobId });
       this.close();
       return;
     }
@@ -903,6 +918,7 @@ var Watcher = class extends import_events.EventEmitter {
     const status = payload.status;
     const data = payload.data || [];
     const snap = this.kind === "crawl" ? {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
@@ -911,6 +927,7 @@ var Watcher = class extends import_events.EventEmitter {
      next: payload.next ?? null,
      data
    } : {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
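
One behavioral fix is easy to miss in the bundled output above: the `done` handler used to emit an empty `data` array unconditionally, so documents delivered with the terminal WebSocket message were dropped. It now unwraps the message envelope before reading the document list. A minimal sketch of the unwrap, assuming the message shape implied by `(body.data || body).data` (the server protocol itself is not documented in this diff):

```ts
// Assumed terminal message: an envelope whose `data` field is the job
// payload, which in turn carries the documents under its own `data` key.
const message = {
  type: "done",
  data: { status: "completed", data: [{ markdown: "..." }] },
};

// Mirrors the bundled logic `(body.data || body).data || []`.
const payload = message.data ?? message; // unwrap the envelope when present
const docs = payload.data ?? [];         // default to no documents
```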
package/dist/index.d.cts CHANGED
@@ -387,6 +387,7 @@ interface CrawlResponse$1 {
     url: string;
 }
 interface CrawlJob {
+    id: string;
     status: 'scraping' | 'completed' | 'failed' | 'cancelled';
     total: number;
     completed: number;
@@ -411,6 +412,7 @@ interface BatchScrapeResponse$1 {
     invalidURLs?: string[];
 }
 interface BatchScrapeJob {
+    id: string;
     status: 'scraping' | 'completed' | 'failed' | 'cancelled';
     completed: number;
     total: number;
package/dist/index.d.ts CHANGED
@@ -387,6 +387,7 @@ interface CrawlResponse$1 {
     url: string;
 }
 interface CrawlJob {
+    id: string;
     status: 'scraping' | 'completed' | 'failed' | 'cancelled';
     total: number;
     completed: number;
@@ -411,6 +412,7 @@ interface BatchScrapeResponse$1 {
     invalidURLs?: string[];
 }
 interface BatchScrapeJob {
+    id: string;
     status: 'scraping' | 'completed' | 'failed' | 'cancelled';
     completed: number;
     total: number;
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-MVAMVUST.js";
+} from "./chunk-47H6QFPY.js";
 
 // src/v2/utils/httpClient.ts
 import axios from "axios";
@@ -377,6 +377,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -388,6 +389,7 @@ async function getCrawlStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -506,6 +508,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -517,6 +520,7 @@ async function getBatchScrapeStatus(http, jobId, pagination) {
   }
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -707,6 +711,7 @@ async function getTokenUsageHistorical(http, byApiKey) {
 
 // src/v2/watcher.ts
 import { EventEmitter } from "events";
+import { WebSocket as WS } from "ws";
 var Watcher = class extends EventEmitter {
   http;
   jobId;
@@ -732,9 +737,15 @@ var Watcher = class extends EventEmitter {
   async start() {
     try {
       const url = this.buildWsUrl();
-      this.ws = new WebSocket(url, this.http.getApiKey());
-      this.attachWsHandlers(this.ws);
-    } catch {
+      if (typeof WebSocket !== "undefined") {
+        this.ws = new WebSocket(url, this.http.getApiKey());
+      } else {
+        this.ws = new WS(url, this.http.getApiKey());
+      }
+      if (this.ws) {
+        this.attachWsHandlers(this.ws);
+      }
+    } catch (err) {
       this.pollLoop();
     }
   }
@@ -762,7 +773,9 @@ var Watcher = class extends EventEmitter {
       return;
     }
     if (type === "done") {
-      this.emit("done", { status: "completed", data: [], id: this.jobId });
+      const payload2 = body.data || body;
+      const data = payload2.data || [];
+      this.emit("done", { status: "completed", data, id: this.jobId });
       this.close();
       return;
     }
@@ -787,6 +800,7 @@ var Watcher = class extends EventEmitter {
     const status = payload.status;
     const data = payload.data || [];
     const snap = this.kind === "crawl" ? {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
@@ -795,6 +809,7 @@ var Watcher = class extends EventEmitter {
      next: payload.next ?? null,
      data
    } : {
+      id: this.jobId,
      status,
      completed: payload.completed ?? 0,
      total: payload.total ?? 0,
@@ -1071,7 +1086,7 @@ var FirecrawlApp = class {
     if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
       return process.env.npm_package_version;
     }
-    const packageJson = await import("./package-O4UQBIGU.js");
+    const packageJson = await import("./package-OVF37QHH.js");
     return packageJson.default.version;
   } catch (error) {
     const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
@@ -1,4 +1,4 @@
 import {
   require_package
-} from "./chunk-MVAMVUST.js";
+} from "./chunk-47H6QFPY.js";
 export default require_package();
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mendable/firecrawl",
-  "version": "4.5.0",
+  "version": "4.6.1",
   "description": "JavaScript SDK for Firecrawl API",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
@@ -28,6 +28,7 @@
   "dependencies": {
     "axios": "^1.12.2",
     "typescript-event-target": "^1.1.1",
+    "ws": "^8.18.3",
    "zod": "^3.23.8",
    "zod-to-json-schema": "^3.23.0"
  },
@@ -42,6 +43,7 @@
    "@types/mocha": "^10.0.6",
    "@types/node": "^20.12.12",
    "@types/uuid": "^9.0.8",
+    "@types/ws": "^8.18.1",
    "dotenv": "^16.4.5",
    "jest": "^30.0.5",
    "ts-jest": "^29.4.0",
@@ -29,6 +29,23 @@ describe("v2.batch e2e", () => {
     expect(Array.isArray(job.data)).toBe(true);
   }, 240_000);
 
+  test("batch scrape with wait returns job id for error retrieval", async () => {
+    const urls = [
+      "https://docs.firecrawl.dev",
+      "https://firecrawl.dev",
+    ];
+    const job = await client.batchScrape(urls, { options: { formats: ["markdown"] }, pollInterval: 1, timeout: 180 });
+    // Verify job has id field
+    expect(job.id).toBeDefined();
+    expect(typeof job.id).toBe("string");
+    // Verify we can use the id to retrieve errors
+    const errors = await client.getBatchScrapeErrors(job.id!);
+    expect(errors).toHaveProperty("errors");
+    expect(errors).toHaveProperty("robotsBlocked");
+    expect(Array.isArray(errors.errors)).toBe(true);
+    expect(Array.isArray(errors.robotsBlocked)).toBe(true);
+  }, 240_000);
+
   test("start batch minimal and status", async () => {
     const urls = ["https://docs.firecrawl.dev", "https://firecrawl.dev"];
     const start = await client.startBatchScrape(urls, { options: { formats: ["markdown"] }, ignoreInvalidURLs: true });
@@ -37,6 +54,9 @@ describe("v2.batch e2e", () => {
     const status = await client.getBatchScrapeStatus(start.id);
     expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
     expect(status.total).toBeGreaterThanOrEqual(0);
+    // Verify status includes id field
+    expect(status.id).toBeDefined();
+    expect(status.id).toBe(start.id);
   }, 120_000);
 
   test("wait batch with all params", async () => {
@@ -45,6 +45,9 @@ describe("v2.crawl e2e", () => {
     const status = await client.getCrawlStatus(start.id);
     expect(["scraping", "completed", "failed", "cancelled"]).toContain(status.status);
     expect(status.completed).toBeGreaterThanOrEqual(0);
+    // Verify status includes id field
+    expect(status.id).toBeDefined();
+    expect(status.id).toBe(start.id);
     // next/expiresAt may be null/undefined depending on state; check shape
     expect(Array.isArray(status.data)).toBe(true);
   }, 120_000);
@@ -112,6 +115,20 @@ describe("v2.crawl e2e", () => {
     expect(Array.isArray(job.data)).toBe(true);
   }, 180_000);
 
+  test("crawl with wait returns job id for error retrieval", async () => {
+    if (!client) throw new Error();
+    const job = await client.crawl("https://docs.firecrawl.dev", { limit: 3, maxDiscoveryDepth: 2, pollInterval: 1, timeout: 120 });
+    // Verify job has id field
+    expect(job.id).toBeDefined();
+    expect(typeof job.id).toBe("string");
+    // Verify we can use the id to retrieve errors
+    const errors = await client.getCrawlErrors(job.id!);
+    expect(errors).toHaveProperty("errors");
+    expect(errors).toHaveProperty("robotsBlocked");
+    expect(Array.isArray(errors.errors)).toBe(true);
+    expect(Array.isArray(errors.robotsBlocked)).toBe(true);
+  }, 180_000);
+
   test("crawl with prompt and wait", async () => {
     if (!client) throw new Error();
     const job = await client.crawl("https://docs.firecrawl.dev", { prompt: "Extract all blog posts", limit: 3, pollInterval: 1, timeout: 120 });
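
The two new "returns job id for error retrieval" tests pin down the error-response shape only indirectly, through `toHaveProperty` and `Array.isArray` assertions. A sketch of what those assertions imply; the element types are assumptions for illustration, not taken from the package:

```ts
// Shape implied by the test assertions above; element types are assumed.
interface JobErrorsResponse {
  errors: Array<{ url?: string; error?: string }>; // per-URL failures (assumed shape)
  robotsBlocked: string[];                         // URLs skipped via robots.txt (assumed)
}

// Usage mirroring the test flow; `client` typed loosely to keep the sketch
// self-contained rather than asserting the SDK's exported client type.
async function collectCrawlErrors(client: any, url: string): Promise<JobErrorsResponse> {
  const job = await client.crawl(url, { limit: 3 });
  return client.getCrawlErrors(job.id);
}
```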
@@ -62,6 +62,7 @@ export async function getBatchScrapeStatus(
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -74,6 +75,7 @@ export async function getBatchScrapeStatus(
 
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -136,4 +138,4 @@ export function chunkUrls(urls: string[], chunkSize = 100): string[][] {
   const chunks: string[][] = [];
   for (let i = 0; i < urls.length; i += chunkSize) chunks.push(urls.slice(i, i + chunkSize));
   return chunks;
-}
+}
@@ -72,6 +72,7 @@ export async function getCrawlStatus(
   const auto = pagination?.autoPaginate ?? true;
   if (!auto || !body.next) {
     return {
+      id: jobId,
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
@@ -85,6 +86,7 @@ export async function getCrawlStatus(
   const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
 
   return {
+    id: jobId,
     status: body.status,
     completed: body.completed ?? 0,
     total: body.total ?? 0,
@@ -165,4 +167,4 @@ export async function crawlParamsPreview(http: HttpClient, url: string, prompt:
     if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview");
     throw err;
   }
-}
+}
package/src/v2/types.ts CHANGED
@@ -466,6 +466,7 @@ export interface CrawlResponse {
 }
 
 export interface CrawlJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   total: number;
   completed: number;
@@ -493,6 +494,7 @@ export interface BatchScrapeResponse {
 }
 
 export interface BatchScrapeJob {
+  id: string;
   status: 'scraping' | 'completed' | 'failed' | 'cancelled';
   completed: number;
   total: number;
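
With `id` now part of `CrawlJob` and `BatchScrapeJob` themselves, a waited job no longer needs the start response kept around for follow-up calls. A hedged usage sketch; the constructor shape and method names are taken from the tests in this diff and the SDK's public surface, so treat the details as assumptions:

```ts
import Firecrawl from "@mendable/firecrawl";

async function crawlWithErrorReport(url: string) {
  const client = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });

  // crawl() waits for completion; the returned CrawlJob now carries its id.
  const job = await client.crawl(url, { limit: 3 });

  // Before 4.6.x the id was only on the startCrawl() response, so waited
  // jobs could not be fed into the errors endpoint without bookkeeping.
  const errors = await client.getCrawlErrors(job.id);
  return { job, errors };
}
```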
package/src/v2/watcher.ts CHANGED
@@ -3,6 +3,7 @@ import type { BatchScrapeJob, CrawlJob, Document } from "./types";
 import type { HttpClient } from "./utils/httpClient";
 import { getBatchScrapeStatus } from "./methods/batch";
 import { getCrawlStatus } from "./methods/crawl";
+import { WebSocket as WS } from "ws";
 
 type JobKind = "crawl" | "batch";
 
@@ -43,11 +44,17 @@ export class Watcher extends EventEmitter {
   async start(): Promise<void> {
     try {
       const url = this.buildWsUrl();
-      // Pass API key as subprotocol for browser compatibility
-      this.ws = new WebSocket(url, this.http.getApiKey());
-      this.attachWsHandlers(this.ws);
-    } catch {
-      // Fallback to polling immediately
+
+      if (typeof WebSocket !== 'undefined') {
+        this.ws = new WebSocket(url, this.http.getApiKey()) as any;
+      } else {
+        this.ws = new WS(url, this.http.getApiKey()) as any;
+      }
+
+      if (this.ws) {
+        this.attachWsHandlers(this.ws);
+      }
+    } catch (err) {
       this.pollLoop();
     }
   }
@@ -76,7 +83,9 @@ export class Watcher extends EventEmitter {
       return;
     }
     if (type === "done") {
-      this.emit("done", { status: "completed", data: [], id: this.jobId });
+      const payload = body.data || body;
+      const data = (payload.data || []) as Document[];
+      this.emit("done", { status: "completed", data, id: this.jobId });
       this.close();
       return;
     }
@@ -105,6 +114,7 @@ export class Watcher extends EventEmitter {
     const data = (payload.data || []) as Document[];
     const snap: Snapshot = this.kind === "crawl"
       ? {
+          id: this.jobId,
           status,
           completed: payload.completed ?? 0,
           total: payload.total ?? 0,
@@ -114,6 +124,7 @@ export class Watcher extends EventEmitter {
           data,
         }
       : {
+          id: this.jobId,
          status,
          completed: payload.completed ?? 0,
          total: payload.total ?? 0,
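
The runtime selection the watcher adopts here — use the global `WebSocket` where one exists (browsers, and recent Node versions that ship it as a global) and fall back to the `ws` package otherwise — is why `ws` joins the dependencies. The second constructor argument passes the API key as the WebSocket subprotocol, per the comment removed above. A standalone sketch of the same pattern, assuming only the (url, protocols) constructor and the event subset common to both implementations:

```ts
import { WebSocket as NodeWebSocket } from "ws";

// Prefer the platform-native WebSocket when the global is defined;
// otherwise fall back to the `ws` implementation. Both accept
// (url, protocols) and expose a compatible event surface for this use.
function openSocket(url: string, apiKeyAsProtocol: string) {
  if (typeof WebSocket !== "undefined") {
    return new WebSocket(url, apiKeyAsProtocol);
  }
  return new NodeWebSocket(url, apiKeyAsProtocol);
}
```

Note that the `catch` now binds `err` but still only falls back to `pollLoop()`; the binding looks cosmetic rather than behavioral.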
package/tsup.config.ts CHANGED
@@ -8,6 +8,7 @@ export default defineConfig({
   clean: true,
   platform: "node",
   target: "node22",
+  external: ["ws"],
   noExternal: ["typescript-event-target"],
   esbuildOptions(options) {
     options.define = {
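
Marking `ws` as `external` keeps it out of the tsup bundle: the dist files retain a top-level `require("ws")` / `import { WebSocket as WS } from "ws"` (visible in the index.cjs and index.js hunks above) and resolve it from node_modules at runtime instead of inlining a copy. For browser builds, this presumably lets consumer bundlers alias or stub that import; at runtime the `typeof WebSocket !== "undefined"` check prefers the native global anyway.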