firecrawl 3.3.0 → 4.0.0
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that public registry.
- package/dist/{chunk-Y3QF4XAJ.js → chunk-YH34PXKT.js} +1 -1
- package/dist/index.cjs +70 -11
- package/dist/index.d.cts +13 -3
- package/dist/index.d.ts +13 -3
- package/dist/index.js +71 -12
- package/dist/{package-LI2S3JCZ.js → package-CW75NWUC.js} +1 -1
- package/package.json +1 -1
- package/src/__tests__/unit/v2/pagination.test.ts +112 -0
- package/src/v2/client.ts +5 -4
- package/src/v2/methods/batch.ts +25 -5
- package/src/v2/methods/crawl.ts +28 -5
- package/src/v2/types.ts +12 -0
- package/src/v2/utils/pagination.ts +45 -0
package/dist/{chunk-Y3QF4XAJ.js → chunk-YH34PXKT.js}
CHANGED
@@ -8,7 +8,7 @@ var require_package = __commonJS({
   "package.json"(exports, module) {
     module.exports = {
       name: "@mendable/firecrawl-js",
-      version: "3.3.0",
+      version: "4.0.0",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
package/dist/index.cjs
CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
   "package.json"(exports2, module2) {
     module2.exports = {
       name: "@mendable/firecrawl-js",
-      version: "3.3.0",
+      version: "4.0.0",
       description: "JavaScript SDK for Firecrawl API",
       main: "dist/index.js",
       types: "dist/index.d.ts",
@@ -395,6 +395,37 @@ async function map(http, url, options) {
   }
 }
 
+// src/v2/utils/pagination.ts
+async function fetchAllPages(http, nextUrl, initial, pagination) {
+  const docs = initial.slice();
+  let current = nextUrl;
+  let pageCount = 0;
+  const maxPages = pagination?.maxPages ?? void 0;
+  const maxResults = pagination?.maxResults ?? void 0;
+  const maxWaitTime = pagination?.maxWaitTime ?? void 0;
+  const started = Date.now();
+  while (current) {
+    if (maxPages != null && pageCount >= maxPages) break;
+    if (maxWaitTime != null && (Date.now() - started) / 1e3 > maxWaitTime) break;
+    let payload = null;
+    try {
+      const res = await http.get(current);
+      payload = res.data;
+    } catch {
+      break;
+    }
+    if (!payload?.success) break;
+    for (const d of payload.data || []) {
+      if (maxResults != null && docs.length >= maxResults) break;
+      docs.push(d);
+    }
+    if (maxResults != null && docs.length >= maxResults) break;
+    current = payload.next ?? null;
+    pageCount += 1;
+  }
+  return docs;
+}
+
 // src/v2/methods/crawl.ts
 function prepareCrawlPayload(request) {
   if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
@@ -432,21 +463,35 @@ async function startCrawl(http, request) {
     throw err;
   }
 }
-async function getCrawlStatus(http, jobId) {
+async function getCrawlStatus(http, jobId, pagination) {
   try {
     const res = await http.get(`/v2/crawl/${jobId}`);
     if (res.status !== 200 || !res.data?.success) {
       throwForBadResponse(res, "get crawl status");
     }
     const body = res.data;
+    const initialDocs = body.data || [];
+    const auto = pagination?.autoPaginate ?? true;
+    if (!auto || !body.next) {
+      return {
+        status: body.status,
+        completed: body.completed ?? 0,
+        total: body.total ?? 0,
+        creditsUsed: body.creditsUsed,
+        expiresAt: body.expiresAt,
+        next: body.next ?? null,
+        data: initialDocs
+      };
+    }
+    const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
     return {
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
       creditsUsed: body.creditsUsed,
       expiresAt: body.expiresAt,
-      next:
-      data:
+      next: null,
+      data: aggregated
     };
   } catch (err) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status");
@@ -549,19 +594,33 @@ async function startBatchScrape(http, urls, {
     throw err;
   }
 }
-async function getBatchScrapeStatus(http, jobId) {
+async function getBatchScrapeStatus(http, jobId, pagination) {
   try {
     const res = await http.get(`/v2/batch/scrape/${jobId}`);
    if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status");
     const body = res.data;
+    const initialDocs = body.data || [];
+    const auto = pagination?.autoPaginate ?? true;
+    if (!auto || !body.next) {
+      return {
+        status: body.status,
+        completed: body.completed ?? 0,
+        total: body.total ?? 0,
+        creditsUsed: body.creditsUsed,
+        expiresAt: body.expiresAt,
+        next: body.next ?? null,
+        data: initialDocs
+      };
+    }
+    const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
     return {
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
       creditsUsed: body.creditsUsed,
       expiresAt: body.expiresAt,
-      next:
-      data:
+      next: null,
+      data: aggregated
     };
   } catch (err) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status");
@@ -885,8 +944,8 @@ var FirecrawlClient = class {
    * Get the status and partial data of a crawl job.
    * @param jobId Crawl job id.
    */
-  async getCrawlStatus(jobId) {
-    return getCrawlStatus(this.http, jobId);
+  async getCrawlStatus(jobId, pagination) {
+    return getCrawlStatus(this.http, jobId, pagination);
   }
   /**
    * Cancel a crawl job.
@@ -940,8 +999,8 @@ var FirecrawlClient = class {
    * Get the status and partial data of a batch scrape job.
    * @param jobId Batch job id.
    */
-  async getBatchScrapeStatus(jobId) {
-    return getBatchScrapeStatus(this.http, jobId);
+  async getBatchScrapeStatus(jobId, pagination) {
+    return getBatchScrapeStatus(this.http, jobId, pagination);
   }
   /**
    * Retrieve batch scrape errors and robots.txt blocks.
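For SDK consumers, the practical effect of the compiled changes above is that getCrawlStatus and getBatchScrapeStatus now follow every `next` link by default and return the aggregated documents with next set to null, while autoPaginate: false keeps the previous single-page behaviour. A minimal usage sketch (the job id, api key handling, and variable names are illustrative, not part of this diff):

  import Firecrawl from "@mendable/firecrawl-js";

  const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY });
  const jobId = "an-existing-crawl-job-id"; // placeholder

  // Default: follow every `next` link and return the full document set.
  const fullJob = await firecrawl.getCrawlStatus(jobId);
  console.log(fullJob.data.length, fullJob.next); // next is null after aggregation

  // Opt out: fetch only the first page and keep `next` for manual paging.
  const firstPage = await firecrawl.getCrawlStatus(jobId, { autoPaginate: false });
  console.log(firstPage.data.length, firstPage.next); // next may be a URL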
package/dist/index.d.cts
CHANGED
@@ -151,6 +151,16 @@ interface Document {
     warning?: string;
     changeTracking?: Record<string, unknown>;
 }
+interface PaginationConfig {
+    /** When true (default), automatically follow `next` links and aggregate all documents. */
+    autoPaginate?: boolean;
+    /** Maximum number of additional pages to fetch after the first response. */
+    maxPages?: number;
+    /** Maximum total number of documents to return across all pages. */
+    maxResults?: number;
+    /** Maximum time to spend fetching additional pages (in seconds). */
+    maxWaitTime?: number;
+}
 interface SearchResultWeb {
     url: string;
     title?: string;
@@ -440,7 +450,7 @@ declare class FirecrawlClient {
     * Get the status and partial data of a crawl job.
     * @param jobId Crawl job id.
     */
-    getCrawlStatus(jobId: string): Promise<CrawlJob>;
+    getCrawlStatus(jobId: string, pagination?: PaginationConfig): Promise<CrawlJob>;
    /**
     * Cancel a crawl job.
     * @param jobId Crawl job id.
@@ -483,7 +493,7 @@ declare class FirecrawlClient {
     * Get the status and partial data of a batch scrape job.
     * @param jobId Batch job id.
     */
-    getBatchScrapeStatus(jobId: string): Promise<BatchScrapeJob>;
+    getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>;
    /**
     * Retrieve batch scrape errors and robots.txt blocks.
     * @param jobId Batch job id.
@@ -1361,4 +1371,4 @@ declare class Firecrawl extends FirecrawlClient {
     get v1(): FirecrawlApp;
 }
 
-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
package/dist/index.d.ts
CHANGED
@@ -151,6 +151,16 @@ interface Document {
     warning?: string;
     changeTracking?: Record<string, unknown>;
 }
+interface PaginationConfig {
+    /** When true (default), automatically follow `next` links and aggregate all documents. */
+    autoPaginate?: boolean;
+    /** Maximum number of additional pages to fetch after the first response. */
+    maxPages?: number;
+    /** Maximum total number of documents to return across all pages. */
+    maxResults?: number;
+    /** Maximum time to spend fetching additional pages (in seconds). */
+    maxWaitTime?: number;
+}
 interface SearchResultWeb {
     url: string;
     title?: string;
@@ -440,7 +450,7 @@ declare class FirecrawlClient {
     * Get the status and partial data of a crawl job.
     * @param jobId Crawl job id.
     */
-    getCrawlStatus(jobId: string): Promise<CrawlJob>;
+    getCrawlStatus(jobId: string, pagination?: PaginationConfig): Promise<CrawlJob>;
    /**
     * Cancel a crawl job.
     * @param jobId Crawl job id.
@@ -483,7 +493,7 @@ declare class FirecrawlClient {
     * Get the status and partial data of a batch scrape job.
     * @param jobId Batch job id.
     */
-    getBatchScrapeStatus(jobId: string): Promise<BatchScrapeJob>;
+    getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob>;
    /**
     * Retrieve batch scrape errors and robots.txt blocks.
     * @param jobId Batch job id.
@@ -1361,4 +1371,4 @@ declare class Firecrawl extends FirecrawlClient {
     get v1(): FirecrawlApp;
 }
 
-export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type AttributesFormat, type BatchScrapeJob, type BatchScrapeOptions, type BatchScrapeResponse$1 as BatchScrapeResponse, type CategoryOption, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlOptions, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type FirecrawlClientOptions, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PaginationConfig, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResultImages, type SearchResultNews, type SearchResultWeb, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };
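The PaginationConfig knobs declared above bound the aggregation rather than change its shape: maxPages limits how many additional pages are fetched after the first response, maxResults caps the total number of documents kept (initial page included), and maxWaitTime is a wall-clock budget in seconds checked before each extra page request. A hedged sketch against these declarations (the client instance, batch job id, and limit values are placeholders):

  // `firecrawl` is an already-constructed Firecrawl client instance.
  const batchJobId = "an-existing-batch-scrape-job-id"; // placeholder

  const job = await firecrawl.getBatchScrapeStatus(batchJobId, {
    maxPages: 5,      // at most 5 follow-up pages after the first response
    maxResults: 200,  // stop once 200 documents have been collected in total
    maxWaitTime: 30,  // stop following `next` links after roughly 30 seconds
  });
  // job.data holds the (possibly truncated) aggregate; job.next is null after auto-pagination.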
package/dist/index.js
CHANGED
@@ -1,6 +1,6 @@
 import {
   require_package
-} from "./chunk-Y3QF4XAJ.js";
+} from "./chunk-YH34PXKT.js";
 
 // src/v2/utils/httpClient.ts
 import axios from "axios";
@@ -279,6 +279,37 @@ async function map(http, url, options) {
   }
 }
 
+// src/v2/utils/pagination.ts
+async function fetchAllPages(http, nextUrl, initial, pagination) {
+  const docs = initial.slice();
+  let current = nextUrl;
+  let pageCount = 0;
+  const maxPages = pagination?.maxPages ?? void 0;
+  const maxResults = pagination?.maxResults ?? void 0;
+  const maxWaitTime = pagination?.maxWaitTime ?? void 0;
+  const started = Date.now();
+  while (current) {
+    if (maxPages != null && pageCount >= maxPages) break;
+    if (maxWaitTime != null && (Date.now() - started) / 1e3 > maxWaitTime) break;
+    let payload = null;
+    try {
+      const res = await http.get(current);
+      payload = res.data;
+    } catch {
+      break;
+    }
+    if (!payload?.success) break;
+    for (const d of payload.data || []) {
+      if (maxResults != null && docs.length >= maxResults) break;
+      docs.push(d);
+    }
+    if (maxResults != null && docs.length >= maxResults) break;
+    current = payload.next ?? null;
+    pageCount += 1;
+  }
+  return docs;
+}
+
 // src/v2/methods/crawl.ts
 function prepareCrawlPayload(request) {
   if (!request.url || !request.url.trim()) throw new Error("URL cannot be empty");
@@ -316,21 +347,35 @@ async function startCrawl(http, request) {
     throw err;
   }
 }
-async function getCrawlStatus(http, jobId) {
+async function getCrawlStatus(http, jobId, pagination) {
   try {
     const res = await http.get(`/v2/crawl/${jobId}`);
     if (res.status !== 200 || !res.data?.success) {
       throwForBadResponse(res, "get crawl status");
     }
     const body = res.data;
+    const initialDocs = body.data || [];
+    const auto = pagination?.autoPaginate ?? true;
+    if (!auto || !body.next) {
+      return {
+        status: body.status,
+        completed: body.completed ?? 0,
+        total: body.total ?? 0,
+        creditsUsed: body.creditsUsed,
+        expiresAt: body.expiresAt,
+        next: body.next ?? null,
+        data: initialDocs
+      };
+    }
+    const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
     return {
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
       creditsUsed: body.creditsUsed,
       expiresAt: body.expiresAt,
-      next:
-      data:
+      next: null,
+      data: aggregated
     };
   } catch (err) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status");
@@ -433,19 +478,33 @@ async function startBatchScrape(http, urls, {
     throw err;
   }
 }
-async function getBatchScrapeStatus(http, jobId) {
+async function getBatchScrapeStatus(http, jobId, pagination) {
   try {
     const res = await http.get(`/v2/batch/scrape/${jobId}`);
     if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status");
     const body = res.data;
+    const initialDocs = body.data || [];
+    const auto = pagination?.autoPaginate ?? true;
+    if (!auto || !body.next) {
+      return {
+        status: body.status,
+        completed: body.completed ?? 0,
+        total: body.total ?? 0,
+        creditsUsed: body.creditsUsed,
+        expiresAt: body.expiresAt,
+        next: body.next ?? null,
+        data: initialDocs
+      };
+    }
+    const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
     return {
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
       creditsUsed: body.creditsUsed,
       expiresAt: body.expiresAt,
-      next:
-      data:
+      next: null,
+      data: aggregated
     };
   } catch (err) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status");
@@ -769,8 +828,8 @@ var FirecrawlClient = class {
    * Get the status and partial data of a crawl job.
    * @param jobId Crawl job id.
    */
-  async getCrawlStatus(jobId) {
-    return getCrawlStatus(this.http, jobId);
+  async getCrawlStatus(jobId, pagination) {
+    return getCrawlStatus(this.http, jobId, pagination);
   }
   /**
    * Cancel a crawl job.
@@ -824,8 +883,8 @@ var FirecrawlClient = class {
    * Get the status and partial data of a batch scrape job.
    * @param jobId Batch job id.
    */
-  async getBatchScrapeStatus(jobId) {
-    return getBatchScrapeStatus(this.http, jobId);
+  async getBatchScrapeStatus(jobId, pagination) {
+    return getBatchScrapeStatus(this.http, jobId, pagination);
   }
   /**
    * Retrieve batch scrape errors and robots.txt blocks.
@@ -933,7 +992,7 @@ var FirecrawlApp = class {
     if (typeof process !== "undefined" && process.env && process.env.npm_package_version) {
       return process.env.npm_package_version;
     }
-    const packageJson = await import("./package-LI2S3JCZ.js");
+    const packageJson = await import("./package-CW75NWUC.js");
     return packageJson.default.version;
   } catch (error) {
     const isTest = typeof process !== "undefined" && (process.env.JEST_WORKER_ID != null || false);
package/package.json
CHANGED
package/src/__tests__/unit/v2/pagination.test.ts
ADDED
@@ -0,0 +1,112 @@
+import { describe, test, expect, jest } from "@jest/globals";
+import { getCrawlStatus } from "../../../v2/methods/crawl";
+import { getBatchScrapeStatus } from "../../../v2/methods/batch";
+
+describe("JS SDK v2 pagination", () => {
+  function makeHttp(getImpl: (url: string) => any) {
+    return { get: jest.fn(async (u: string) => getImpl(u)) } as any;
+  }
+
+  test("crawl: autoPaginate=false returns next", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 2, next: "https://api/next", data: [{ markdown: "a" }] } };
+    const http = makeHttp(() => first);
+    const res = await getCrawlStatus(http, "job1", { autoPaginate: false });
+    expect(res.data.length).toBe(1);
+    expect(res.next).toBe("https://api/next");
+  });
+
+  test("crawl: default autoPaginate aggregates and nulls next", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 3, next: "https://api/n1", data: [{ markdown: "a" }] } };
+    const second = { status: 200, data: { success: true, next: "https://api/n2", data: [{ markdown: "b" }] } };
+    const third = { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
+    const http = makeHttp((url) => {
+      if (url.includes("/v2/crawl/")) return first;
+      if (url.endsWith("n1")) return second;
+      return third;
+    });
+    const res = await getCrawlStatus(http, "job1");
+    expect(res.data.length).toBe(3);
+    expect(res.next).toBeNull();
+  });
+
+  test("crawl: respects maxPages and maxResults", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 10, next: "https://api/n1", data: [{ markdown: "a" }] } };
+    const page = (n: number) => ({ status: 200, data: { success: true, next: n < 3 ? `https://api/n${n + 1}` : null, data: [{ markdown: `p${n}` }] } });
+    const http = makeHttp((url) => {
+      if (url.includes("/v2/crawl/")) return first;
+      if (url.endsWith("n1")) return page(1);
+      if (url.endsWith("n2")) return page(2);
+      return page(3);
+    });
+    const res = await getCrawlStatus(http, "job1", { autoPaginate: true, maxPages: 2, maxResults: 2 });
+    expect(res.data.length).toBe(2);
+  });
+
+  test("batch: default autoPaginate aggregates and nulls next", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 3, next: "https://api/b1", data: [{ markdown: "a" }] } };
+    const second = { status: 200, data: { success: true, next: "https://api/b2", data: [{ markdown: "b" }] } };
+    const third = { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
+    const http = makeHttp((url) => {
+      if (url.includes("/v2/batch/scrape/")) return first;
+      if (url.endsWith("b1")) return second;
+      return third;
+    });
+    const res = await getBatchScrapeStatus(http, "jobB");
+    expect(res.data.length).toBe(3);
+    expect(res.next).toBeNull();
+  });
+
+  test("batch: autoPaginate=false returns next", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 2, next: "https://api/nextBatch", data: [{ markdown: "a" }] } };
+    const http = makeHttp(() => first);
+    const res = await getBatchScrapeStatus(http, "jobB", { autoPaginate: false });
+    expect(res.data.length).toBe(1);
+    expect(res.next).toBe("https://api/nextBatch");
+  });
+
+  test("crawl: maxWaitTime stops pagination after first page", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 5, next: "https://api/n1", data: [{ markdown: "a" }] } };
+    const p1 = { status: 200, data: { success: true, next: "https://api/n2", data: [{ markdown: "b" }] } };
+    const http: any = makeHttp((url: string) => {
+      if (url.includes("/v2/crawl/")) return first;
+      if (url.endsWith("n1")) return p1;
+      return { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
+    });
+    const nowSpy = jest.spyOn(Date, "now");
+    try {
+      nowSpy
+        .mockImplementationOnce(() => 0) // started
+        .mockImplementationOnce(() => 0) // first loop check
+        .mockImplementationOnce(() => 3000); // second loop check > maxWaitTime
+      const res = await getCrawlStatus(http, "jobC", { autoPaginate: true, maxWaitTime: 1 });
+      expect(res.data.length).toBe(2); // initial + first page
+      expect((http.get as jest.Mock).mock.calls.length).toBe(2); // initial + n1 only
+    } finally {
+      nowSpy.mockRestore();
+    }
+  });
+
+  test("batch: maxWaitTime stops pagination after first page", async () => {
+    const first = { status: 200, data: { success: true, status: "completed", completed: 1, total: 5, next: "https://api/b1", data: [{ markdown: "a" }] } };
+    const p1 = { status: 200, data: { success: true, next: "https://api/b2", data: [{ markdown: "b" }] } };
+    const http: any = makeHttp((url: string) => {
+      if (url.includes("/v2/batch/scrape/")) return first;
+      if (url.endsWith("b1")) return p1;
+      return { status: 200, data: { success: true, next: null, data: [{ markdown: "c" }] } };
+    });
+    const nowSpy = jest.spyOn(Date, "now");
+    try {
+      nowSpy
+        .mockImplementationOnce(() => 0) // started
+        .mockImplementationOnce(() => 0) // first loop check
+        .mockImplementationOnce(() => 3000); // second loop check > maxWaitTime
+      const res = await getBatchScrapeStatus(http, "jobB", { autoPaginate: true, maxWaitTime: 1 });
+      expect(res.data.length).toBe(2);
+      expect((http.get as jest.Mock).mock.calls.length).toBe(2);
+    } finally {
+      nowSpy.mockRestore();
+    }
+  });
+});
+
+
package/src/v2/client.ts
CHANGED
@@ -36,6 +36,7 @@ import type {
   ExtractResponse,
   CrawlOptions,
   BatchScrapeOptions,
+  PaginationConfig,
 } from "./types";
 import { Watcher } from "./watcher";
 import type { WatcherOptions } from "./watcher";
@@ -145,8 +146,8 @@ export class FirecrawlClient {
    * Get the status and partial data of a crawl job.
    * @param jobId Crawl job id.
    */
-  async getCrawlStatus(jobId: string): Promise<CrawlJob> {
-    return getCrawlStatus(this.http, jobId);
+  async getCrawlStatus(jobId: string, pagination?: PaginationConfig): Promise<CrawlJob> {
+    return getCrawlStatus(this.http, jobId, pagination);
   }
   /**
    * Cancel a crawl job.
@@ -201,8 +202,8 @@ export class FirecrawlClient {
    * Get the status and partial data of a batch scrape job.
    * @param jobId Batch job id.
    */
-  async getBatchScrapeStatus(jobId: string): Promise<BatchScrapeJob> {
-    return getBatchScrapeStatus(this.http, jobId);
+  async getBatchScrapeStatus(jobId: string, pagination?: PaginationConfig): Promise<BatchScrapeJob> {
+    return getBatchScrapeStatus(this.http, jobId, pagination);
   }
   /**
    * Retrieve batch scrape errors and robots.txt blocks.
package/src/v2/methods/batch.ts
CHANGED
@@ -4,9 +4,11 @@ import {
   type CrawlErrorsResponse,
   type Document,
   type BatchScrapeOptions,
+  type PaginationConfig,
 } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
+import { fetchAllPages } from "../utils/pagination";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
 
 export async function startBatchScrape(
@@ -47,19 +49,38 @@ export async function startBatchScrape(
   }
 }
 
-export async function getBatchScrapeStatus(
+export async function getBatchScrapeStatus(
+  http: HttpClient,
+  jobId: string,
+  pagination?: PaginationConfig
+): Promise<BatchScrapeJob> {
   try {
     const res = await http.get<{ success: boolean; status: BatchScrapeJob["status"]; completed?: number; total?: number; creditsUsed?: number; expiresAt?: string; next?: string | null; data?: Document[] }>(`/v2/batch/scrape/${jobId}`);
     if (res.status !== 200 || !res.data?.success) throwForBadResponse(res, "get batch scrape status");
     const body = res.data;
+    const initialDocs = (body.data || []) as Document[];
+    const auto = pagination?.autoPaginate ?? true;
+    if (!auto || !body.next) {
+      return {
+        status: body.status,
+        completed: body.completed ?? 0,
+        total: body.total ?? 0,
+        creditsUsed: body.creditsUsed,
+        expiresAt: body.expiresAt,
+        next: body.next ?? null,
+        data: initialDocs,
+      };
+    }
+
+    const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
     return {
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
       creditsUsed: body.creditsUsed,
       expiresAt: body.expiresAt,
-      next:
-      data:
+      next: null,
+      data: aggregated,
     };
   } catch (err: any) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "get batch scrape status");
@@ -115,5 +136,4 @@ export function chunkUrls(urls: string[], chunkSize = 100): string[][] {
   const chunks: string[][] = [];
   for (let i = 0; i < urls.length; i += chunkSize) chunks.push(urls.slice(i, i + chunkSize));
   return chunks;
-}
-
+}
package/src/v2/methods/crawl.ts
CHANGED
@@ -5,10 +5,13 @@ import {
   type CrawlResponse,
   type Document,
   type CrawlOptions,
+  type PaginationConfig,
 } from "../types";
 import { HttpClient } from "../utils/httpClient";
 import { ensureValidScrapeOptions } from "../utils/validation";
 import { normalizeAxiosError, throwForBadResponse } from "../utils/errorHandler";
+import type { HttpClient as _Http } from "../utils/httpClient";
+import { fetchAllPages } from "../utils/pagination";
 
 export type CrawlRequest = CrawlOptions & {
   url: string;
@@ -52,21 +55,42 @@ export async function startCrawl(http: HttpClient, request: CrawlRequest): Promi
   }
 }
 
-export async function getCrawlStatus(
+export async function getCrawlStatus(
+  http: HttpClient,
+  jobId: string,
+  pagination?: PaginationConfig
+): Promise<CrawlJob> {
   try {
     const res = await http.get<{ success: boolean; status: CrawlJob["status"]; completed?: number; total?: number; creditsUsed?: number; expiresAt?: string; next?: string | null; data?: Document[] }>(`/v2/crawl/${jobId}`);
     if (res.status !== 200 || !res.data?.success) {
       throwForBadResponse(res, "get crawl status");
     }
     const body = res.data;
+    const initialDocs = (body.data || []) as Document[];
+
+    const auto = pagination?.autoPaginate ?? true;
+    if (!auto || !body.next) {
+      return {
+        status: body.status,
+        completed: body.completed ?? 0,
+        total: body.total ?? 0,
+        creditsUsed: body.creditsUsed,
+        expiresAt: body.expiresAt,
+        next: body.next ?? null,
+        data: initialDocs,
+      };
+    }
+
+    const aggregated = await fetchAllPages(http, body.next, initialDocs, pagination);
+
     return {
       status: body.status,
       completed: body.completed ?? 0,
       total: body.total ?? 0,
       creditsUsed: body.creditsUsed,
       expiresAt: body.expiresAt,
-      next:
-      data:
+      next: null,
+      data: aggregated,
     };
   } catch (err: any) {
     if (err?.isAxiosError) return normalizeAxiosError(err, "get crawl status");
@@ -140,5 +164,4 @@ export async function crawlParamsPreview(http: HttpClient, url: string, prompt:
     if (err?.isAxiosError) return normalizeAxiosError(err, "crawl params preview");
     throw err;
   }
-}
-
+}
package/src/v2/types.ts
CHANGED
@@ -189,6 +189,18 @@ export interface Document {
   changeTracking?: Record<string, unknown>;
 }
 
+// Pagination configuration for auto-fetching pages from v2 endpoints that return a `next` URL
+export interface PaginationConfig {
+  /** When true (default), automatically follow `next` links and aggregate all documents. */
+  autoPaginate?: boolean;
+  /** Maximum number of additional pages to fetch after the first response. */
+  maxPages?: number;
+  /** Maximum total number of documents to return across all pages. */
+  maxResults?: number;
+  /** Maximum time to spend fetching additional pages (in seconds). */
+  maxWaitTime?: number;
+}
+
 export interface SearchResultWeb {
   url: string;
   title?: string;
package/src/v2/utils/pagination.ts
ADDED
@@ -0,0 +1,45 @@
+import type { HttpClient } from "../utils/httpClient";
+import type { Document, PaginationConfig } from "../types";
+
+/**
+ * Shared helper to follow `next` cursors and aggregate documents with limits.
+ */
+export async function fetchAllPages(
+  http: HttpClient,
+  nextUrl: string,
+  initial: Document[],
+  pagination?: PaginationConfig
+): Promise<Document[]> {
+  const docs = initial.slice();
+  let current: string | null = nextUrl;
+  let pageCount = 0;
+  const maxPages = pagination?.maxPages ?? undefined;
+  const maxResults = pagination?.maxResults ?? undefined;
+  const maxWaitTime = pagination?.maxWaitTime ?? undefined;
+  const started = Date.now();
+
+  while (current) {
+    if (maxPages != null && pageCount >= maxPages) break;
+    if (maxWaitTime != null && (Date.now() - started) / 1000 > maxWaitTime) break;
+
+    let payload: { success: boolean; next?: string | null; data?: Document[] } | null = null;
+    try {
+      const res = await http.get<{ success: boolean; next?: string | null; data?: Document[] }>(current);
+      payload = res.data;
+    } catch {
+      break; // axios rejects on non-2xx; stop pagination gracefully
+    }
+    if (!payload?.success) break;
+
+    for (const d of payload.data || []) {
+      if (maxResults != null && docs.length >= maxResults) break;
+      docs.push(d as Document);
+    }
+    if (maxResults != null && docs.length >= maxResults) break;
+    current = (payload.next ?? null) as string | null;
+    pageCount += 1;
+  }
+  return docs;
+}
+
+
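One behavioural detail worth noting in fetchAllPages above: a failed page request or a non-success payload silently ends pagination and the documents aggregated so far are returned, so no error surfaces to the caller. A defensive check on the consumer side might look like the following sketch (not part of the SDK; the comparison against total is an illustrative heuristic):

  const job = await firecrawl.getCrawlStatus(jobId);
  if (job.status === "completed" && job.data.length < job.total) {
    // Some pages may have been dropped by a failed `next` fetch or a configured limit;
    // re-request the status, or pass { autoPaginate: false } and page manually via `next`.
  }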