firecrawl 1.16.0 → 1.18.0
This diff shows the published contents of these package versions as they appear in their public registry, and is provided for informational purposes only.
- package/dist/index.cjs +22 -190
- package/dist/index.d.cts +5 -67
- package/dist/index.d.ts +5 -67
- package/dist/index.js +22 -190
- package/package.json +1 -1
- package/src/index.ts +183 -1
- package/dump.rdb +0 -0
package/dist/index.cjs
CHANGED
@@ -42,11 +42,9 @@ var import_isows = require("isows");
 var import_typescript_event_target = require("typescript-event-target");
 var FirecrawlError = class extends Error {
   statusCode;
-  details;
-  constructor(message, statusCode, details) {
+  constructor(message, statusCode) {
     super(message);
     this.statusCode = statusCode;
-    this.details = details;
   }
 };
 var FirecrawlApp = class {
@@ -93,20 +91,6 @@ var FirecrawlApp = class {
         }
       };
     }
-    if (jsonData?.jsonOptions?.schema) {
-      let schema = jsonData.jsonOptions.schema;
-      try {
-        schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
-      } catch (error) {
-      }
-      jsonData = {
-        ...jsonData,
-        jsonOptions: {
-          ...jsonData.jsonOptions,
-          schema
-        }
-      };
-    }
     try {
       const response = await import_axios.default.post(
         this.apiUrl + `/v1/scrape`,
@@ -261,26 +245,16 @@ var FirecrawlApp = class {
    * Checks the status of a crawl job using the Firecrawl API.
    * @param id - The ID of the crawl operation.
    * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-   * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-   * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
-   * @param limit - How many entries to return. Only used when `getAllData = false`.
    * @returns The response containing the job status.
    */
-  async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
+  async checkCrawlStatus(id, getAllData = false) {
     if (!id) {
       throw new FirecrawlError("No crawl ID provided", 400);
     }
     const headers = this.prepareHeaders();
-    const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
-    if (skip !== void 0) {
-      targetURL.searchParams.set("skip", skip.toString());
-    }
-    if (limit !== void 0) {
-      targetURL.searchParams.set("limit", limit.toString());
-    }
     try {
       const response = await this.getRequest(
-        targetURL.href,
+        `${this.apiUrl}/v1/crawl/${id}`,
         headers
       );
       if (response.status === 200) {
@@ -305,7 +279,6 @@ var FirecrawlApp = class {
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
-          next: getAllData ? void 0 : response.data.next,
           expiresAt: new Date(response.data.expiresAt),
           data: allData
         };
@@ -328,28 +301,6 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
-  /**
-   * Returns information about crawl errors.
-   * @param id - The ID of the crawl operation.
-   * @returns Information about crawl errors.
-   */
-  async checkCrawlErrors(id) {
-    const headers = this.prepareHeaders();
-    try {
-      const response = await this.deleteRequest(
-        `${this.apiUrl}/v1/crawl/${id}/errors`,
-        headers
-      );
-      if (response.status === 200) {
-        return response.data;
-      } else {
-        this.handleError(response, "check crawl errors");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500);
-    }
-    return { success: false, error: "Internal server error." };
-  }
   /**
    * Cancels a crawl job using the Firecrawl API.
    * @param id - The ID of the crawl operation.
@@ -438,20 +389,6 @@ var FirecrawlApp = class {
         }
       };
     }
-    if (jsonData?.jsonOptions?.schema) {
-      let schema = jsonData.jsonOptions.schema;
-      try {
-        schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
-      } catch (error) {
-      }
-      jsonData = {
-        ...jsonData,
-        jsonOptions: {
-          ...jsonData.jsonOptions,
-          schema
-        }
-      };
-    }
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -515,26 +452,16 @@ var FirecrawlApp = class {
    * Checks the status of a batch scrape job using the Firecrawl API.
    * @param id - The ID of the batch scrape operation.
    * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-   * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-   * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
-   * @param limit - How many entries to return. Only used when `getAllData = false`.
    * @returns The response containing the job status.
    */
-  async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
+  async checkBatchScrapeStatus(id, getAllData = false) {
     if (!id) {
       throw new FirecrawlError("No batch scrape ID provided", 400);
     }
     const headers = this.prepareHeaders();
-    const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
-    if (skip !== void 0) {
-      targetURL.searchParams.set("skip", skip.toString());
-    }
-    if (limit !== void 0) {
-      targetURL.searchParams.set("limit", limit.toString());
-    }
     try {
       const response = await this.getRequest(
-        targetURL.href,
+        `${this.apiUrl}/v1/batch/scrape/${id}`,
        headers
      );
       if (response.status === 200) {
@@ -559,7 +486,6 @@ var FirecrawlApp = class {
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
-          next: getAllData ? void 0 : response.data.next,
           expiresAt: new Date(response.data.expiresAt),
           data: allData
         };
@@ -582,28 +508,6 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
-  /**
-   * Returns information about batch scrape errors.
-   * @param id - The ID of the batch scrape operation.
-   * @returns Information about batch scrape errors.
-   */
-  async checkBatchScrapeErrors(id) {
-    const headers = this.prepareHeaders();
-    try {
-      const response = await this.deleteRequest(
-        `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
-        headers
-      );
-      if (response.status === 200) {
-        return response.data;
-      } else {
-        this.handleError(response, "check batch scrape errors");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500);
-    }
-    return { success: false, error: "Internal server error." };
-  }
   /**
    * Extracts information from URLs using the Firecrawl API.
    * Currently in Beta. Expect breaking changes on future minor versions.
@@ -626,65 +530,6 @@ var FirecrawlApp = class {
     } catch (error) {
       throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
     }
-    try {
-      const response = await this.postRequest(
-        this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
-        headers
-      );
-      if (response.status === 200) {
-        const jobId = response.data.id;
-        let extractStatus;
-        do {
-          const statusResponse = await this.getRequest(
-            `${this.apiUrl}/v1/extract/${jobId}`,
-            headers
-          );
-          extractStatus = statusResponse.data;
-          if (extractStatus.status === "completed") {
-            if (extractStatus.success) {
-              return {
-                success: true,
-                data: extractStatus.data,
-                warning: extractStatus.warning,
-                error: extractStatus.error
-              };
-            } else {
-              throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
-            }
-          } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
-            throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
-          }
-          await new Promise((resolve) => setTimeout(resolve, 1e3));
-        } while (extractStatus.status !== "completed");
-      } else {
-        this.handleError(response, "extract");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500, error.response?.data?.details);
-    }
-    return { success: false, error: "Internal server error." };
-  }
-  /**
-   * Initiates an asynchronous extract job for a URL using the Firecrawl API.
-   * @param url - The URL to extract data from.
-   * @param params - Additional parameters for the extract request.
-   * @param idempotencyKey - Optional idempotency key for the request.
-   * @returns The response from the extract operation.
-   */
-  async asyncExtract(urls, params, idempotencyKey) {
-    const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, ...params };
-    let jsonSchema;
-    try {
-      if (params?.schema instanceof zt.ZodType) {
-        jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
-      } else {
-        jsonSchema = params?.schema;
-      }
-    } catch (error) {
-      throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
-    }
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
@@ -692,34 +537,24 @@ var FirecrawlApp = class {
         headers
       );
       if (response.status === 200) {
-        return response.data;
-      } else {
-        this.handleError(response, "start extract job");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500);
-    }
-    return { success: false, error: "Internal server error." };
-  }
-  /**
-   * Retrieves the status of an extract job.
-   * @param jobId - The ID of the extract job.
-   * @returns The status of the extract job.
-   */
-  async getExtractStatus(jobId) {
-    try {
-      const response = await this.getRequest(
-        `${this.apiUrl}/v1/extract/${jobId}`,
-        this.prepareHeaders()
-      );
-      if (response.status === 200) {
-        return response.data;
+        const responseData = response.data;
+        if (responseData.success) {
+          return {
+            success: true,
+            data: responseData.data,
+            warning: responseData.warning,
+            error: responseData.error
+          };
+        } else {
+          throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
+        }
       } else {
-        this.handleError(response, "
+        this.handleError(response, "extract");
      }
    } catch (error) {
       throw new FirecrawlError(error.message, 500);
     }
+    return { success: false, error: "Internal server error." };
   }
   /**
    * Prepares the headers for an API request.
@@ -835,13 +670,11 @@ var FirecrawlApp = class {
    * @param {string} action - The action being performed when the error occurred.
    */
   handleError(response, action) {
-    if ([
+    if ([402, 408, 409, 500].includes(response.status)) {
       const errorMessage = response.data.error || "Unknown error occurred";
-      const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
       throw new FirecrawlError(
-        `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`,
-        response.status,
-        response?.data?.details
+        `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
+        response.status
       );
     } else {
       throw new FirecrawlError(
@@ -859,8 +692,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
   constructor(id, app) {
     super();
     this.id = id;
-
-    this.ws = new import_isows.WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
+    this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
     this.status = "scraping";
     this.data = [];
     const messageHandler = (msg) => {
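
A note on the hunks above: FirecrawlError now carries only message and statusCode; the details constructor argument and property are gone, and handleError no longer appends response details to the message. A minimal migration sketch for calling code (the URL and key are placeholders, not from this diff):

    import FirecrawlApp, { FirecrawlError } from "firecrawl";

    const app = new FirecrawlApp({ apiKey: "fc-YOUR-KEY" });

    try {
      await app.scrapeUrl("https://example.com", { formats: ["markdown"] });
    } catch (err) {
      if (err instanceof FirecrawlError) {
        // 1.16.0 also exposed err.details; from 1.18.0 only these fields remain.
        console.error(err.statusCode, err.message);
      }
    }
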
package/dist/index.d.cts
CHANGED
@@ -61,7 +61,6 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
     rawHtml?: string;
     links?: string[];
     extract?: T;
-    json?: T;
     screenshot?: string;
     metadata?: FirecrawlDocumentMetadata;
     actions: ActionsSchema;
@@ -73,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
 * Defines the options and configurations available for scraping web content.
 */
 interface CrawlScrapeOptions {
-    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
+    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
     headers?: Record<string, string>;
     includeTags?: string[];
     excludeTags?: string[];
@@ -87,7 +86,6 @@ interface CrawlScrapeOptions {
     mobile?: boolean;
     skipTlsVerification?: boolean;
     removeBase64Images?: boolean;
-    blockAds?: boolean;
 }
 type Action = {
     type: "wait";
@@ -121,11 +119,6 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
         schema?: LLMSchema;
         systemPrompt?: string;
     };
-    jsonOptions?: {
-        prompt?: string;
-        schema?: LLMSchema;
-        systemPrompt?: string;
-    };
     actions?: ActionsSchema;
 }
 interface ActionsResult {
@@ -157,7 +150,6 @@ interface CrawlParams {
         url: string;
         headers?: Record<string, string>;
         metadata?: Record<string, string>;
-        events?: ["completed", "failed", "page", "started"][number][];
     };
     deduplicateSimilarURLs?: boolean;
     ignoreQueryParameters?: boolean;
@@ -221,7 +213,6 @@ interface MapParams {
     includeSubdomains?: boolean;
     sitemapOnly?: boolean;
     limit?: number;
-    timeout?: number;
 }
 /**
 * Response interface for mapping operations.
@@ -241,9 +232,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
     schema?: LLMSchema | object;
     systemPrompt?: string;
     allowExternalLinks?: boolean;
-    enableWebSearch?: boolean;
     includeSubdomains?: boolean;
-    origin?: string;
 }
 /**
 * Response interface for extracting information from URLs.
@@ -269,8 +258,7 @@ interface ErrorResponse {
 */
 declare class FirecrawlError extends Error {
     statusCode: number;
-    details?: any;
-    constructor(message: string, statusCode: number, details?: any);
+    constructor(message: string, statusCode: number);
 }
 /**
 * Parameters for search operations.
@@ -297,24 +285,6 @@ interface SearchResponse {
     warning?: string;
     error?: string;
 }
-/**
- * Response interface for crawl/batch scrape error monitoring.
- */
-interface CrawlErrorsResponse {
-    /**
-     * Scrapes that errored out + error details
-     */
-    errors: {
-        id: string;
-        timestamp?: string;
-        url: string;
-        error: string;
-    }[];
-    /**
-     * URLs blocked by robots.txt
-     */
-    robotsBlocked: string[];
-}
 /**
 * Main class for interacting with the Firecrawl API.
 * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -356,18 +326,9 @@ declare class FirecrawlApp {
     * Checks the status of a crawl job using the Firecrawl API.
     * @param id - The ID of the crawl operation.
     * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-    * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-    * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
-    * @param limit - How many entries to return. Only used when `getAllData = false`.
     * @returns The response containing the job status.
     */
-    checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
-    /**
-     * Returns information about crawl errors.
-     * @param id - The ID of the crawl operation.
-     * @returns Information about crawl errors.
-     */
-    checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
+    checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
    /**
     * Cancels a crawl job using the Firecrawl API.
     * @param id - The ID of the crawl operation.
@@ -412,18 +373,9 @@ declare class FirecrawlApp {
     * Checks the status of a batch scrape job using the Firecrawl API.
     * @param id - The ID of the batch scrape operation.
     * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-    * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-    * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
-    * @param limit - How many entries to return. Only used when `getAllData = false`.
     * @returns The response containing the job status.
     */
-    checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
-    /**
-     * Returns information about batch scrape errors.
-     * @param id - The ID of the batch scrape operation.
-     * @returns Information about batch scrape errors.
-     */
-    checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
+    checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
    /**
     * Extracts information from URLs using the Firecrawl API.
     * Currently in Beta. Expect breaking changes on future minor versions.
@@ -432,20 +384,6 @@ declare class FirecrawlApp {
     * @returns The response from the extract operation.
     */
    extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
-    /**
-     * Initiates an asynchronous extract job for a URL using the Firecrawl API.
-     * @param url - The URL to extract data from.
-     * @param params - Additional parameters for the extract request.
-     * @param idempotencyKey - Optional idempotency key for the request.
-     * @returns The response from the extract operation.
-     */
-    asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
-    /**
-     * Retrieves the status of an extract job.
-     * @param jobId - The ID of the extract job.
-     * @returns The status of the extract job.
-     */
-    getExtractStatus(jobId: string): Promise<any>;
    /**
     * Prepares the headers for an API request.
     * @param idempotencyKey - Optional key to ensure idempotency.
@@ -511,4 +449,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
    close(): void;
 }

-export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
+export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.d.ts
CHANGED
@@ -61,7 +61,6 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
     rawHtml?: string;
     links?: string[];
     extract?: T;
-    json?: T;
     screenshot?: string;
     metadata?: FirecrawlDocumentMetadata;
     actions: ActionsSchema;
@@ -73,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
 * Defines the options and configurations available for scraping web content.
 */
 interface CrawlScrapeOptions {
-    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
+    formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
     headers?: Record<string, string>;
     includeTags?: string[];
     excludeTags?: string[];
@@ -87,7 +86,6 @@ interface CrawlScrapeOptions {
     mobile?: boolean;
     skipTlsVerification?: boolean;
     removeBase64Images?: boolean;
-    blockAds?: boolean;
 }
 type Action = {
     type: "wait";
@@ -121,11 +119,6 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
         schema?: LLMSchema;
         systemPrompt?: string;
     };
-    jsonOptions?: {
-        prompt?: string;
-        schema?: LLMSchema;
-        systemPrompt?: string;
-    };
     actions?: ActionsSchema;
 }
 interface ActionsResult {
@@ -157,7 +150,6 @@ interface CrawlParams {
         url: string;
         headers?: Record<string, string>;
         metadata?: Record<string, string>;
-        events?: ["completed", "failed", "page", "started"][number][];
     };
     deduplicateSimilarURLs?: boolean;
     ignoreQueryParameters?: boolean;
@@ -221,7 +213,6 @@ interface MapParams {
     includeSubdomains?: boolean;
     sitemapOnly?: boolean;
     limit?: number;
-    timeout?: number;
 }
 /**
 * Response interface for mapping operations.
@@ -241,9 +232,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
     schema?: LLMSchema | object;
     systemPrompt?: string;
     allowExternalLinks?: boolean;
-    enableWebSearch?: boolean;
     includeSubdomains?: boolean;
-    origin?: string;
 }
 /**
 * Response interface for extracting information from URLs.
@@ -269,8 +258,7 @@ interface ErrorResponse {
 */
 declare class FirecrawlError extends Error {
     statusCode: number;
-    details?: any;
-    constructor(message: string, statusCode: number, details?: any);
+    constructor(message: string, statusCode: number);
 }
 /**
 * Parameters for search operations.
@@ -297,24 +285,6 @@ interface SearchResponse {
     warning?: string;
     error?: string;
 }
-/**
- * Response interface for crawl/batch scrape error monitoring.
- */
-interface CrawlErrorsResponse {
-    /**
-     * Scrapes that errored out + error details
-     */
-    errors: {
-        id: string;
-        timestamp?: string;
-        url: string;
-        error: string;
-    }[];
-    /**
-     * URLs blocked by robots.txt
-     */
-    robotsBlocked: string[];
-}
 /**
 * Main class for interacting with the Firecrawl API.
 * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -356,18 +326,9 @@ declare class FirecrawlApp {
     * Checks the status of a crawl job using the Firecrawl API.
     * @param id - The ID of the crawl operation.
     * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-    * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-    * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
-    * @param limit - How many entries to return. Only used when `getAllData = false`.
     * @returns The response containing the job status.
     */
-    checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
-    /**
-     * Returns information about crawl errors.
-     * @param id - The ID of the crawl operation.
-     * @returns Information about crawl errors.
-     */
-    checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
+    checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
    /**
     * Cancels a crawl job using the Firecrawl API.
     * @param id - The ID of the crawl operation.
@@ -412,18 +373,9 @@ declare class FirecrawlApp {
     * Checks the status of a batch scrape job using the Firecrawl API.
     * @param id - The ID of the batch scrape operation.
     * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-    * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-    * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
-    * @param limit - How many entries to return. Only used when `getAllData = false`.
     * @returns The response containing the job status.
     */
-    checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
-    /**
-     * Returns information about batch scrape errors.
-     * @param id - The ID of the batch scrape operation.
-     * @returns Information about batch scrape errors.
-     */
-    checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
+    checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
    /**
     * Extracts information from URLs using the Firecrawl API.
     * Currently in Beta. Expect breaking changes on future minor versions.
@@ -432,20 +384,6 @@ declare class FirecrawlApp {
     * @returns The response from the extract operation.
     */
    extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
-    /**
-     * Initiates an asynchronous extract job for a URL using the Firecrawl API.
-     * @param url - The URL to extract data from.
-     * @param params - Additional parameters for the extract request.
-     * @param idempotencyKey - Optional idempotency key for the request.
-     * @returns The response from the extract operation.
-     */
-    asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
-    /**
-     * Retrieves the status of an extract job.
-     * @param jobId - The ID of the extract job.
-     * @returns The status of the extract job.
-     */
-    getExtractStatus(jobId: string): Promise<any>;
    /**
     * Prepares the headers for an API request.
     * @param idempotencyKey - Optional key to ensure idempotency.
@@ -511,4 +449,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
    close(): void;
 }

-export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
+export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
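
index.d.ts mirrors index.d.cts, so the same type-level removals apply here: FirecrawlDocument loses its json field and ScrapeParams its jsonOptions block. Scrapes that read document.json can switch to the still-typed extract format; a sketch, with field names taken from the SDK's ScrapeParams and an illustrative prompt:

    const doc = await app.scrapeUrl("https://example.com", {
      formats: ["markdown", "extract"],
      extract: { prompt: "Summarize this page in one sentence" },
    });
    if (doc.success) {
      console.log(doc.extract); // doc.json is no longer part of the public types
    }
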
package/dist/index.js
CHANGED
@@ -6,11 +6,9 @@ import { WebSocket } from "isows";
 import { TypedEventTarget } from "typescript-event-target";
 var FirecrawlError = class extends Error {
   statusCode;
-  details;
-  constructor(message, statusCode, details) {
+  constructor(message, statusCode) {
     super(message);
     this.statusCode = statusCode;
-    this.details = details;
   }
 };
 var FirecrawlApp = class {
@@ -57,20 +55,6 @@ var FirecrawlApp = class {
         }
       };
     }
-    if (jsonData?.jsonOptions?.schema) {
-      let schema = jsonData.jsonOptions.schema;
-      try {
-        schema = zodToJsonSchema(schema);
-      } catch (error) {
-      }
-      jsonData = {
-        ...jsonData,
-        jsonOptions: {
-          ...jsonData.jsonOptions,
-          schema
-        }
-      };
-    }
     try {
       const response = await axios.post(
         this.apiUrl + `/v1/scrape`,
@@ -225,26 +209,16 @@ var FirecrawlApp = class {
    * Checks the status of a crawl job using the Firecrawl API.
    * @param id - The ID of the crawl operation.
    * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-   * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-   * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
-   * @param limit - How many entries to return. Only used when `getAllData = false`.
    * @returns The response containing the job status.
    */
-  async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
+  async checkCrawlStatus(id, getAllData = false) {
     if (!id) {
       throw new FirecrawlError("No crawl ID provided", 400);
     }
     const headers = this.prepareHeaders();
-    const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
-    if (skip !== void 0) {
-      targetURL.searchParams.set("skip", skip.toString());
-    }
-    if (limit !== void 0) {
-      targetURL.searchParams.set("limit", limit.toString());
-    }
     try {
       const response = await this.getRequest(
-        targetURL.href,
+        `${this.apiUrl}/v1/crawl/${id}`,
         headers
       );
       if (response.status === 200) {
@@ -269,7 +243,6 @@ var FirecrawlApp = class {
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
-          next: getAllData ? void 0 : response.data.next,
           expiresAt: new Date(response.data.expiresAt),
           data: allData
         };
@@ -292,28 +265,6 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
-  /**
-   * Returns information about crawl errors.
-   * @param id - The ID of the crawl operation.
-   * @returns Information about crawl errors.
-   */
-  async checkCrawlErrors(id) {
-    const headers = this.prepareHeaders();
-    try {
-      const response = await this.deleteRequest(
-        `${this.apiUrl}/v1/crawl/${id}/errors`,
-        headers
-      );
-      if (response.status === 200) {
-        return response.data;
-      } else {
-        this.handleError(response, "check crawl errors");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500);
-    }
-    return { success: false, error: "Internal server error." };
-  }
   /**
    * Cancels a crawl job using the Firecrawl API.
    * @param id - The ID of the crawl operation.
@@ -402,20 +353,6 @@ var FirecrawlApp = class {
         }
       };
     }
-    if (jsonData?.jsonOptions?.schema) {
-      let schema = jsonData.jsonOptions.schema;
-      try {
-        schema = zodToJsonSchema(schema);
-      } catch (error) {
-      }
-      jsonData = {
-        ...jsonData,
-        jsonOptions: {
-          ...jsonData.jsonOptions,
-          schema
-        }
-      };
-    }
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -479,26 +416,16 @@ var FirecrawlApp = class {
    * Checks the status of a batch scrape job using the Firecrawl API.
    * @param id - The ID of the batch scrape operation.
    * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
-   * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
-   * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
-   * @param limit - How many entries to return. Only used when `getAllData = false`.
    * @returns The response containing the job status.
    */
-  async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
+  async checkBatchScrapeStatus(id, getAllData = false) {
     if (!id) {
       throw new FirecrawlError("No batch scrape ID provided", 400);
     }
     const headers = this.prepareHeaders();
-    const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
-    if (skip !== void 0) {
-      targetURL.searchParams.set("skip", skip.toString());
-    }
-    if (limit !== void 0) {
-      targetURL.searchParams.set("limit", limit.toString());
-    }
     try {
       const response = await this.getRequest(
-        targetURL.href,
+        `${this.apiUrl}/v1/batch/scrape/${id}`,
         headers
       );
       if (response.status === 200) {
@@ -523,7 +450,6 @@ var FirecrawlApp = class {
           total: response.data.total,
           completed: response.data.completed,
           creditsUsed: response.data.creditsUsed,
-          next: getAllData ? void 0 : response.data.next,
           expiresAt: new Date(response.data.expiresAt),
           data: allData
         };
@@ -546,28 +472,6 @@ var FirecrawlApp = class {
     }
     return { success: false, error: "Internal server error." };
   }
-  /**
-   * Returns information about batch scrape errors.
-   * @param id - The ID of the batch scrape operation.
-   * @returns Information about batch scrape errors.
-   */
-  async checkBatchScrapeErrors(id) {
-    const headers = this.prepareHeaders();
-    try {
-      const response = await this.deleteRequest(
-        `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
-        headers
-      );
-      if (response.status === 200) {
-        return response.data;
-      } else {
-        this.handleError(response, "check batch scrape errors");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500);
-    }
-    return { success: false, error: "Internal server error." };
-  }
   /**
    * Extracts information from URLs using the Firecrawl API.
    * Currently in Beta. Expect breaking changes on future minor versions.
@@ -590,65 +494,6 @@ var FirecrawlApp = class {
     } catch (error) {
       throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
     }
-    try {
-      const response = await this.postRequest(
-        this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
-        headers
-      );
-      if (response.status === 200) {
-        const jobId = response.data.id;
-        let extractStatus;
-        do {
-          const statusResponse = await this.getRequest(
-            `${this.apiUrl}/v1/extract/${jobId}`,
-            headers
-          );
-          extractStatus = statusResponse.data;
-          if (extractStatus.status === "completed") {
-            if (extractStatus.success) {
-              return {
-                success: true,
-                data: extractStatus.data,
-                warning: extractStatus.warning,
-                error: extractStatus.error
-              };
-            } else {
-              throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
-            }
-          } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
-            throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
-          }
-          await new Promise((resolve) => setTimeout(resolve, 1e3));
-        } while (extractStatus.status !== "completed");
-      } else {
-        this.handleError(response, "extract");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500, error.response?.data?.details);
-    }
-    return { success: false, error: "Internal server error." };
-  }
-  /**
-   * Initiates an asynchronous extract job for a URL using the Firecrawl API.
-   * @param url - The URL to extract data from.
-   * @param params - Additional parameters for the extract request.
-   * @param idempotencyKey - Optional idempotency key for the request.
-   * @returns The response from the extract operation.
-   */
-  async asyncExtract(urls, params, idempotencyKey) {
-    const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, ...params };
-    let jsonSchema;
-    try {
-      if (params?.schema instanceof zt.ZodType) {
-        jsonSchema = zodToJsonSchema(params.schema);
-      } else {
-        jsonSchema = params?.schema;
-      }
-    } catch (error) {
-      throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
-    }
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
@@ -656,34 +501,24 @@ var FirecrawlApp = class {
         headers
       );
       if (response.status === 200) {
-        return response.data;
-      } else {
-        this.handleError(response, "start extract job");
-      }
-    } catch (error) {
-      throw new FirecrawlError(error.message, 500);
-    }
-    return { success: false, error: "Internal server error." };
-  }
-  /**
-   * Retrieves the status of an extract job.
-   * @param jobId - The ID of the extract job.
-   * @returns The status of the extract job.
-   */
-  async getExtractStatus(jobId) {
-    try {
-      const response = await this.getRequest(
-        `${this.apiUrl}/v1/extract/${jobId}`,
-        this.prepareHeaders()
-      );
-      if (response.status === 200) {
-        return response.data;
+        const responseData = response.data;
+        if (responseData.success) {
+          return {
+            success: true,
+            data: responseData.data,
+            warning: responseData.warning,
+            error: responseData.error
+          };
+        } else {
+          throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
+        }
       } else {
-        this.handleError(response, "
+        this.handleError(response, "extract");
      }
    } catch (error) {
       throw new FirecrawlError(error.message, 500);
     }
+    return { success: false, error: "Internal server error." };
   }
   /**
    * Prepares the headers for an API request.
@@ -799,13 +634,11 @@ var FirecrawlApp = class {
    * @param {string} action - The action being performed when the error occurred.
    */
   handleError(response, action) {
-    if ([
+    if ([402, 408, 409, 500].includes(response.status)) {
       const errorMessage = response.data.error || "Unknown error occurred";
-      const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
       throw new FirecrawlError(
-        `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`,
-        response.status,
-        response?.data?.details
+        `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
+        response.status
      );
    } else {
       throw new FirecrawlError(
@@ -823,8 +656,7 @@ var CrawlWatcher = class extends TypedEventTarget {
   constructor(id, app) {
     super();
     this.id = id;
-
-    this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
+    this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
     this.status = "scraping";
     this.data = [];
     const messageHandler = (msg) => {
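
Both dist bundles now resolve extract in a single round trip: the POST response is read directly instead of polling /v1/extract/{jobId}, and the asyncExtract and getExtractStatus helpers are dropped. A minimal sketch of the surviving call, assuming a Zod schema as the ExtractParams type allows and an illustrative prompt:

    import { z } from "zod";

    const schema = z.object({ title: z.string() });
    const res = await app.extract(["https://example.com"], {
      prompt: "Extract the page title",
      schema,
    });
    if (res.success) {
      console.log(res.data.title); // typed through zt.infer<typeof schema>
    }
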
package/package.json
CHANGED
package/src/index.ts
CHANGED
@@ -94,6 +94,7 @@ export interface CrawlScrapeOptions {
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
   blockAds?: boolean;
+  proxy?: "basic" | "stealth";
 }

 export type Action = {
@@ -262,6 +263,8 @@ export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
   enableWebSearch?: boolean;
   includeSubdomains?: boolean;
   origin?: string;
+  showSources?: boolean;
+  scrapeOptions?: CrawlScrapeOptions;
 }

 /**
@@ -273,6 +276,7 @@ export interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
   data: LLMSchema;
   error?: string;
   warning?: string;
+  sources?: string[];
 }

 /**
@@ -345,6 +349,70 @@ export interface CrawlErrorsResponse {
   robotsBlocked: string[];
 };

+/**
+ * Parameters for deep research operations.
+ * Defines options for conducting deep research on a topic.
+ */
+export interface DeepResearchParams {
+  /**
+   * Maximum depth of research iterations (1-10)
+   * @default 7
+   */
+  maxDepth?: number;
+  /**
+   * Time limit in seconds (30-300)
+   * @default 270
+   */
+  timeLimit?: number;
+  /**
+   * Experimental flag for streaming steps
+   */
+  __experimental_streamSteps?: boolean;
+}
+
+/**
+ * Response interface for deep research operations.
+ */
+export interface DeepResearchResponse {
+  success: boolean;
+  id: string;
+}
+
+/**
+ * Status response interface for deep research operations.
+ */
+export interface DeepResearchStatusResponse {
+  success: boolean;
+  data: {
+    findings: Array<{
+      text: string;
+      source: string;
+    }>;
+    finalAnalysis: string;
+    analysis: string;
+    completedSteps: number;
+    totalSteps: number;
+  };
+  status: "processing" | "completed" | "failed";
+  error?: string;
+  expiresAt: string;
+  currentDepth: number;
+  maxDepth: number;
+  activities: Array<{
+    type: string;
+    status: string;
+    message: string;
+    timestamp: string;
+    depth: number;
+  }>;
+  sources: Array<{
+    url: string;
+    title: string;
+    description: string;
+  }>;
+  summaries: string[];
+}
+
 /**
  * Main class for interacting with the Firecrawl API.
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -1041,7 +1109,8 @@ export default class FirecrawlApp {
           success: true,
           data: extractStatus.data,
           warning: extractStatus.warning,
-          error: extractStatus.error
+          error: extractStatus.error,
+          sources: extractStatus?.sources || undefined,
         };
       } else {
         throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
@@ -1277,6 +1346,119 @@ export default class FirecrawlApp {
       );
     }
   }
+
+  /**
+   * Initiates a deep research operation on a given topic and polls until completion.
+   * @param params - Parameters for the deep research operation.
+   * @returns The final research results.
+   */
+  async __deepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchStatusResponse | ErrorResponse> {
+    try {
+      const response = await this.__asyncDeepResearch(topic, params);
+
+      if (!response.success || 'error' in response) {
+        return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
+      }
+
+      if (!response.id) {
+        throw new FirecrawlError(`Failed to start research. No job ID returned.`, 500);
+      }
+
+      const jobId = response.id;
+      let researchStatus;
+
+      while (true) {
+        // console.log("Checking research status...");
+        researchStatus = await this.__checkDeepResearchStatus(jobId);
+        // console.log("Research status:", researchStatus);
+
+        if ('error' in researchStatus && !researchStatus.success) {
+          return researchStatus;
+        }
+
+        if (researchStatus.status === "completed") {
+          return researchStatus;
+        }
+
+        if (researchStatus.status === "failed") {
+          throw new FirecrawlError(
+            `Research job ${researchStatus.status}. Error: ${researchStatus.error}`,
+            500
+          );
+        }
+
+        if (researchStatus.status !== "processing") {
+          break;
+        }
+
+        await new Promise(resolve => setTimeout(resolve, 2000));
+      }
+      // console.log("Research status finished:", researchStatus);
+
+      return { success: false, error: "Research job terminated unexpectedly" };
+    } catch (error: any) {
+      throw new FirecrawlError(error.message, 500, error.response?.data?.details);
+    }
+  }
+
+  /**
+   * Initiates a deep research operation on a given topic without polling.
+   * @param params - Parameters for the deep research operation.
+   * @returns The response containing the research job ID.
+   */
+  async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.postRequest(
+        `${this.apiUrl}/v1/deep-research`,
+        { topic, ...params },
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else {
+        this.handleError(response, "start deep research");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error." };
+  }
+
+  /**
+   * Checks the status of a deep research operation.
+   * @param id - The ID of the deep research operation.
+   * @returns The current status and results of the research operation.
+   */
+  async __checkDeepResearchStatus(id: string): Promise<DeepResearchStatusResponse | ErrorResponse> {
+    const headers = this.prepareHeaders();
+    try {
+      const response: AxiosResponse = await this.getRequest(
+        `${this.apiUrl}/v1/deep-research/${id}`,
+        headers
+      );
+
+      if (response.status === 200) {
+        return response.data;
+      } else if (response.status === 404) {
+        throw new FirecrawlError("Deep research job not found", 404);
+      } else {
+        this.handleError(response, "check deep research status");
+      }
+    } catch (error: any) {
+      if (error.response?.data?.error) {
+        throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
+      } else {
+        throw new FirecrawlError(error.message, 500);
+      }
+    }
+    return { success: false, error: "Internal server error." };
+  }
 }

 interface CrawlWatcherEvents {
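
The source additions close with an experimental deep research surface; the double-underscore prefix signals it is unstable. A usage sketch composed only from the signatures added above, with an illustrative topic and option values:

    // Start a research job and poll it to completion in one call:
    const research = await app.__deepResearch("history of web crawlers", {
      maxDepth: 3,    // 1-10, default 7
      timeLimit: 120, // seconds, 30-300, default 270
    });
    if (research.success && "data" in research) {
      console.log(research.data.finalAnalysis);
      console.log(research.sources.map((s) => s.url));
    }

    // Or start without polling and check status yourself:
    const job = await app.__asyncDeepResearch("history of web crawlers", {});
    if (job.success && "id" in job) {
      const status = await app.__checkDeepResearchStatus(job.id);
      console.log(status);
    }
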
package/dump.rdb
DELETED
Binary file