firecrawl 1.29.3 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -2
- package/LICENSE +0 -0
- package/README.md +85 -78
- package/audit-ci.jsonc +4 -0
- package/dist/chunk-JFWW4BWA.js +85 -0
- package/dist/index.cjs +964 -39
- package/dist/index.d.cts +529 -11
- package/dist/index.d.ts +529 -11
- package/dist/index.js +952 -27
- package/dist/package-KYZ3HXR5.js +4 -0
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +6 -6
- package/src/__tests__/e2e/v2/batch.test.ts +74 -0
- package/src/__tests__/e2e/v2/crawl.test.ts +182 -0
- package/src/__tests__/e2e/v2/extract.test.ts +70 -0
- package/src/__tests__/e2e/v2/map.test.ts +55 -0
- package/src/__tests__/e2e/v2/scrape.test.ts +130 -0
- package/src/__tests__/e2e/v2/search.test.ts +247 -0
- package/src/__tests__/e2e/v2/usage.test.ts +36 -0
- package/src/__tests__/e2e/v2/utils/idmux.ts +58 -0
- package/src/__tests__/e2e/v2/watcher.test.ts +96 -0
- package/src/__tests__/unit/v2/errorHandler.test.ts +19 -0
- package/src/__tests__/unit/v2/scrape.unit.test.ts +11 -0
- package/src/__tests__/unit/v2/validation.test.ts +59 -0
- package/src/index.backup.ts +2146 -0
- package/src/index.ts +27 -2134
- package/src/v1/index.ts +2158 -0
- package/src/v2/client.ts +281 -0
- package/src/v2/methods/batch.ts +131 -0
- package/src/v2/methods/crawl.ts +160 -0
- package/src/v2/methods/extract.ts +86 -0
- package/src/v2/methods/map.ts +37 -0
- package/src/v2/methods/scrape.ts +26 -0
- package/src/v2/methods/search.ts +69 -0
- package/src/v2/methods/usage.ts +39 -0
- package/src/v2/types.ts +308 -0
- package/src/v2/utils/errorHandler.ts +18 -0
- package/src/v2/utils/getVersion.ts +14 -0
- package/src/v2/utils/httpClient.ts +99 -0
- package/src/v2/utils/validation.ts +50 -0
- package/src/v2/watcher.ts +159 -0
- package/tsconfig.json +2 -1
- package/tsup.config.ts +0 -0
- package/dist/package-Z6F7JDXI.js +0 -111
- /package/src/__tests__/{v1/e2e_withAuth → e2e/v1}/index.test.ts +0 -0
- /package/src/__tests__/{v1/unit → unit/v1}/monitor-job-status-retry.test.ts +0 -0
package/dist/index.d.ts
CHANGED
@@ -1,7 +1,514 @@
-import { AxiosRequestHeaders, AxiosResponse } from 'axios';
 import * as zt from 'zod';
+import { ZodTypeAny, infer } from 'zod';
+import { AxiosResponse, AxiosRequestHeaders } from 'axios';
+import { EventEmitter } from 'events';
 import { TypedEventTarget } from 'typescript-event-target';
 
+type FormatString = "markdown" | "html" | "rawHtml" | "links" | "screenshot" | "summary" | "changeTracking" | "json";
+interface Viewport {
+    width: number;
+    height: number;
+}
+interface Format {
+    type: FormatString;
+}
+interface JsonFormat extends Format {
+    type: "json";
+    prompt?: string;
+    schema?: Record<string, unknown> | ZodTypeAny;
+}
+interface ScreenshotFormat {
+    type: "screenshot";
+    fullPage?: boolean;
+    quality?: number;
+    viewport?: Viewport | {
+        width: number;
+        height: number;
+    };
+}
+interface ChangeTrackingFormat extends Format {
+    type: "changeTracking";
+    modes: ("git-diff" | "json")[];
+    schema?: Record<string, unknown>;
+    prompt?: string;
+    tag?: string;
+}
+type FormatOption = FormatString | Format | JsonFormat | ChangeTrackingFormat | ScreenshotFormat;
+interface LocationConfig {
+    country?: string;
+    languages?: string[];
+}
+interface WaitAction {
+    type: "wait";
+    milliseconds?: number;
+    selector?: string;
+}
+interface ScreenshotAction {
+    type: "screenshot";
+    fullPage?: boolean;
+    quality?: number;
+    viewport?: Viewport | {
+        width: number;
+        height: number;
+    };
+}
+interface ClickAction {
+    type: "click";
+    selector: string;
+}
+interface WriteAction {
+    type: "write";
+    text: string;
+}
+interface PressAction {
+    type: "press";
+    key: string;
+}
+interface ScrollAction {
+    type: "scroll";
+    direction: "up" | "down";
+    selector?: string;
+}
+interface ScrapeAction {
+    type: "scrape";
+}
+interface ExecuteJavascriptAction {
+    type: "executeJavascript";
+    script: string;
+}
+interface PDFAction {
+    type: "pdf";
+    format?: "A0" | "A1" | "A2" | "A3" | "A4" | "A5" | "A6" | "Letter" | "Legal" | "Tabloid" | "Ledger";
+    landscape?: boolean;
+    scale?: number;
+}
+type ActionOption = WaitAction | ScreenshotAction | ClickAction | WriteAction | PressAction | ScrollAction | ScrapeAction | ExecuteJavascriptAction | PDFAction;
+interface ScrapeOptions {
+    formats?: FormatOption[];
+    headers?: Record<string, string>;
+    includeTags?: string[];
+    excludeTags?: string[];
+    onlyMainContent?: boolean;
+    timeout?: number;
+    waitFor?: number;
+    mobile?: boolean;
+    parsers?: string[];
+    actions?: ActionOption[];
+    location?: LocationConfig;
+    skipTlsVerification?: boolean;
+    removeBase64Images?: boolean;
+    fastMode?: boolean;
+    useMock?: string;
+    blockAds?: boolean;
+    proxy?: "basic" | "stealth" | "auto" | string;
+    maxAge?: number;
+    storeInCache?: boolean;
+}
+interface WebhookConfig {
+    url: string;
+    headers?: Record<string, string>;
+    metadata?: Record<string, string>;
+    events?: Array<"completed" | "failed" | "page" | "started">;
+}
+interface DocumentMetadata {
+    title?: string;
+    description?: string;
+    language?: string;
+    keywords?: string | string[];
+    robots?: string;
+    ogTitle?: string;
+    ogDescription?: string;
+    ogUrl?: string;
+    ogImage?: string;
+    sourceURL?: string;
+    statusCode?: number;
+    error?: string;
+    [key: string]: unknown;
+}
+interface Document {
+    markdown?: string;
+    html?: string;
+    rawHtml?: string;
+    json?: unknown;
+    summary?: string;
+    metadata?: DocumentMetadata;
+    links?: string[];
+    screenshot?: string;
+    actions?: Record<string, unknown>;
+    warning?: string;
+    changeTracking?: Record<string, unknown>;
+}
+interface SearchResult {
+    url: string;
+    title?: string;
+    description?: string;
+}
+interface SearchData {
+    web?: Array<SearchResult | Document>;
+    news?: Array<SearchResult | Document>;
+    images?: Array<SearchResult | Document>;
+}
+interface SearchRequest {
+    query: string;
+    sources?: Array<"web" | "news" | "images" | {
+        type: "web" | "news" | "images";
+    }>;
+    limit?: number;
+    tbs?: string;
+    location?: string;
+    ignoreInvalidURLs?: boolean;
+    timeout?: number;
+    scrapeOptions?: ScrapeOptions;
+}
+interface CrawlResponse$1 {
+    id: string;
+    url: string;
+}
+interface CrawlJob {
+    status: "scraping" | "completed" | "failed" | "cancelled";
+    total: number;
+    completed: number;
+    creditsUsed?: number;
+    expiresAt?: string;
+    next?: string | null;
+    data: Document[];
+}
+interface BatchScrapeResponse$1 {
+    id: string;
+    url: string;
+    invalidURLs?: string[];
+}
+interface BatchScrapeJob {
+    status: "scraping" | "completed" | "failed" | "cancelled";
+    completed: number;
+    total: number;
+    creditsUsed?: number;
+    expiresAt?: string;
+    next?: string | null;
+    data: Document[];
+}
+interface MapData {
+    links: SearchResult[];
+}
+interface MapOptions {
+    search?: string;
+    sitemap?: "only" | "include" | "skip";
+    includeSubdomains?: boolean;
+    limit?: number;
+    timeout?: number;
+}
+interface ExtractResponse$1 {
+    success?: boolean;
+    id?: string;
+    status?: "processing" | "completed" | "failed" | "cancelled";
+    data?: unknown;
+    error?: string;
+    warning?: string;
+    sources?: Record<string, unknown>;
+    expiresAt?: string;
+}
+interface ConcurrencyCheck {
+    concurrency: number;
+    maxConcurrency: number;
+}
+interface CreditUsage {
+    remainingCredits: number;
+}
+interface TokenUsage {
+    remainingTokens: number;
+}
+interface CrawlErrorsResponse$1 {
+    errors: {
+        id: string;
+        timestamp?: string;
+        url: string;
+        code?: string;
+        error: string;
+    }[];
+    robotsBlocked: string[];
+}
+interface ActiveCrawl {
+    id: string;
+    teamId: string;
+    url: string;
+    options?: Record<string, unknown> | null;
+}
+interface ActiveCrawlsResponse {
+    success: boolean;
+    crawls: ActiveCrawl[];
+}
+interface ErrorDetails {
+    code?: string;
+    message: string;
+    details?: Record<string, unknown>;
+    status?: number;
+}
+declare class SdkError extends Error {
+    status?: number;
+    code?: string;
+    details?: unknown;
+    constructor(message: string, status?: number, code?: string, details?: unknown);
+}
+
+interface HttpClientOptions {
+    apiKey: string;
+    apiUrl: string;
+    timeoutMs?: number;
+    maxRetries?: number;
+    backoffFactor?: number;
+}
+declare class HttpClient {
+    private instance;
+    private readonly apiKey;
+    private readonly apiUrl;
+    private readonly maxRetries;
+    private readonly backoffFactor;
+    constructor(options: HttpClientOptions);
+    getApiUrl(): string;
+    getApiKey(): string;
+    private request;
+    private sleep;
+    post<T = any>(endpoint: string, body: Record<string, unknown>, headers?: Record<string, string>): Promise<AxiosResponse<T, any>>;
+    get<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any>>;
+    delete<T = any>(endpoint: string, headers?: Record<string, string>): Promise<AxiosResponse<T, any>>;
+    prepareHeaders(idempotencyKey?: string): Record<string, string>;
+}
+
+interface CrawlRequest {
+    url: string;
+    prompt?: string | null;
+    excludePaths?: string[] | null;
+    includePaths?: string[] | null;
+    maxDiscoveryDepth?: number | null;
+    sitemap?: "skip" | "include";
+    ignoreQueryParameters?: boolean;
+    limit?: number | null;
+    crawlEntireDomain?: boolean;
+    allowExternalLinks?: boolean;
+    allowSubdomains?: boolean;
+    delay?: number | null;
+    maxConcurrency?: number | null;
+    webhook?: string | WebhookConfig | null;
+    scrapeOptions?: ScrapeOptions | null;
+    zeroDataRetention?: boolean;
+}
+declare function startCrawl(http: HttpClient, request: CrawlRequest): Promise<CrawlResponse$1>;
+
+interface StartBatchOptions {
+    options?: ScrapeOptions;
+    webhook?: string | WebhookConfig;
+    appendToId?: string;
+    ignoreInvalidURLs?: boolean;
+    maxConcurrency?: number;
+    zeroDataRetention?: boolean;
+    integration?: string;
+    idempotencyKey?: string;
+}
+declare function startBatchScrape(http: HttpClient, urls: string[], { options, webhook, appendToId, ignoreInvalidURLs, maxConcurrency, zeroDataRetention, integration, idempotencyKey, }?: StartBatchOptions): Promise<BatchScrapeResponse$1>;
+
+declare function prepareExtractPayload(args: {
+    urls?: string[];
+    prompt?: string;
+    schema?: Record<string, unknown> | ZodTypeAny;
+    systemPrompt?: string;
+    allowExternalLinks?: boolean;
+    enableWebSearch?: boolean;
+    showSources?: boolean;
+    scrapeOptions?: ScrapeOptions;
+    ignoreInvalidURLs?: boolean;
+}): Record<string, unknown>;
+declare function startExtract(http: HttpClient, args: Parameters<typeof prepareExtractPayload>[0]): Promise<ExtractResponse$1>;
+
+type JobKind = "crawl" | "batch";
+interface WatcherOptions {
+    kind?: JobKind;
+    pollInterval?: number;
+    timeout?: number;
+}
+declare class Watcher extends EventEmitter {
+    private readonly http;
+    private readonly jobId;
+    private readonly kind;
+    private readonly pollInterval;
+    private readonly timeout?;
+    private ws?;
+    private closed;
+    constructor(http: HttpClient, jobId: string, opts?: WatcherOptions);
+    private buildWsUrl;
+    start(): Promise<void>;
+    private attachWsHandlers;
+    private emitDocuments;
+    private emitSnapshot;
+    private pollLoop;
+    close(): void;
+}
+
+type ExtractJsonSchemaFromFormats<Formats> = Formats extends readonly any[] ? Extract<Formats[number], {
+    type: "json";
+    schema?: unknown;
+}>["schema"] : never;
+type InferredJsonFromOptions<Opts> = Opts extends {
+    formats?: infer Fmts;
+} ? ExtractJsonSchemaFromFormats<Fmts> extends ZodTypeAny ? infer<ExtractJsonSchemaFromFormats<Fmts>> : unknown : unknown;
+/**
+ * Configuration for the v2 client transport.
+ */
+interface FirecrawlClientOptions {
+    /** API key (falls back to FIRECRAWL_API_KEY). */
+    apiKey?: string | null;
+    /** API base URL (falls back to FIRECRAWL_API_URL or https://api.firecrawl.dev). */
+    apiUrl?: string | null;
+    /** Per-request timeout in milliseconds (optional). */
+    timeoutMs?: number;
+    /** Max automatic retries for transient failures (optional). */
+    maxRetries?: number;
+    /** Exponential backoff factor for retries (optional). */
+    backoffFactor?: number;
+}
+/**
+ * Firecrawl v2 client. Provides typed access to all v2 endpoints and utilities.
+ */
+declare class FirecrawlClient {
+    private readonly http;
+    /**
+     * Create a v2 client.
+     * @param options Transport configuration (API key, base URL, timeouts, retries).
+     */
+    constructor(options?: FirecrawlClientOptions);
+    /**
+     * Scrape a single URL.
+     * @param url Target URL.
+     * @param options Optional scrape options (formats, headers, etc.).
+     * @returns Resolved document with requested formats.
+     */
+    scrape<Opts extends ScrapeOptions>(url: string, options: Opts): Promise<Omit<Document, "json"> & {
+        json?: InferredJsonFromOptions<Opts>;
+    }>;
+    scrape(url: string, options?: ScrapeOptions): Promise<Document>;
+    /**
+     * Search the web and optionally scrape each result.
+     * @param query Search query string.
+     * @param req Additional search options (sources, limit, scrapeOptions, etc.).
+     * @returns Structured search results.
+     */
+    search(query: string, req?: Omit<SearchRequest, "query">): Promise<SearchData>;
+    /**
+     * Map a site to discover URLs (sitemap-aware).
+     * @param url Root URL to map.
+     * @param options Mapping options (sitemap mode, includeSubdomains, limit, timeout).
+     * @returns Discovered links.
+     */
+    map(url: string, options?: MapOptions): Promise<MapData>;
+    /**
+     * Start a crawl job (async).
+     * @param url Root URL to crawl.
+     * @param req Crawl configuration (paths, limits, scrapeOptions, webhook, etc.).
+     * @returns Job id and url.
+     */
+    startCrawl(url: string, req?: Omit<Parameters<typeof startCrawl>[1], "url">): Promise<CrawlResponse$1>;
+    /**
+     * Get the status and partial data of a crawl job.
+     * @param jobId Crawl job id.
+     */
+    getCrawlStatus(jobId: string): Promise<CrawlJob>;
+    /**
+     * Cancel a crawl job.
+     * @param jobId Crawl job id.
+     * @returns True if cancelled.
+     */
+    cancelCrawl(jobId: string): Promise<boolean>;
+    /**
+     * Convenience waiter: start a crawl and poll until it finishes.
+     * @param url Root URL to crawl.
+     * @param req Crawl configuration plus waiter controls (pollInterval, timeout seconds).
+     * @returns Final job snapshot.
+     */
+    crawl(url: string, req?: Omit<Parameters<typeof startCrawl>[1], "url"> & {
+        pollInterval?: number;
+        timeout?: number;
+    }): Promise<CrawlJob>;
+    /**
+     * Retrieve crawl errors and robots.txt blocks.
+     * @param crawlId Crawl job id.
+     */
+    getCrawlErrors(crawlId: string): Promise<CrawlErrorsResponse$1>;
+    /**
+     * List active crawls for the authenticated team.
+     */
+    getActiveCrawls(): Promise<ActiveCrawlsResponse>;
+    /**
+     * Preview normalized crawl parameters produced by a natural-language prompt.
+     * @param url Root URL.
+     * @param prompt Natural-language instruction.
+     */
+    crawlParamsPreview(url: string, prompt: string): Promise<Record<string, unknown>>;
+    /**
+     * Start a batch scrape job for multiple URLs (async).
+     * @param urls URLs to scrape.
+     * @param opts Batch options (scrape options, webhook, concurrency, idempotency key, etc.).
+     * @returns Job id and url.
+     */
+    startBatchScrape(urls: string[], opts?: Parameters<typeof startBatchScrape>[2]): Promise<BatchScrapeResponse$1>;
+    /**
+     * Get the status and partial data of a batch scrape job.
+     * @param jobId Batch job id.
+     */
+    getBatchScrapeStatus(jobId: string): Promise<BatchScrapeJob>;
+    /**
+     * Retrieve batch scrape errors and robots.txt blocks.
+     * @param jobId Batch job id.
+     */
+    getBatchScrapeErrors(jobId: string): Promise<CrawlErrorsResponse$1>;
+    /**
+     * Cancel a batch scrape job.
+     * @param jobId Batch job id.
+     * @returns True if cancelled.
+     */
+    cancelBatchScrape(jobId: string): Promise<boolean>;
+    /**
+     * Convenience waiter: start a batch scrape and poll until it finishes.
+     * @param urls URLs to scrape.
+     * @param opts Batch options plus waiter controls (pollInterval, timeout seconds).
+     * @returns Final job snapshot.
+     */
+    batchScrape(urls: string[], opts?: Parameters<typeof startBatchScrape>[2] & {
+        pollInterval?: number;
+        timeout?: number;
+    }): Promise<BatchScrapeJob>;
+    /**
+     * Start an extract job (async).
+     * @param args Extraction request (urls, schema or prompt, flags).
+     * @returns Job id or processing state.
+     */
+    startExtract(args: Parameters<typeof startExtract>[1]): Promise<ExtractResponse$1>;
+    /**
+     * Get extract job status/data.
+     * @param jobId Extract job id.
+     */
+    getExtractStatus(jobId: string): Promise<ExtractResponse$1>;
+    /**
+     * Convenience waiter: start an extract and poll until it finishes.
+     * @param args Extraction request plus waiter controls (pollInterval, timeout seconds).
+     * @returns Final extract response.
+     */
+    extract(args: Parameters<typeof startExtract>[1] & {
+        pollInterval?: number;
+        timeout?: number;
+    }): Promise<ExtractResponse$1>;
+    /** Current concurrency usage. */
+    getConcurrency(): Promise<ConcurrencyCheck>;
+    /** Current credit usage. */
+    getCreditUsage(): Promise<CreditUsage>;
+    /** Recent token usage. */
+    getTokenUsage(): Promise<TokenUsage>;
+    /**
+     * Create a watcher for a crawl or batch job. Emits: `document`, `snapshot`, `done`, `error`.
+     * @param jobId Job id.
+     * @param opts Watcher options (kind, pollInterval, timeout seconds).
+     */
+    watcher(jobId: string, opts?: WatcherOptions): Watcher;
+}
+
 /**
  * Configuration interface for FirecrawlApp.
  * @param apiKey - Optional API key for authentication.
@@ -343,15 +850,6 @@ interface ErrorResponse {
     success: false;
     error: string;
 }
-/**
- * Custom error class for Firecrawl.
- * Extends the built-in Error class to include a status code.
- */
-declare class FirecrawlError extends Error {
-    statusCode: number;
-    details?: any;
-    constructor(message: string, statusCode: number, details?: any);
-}
 /**
  * Parameters for search operations.
  * Defines options for searching and scraping search results.
@@ -388,6 +886,7 @@ interface CrawlErrorsResponse {
         id: string;
         timestamp?: string;
         url: string;
+        code?: string;
         error: string;
     }[];
     /**
@@ -813,4 +1312,23 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
     close(): void;
 }
 
-
+/**
+ * Firecrawl JS/TS SDK — unified entrypoint.
+ * - v2 by default on the top‑level client
+ * - v1 available under `.v1` (feature‑frozen)
+ * - Exports: `Firecrawl` (default), `FirecrawlClient` (v2), `FirecrawlAppV1` (v1), and v2 types
+ */
+/** Direct v2 client. */
+
+/** Unified client: extends v2 and adds `.v1` for backward compatibility. */
+declare class Firecrawl extends FirecrawlClient {
+    /** Feature‑frozen v1 client (lazy). */
+    private _v1?;
+    private _v1Opts;
+    /** @param opts API credentials and base URL. */
+    constructor(opts?: FirecrawlAppConfig);
+    /** Access the legacy v1 client (instantiated on first access). */
+    get v1(): FirecrawlApp;
+}
+
+export { type ActionOption, type ActiveCrawl, type ActiveCrawlsResponse, type BatchScrapeJob, type BatchScrapeResponse$1 as BatchScrapeResponse, type ChangeTrackingFormat, type ClickAction, type ConcurrencyCheck, type CrawlErrorsResponse$1 as CrawlErrorsResponse, type CrawlJob, type CrawlResponse$1 as CrawlResponse, type CreditUsage, type Document, type DocumentMetadata, type ErrorDetails, type ExecuteJavascriptAction, type ExtractResponse$1 as ExtractResponse, Firecrawl, FirecrawlApp as FirecrawlAppV1, FirecrawlClient, type Format, type FormatOption, type FormatString, type JsonFormat, type LocationConfig, type MapData, type MapOptions, type PDFAction, type PressAction, type ScrapeAction, type ScrapeOptions, type ScreenshotAction, type ScreenshotFormat, type ScrollAction, SdkError, type SearchData, type SearchRequest, type SearchResult, type TokenUsage, type Viewport, type WaitAction, type WebhookConfig, type WriteAction, Firecrawl as default };