firecrawl-mcp 3.10.3 → 3.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -3,6 +3,8 @@ import dotenv from 'dotenv';
  import { FastMCP } from 'firecrawl-fastmcp';
  import { z } from 'zod';
  import FirecrawlApp from '@mendable/firecrawl-js';
+ import { readFile } from 'node:fs/promises';
+ import path from 'node:path';
  dotenv.config({ debug: false, quiet: true });
  function extractApiKey(headers) {
      const headerAuth = headers['authorization'];
@@ -35,6 +37,24 @@ function removeEmptyTopLevel(obj) {
      }
      return out;
  }
+ const searchDomainSchema = z
+     .string()
+     .trim()
+     .toLowerCase()
+     .regex(/^(?=.{1,253}$)(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]$/, 'Domain must be a valid hostname without protocol or path');
+ function buildSearchQueryWithDomains(query, includeDomains, excludeDomains) {
+     if (includeDomains?.length) {
+         return `${query} (${includeDomains
+             .map((domain) => `site:${domain}`)
+             .join(' OR ')})`;
+     }
+     if (excludeDomains?.length) {
+         return `${query} ${excludeDomains
+             .map((domain) => `-site:${domain}`)
+             .join(' ')}`;
+     }
+     return query;
+ }
  class ConsoleLogger {
      shouldLog = process.env.CLOUD_SERVICE === 'true' ||
          process.env.SSE_LOCAL === 'true' ||
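
To make the new helper concrete, here is how it rewrites queries with `site:` operators. A small sketch; the query and domain values are illustrative, not from the package:

```js
// Illustrative calls to buildSearchQueryWithDomains (defined in the hunk above).
buildSearchQueryWithDomains('top AI companies', ['example.com', 'example.org'], undefined);
// => "top AI companies (site:example.com OR site:example.org)"

buildSearchQueryWithDomains('top AI companies', undefined, ['example.net']);
// => "top AI companies -site:example.net"

buildSearchQueryWithDomains('top AI companies', [], []);
// => "top AI companies" (empty arrays fall through to the unmodified query)
```

Note that `includeDomains` takes precedence if both lists are somehow present; the schema-level `.refine` added further down prevents that case at validation time.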
@@ -152,6 +172,10 @@ function buildFormatsArray(args) {
          const jsonOpts = args.jsonOptions;
          result.push({ type: 'json', ...jsonOpts });
      }
+     else if (fmt === 'query') {
+         const queryOpts = args.queryOptions;
+         result.push({ type: 'query', ...queryOpts });
+     }
      else if (fmt === 'screenshot' && args.screenshotOptions) {
          const ssOpts = args.screenshotOptions;
          result.push({ type: 'screenshot', ...ssOpts });
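
For clarity, the new branch expands `queryOptions` into a typed format entry. A minimal sketch, assuming args shaped like the scrape tool's input:

```js
// Sketch only: what the 'query' branch contributes to the formats array.
const args = { formats: ['query'], queryOptions: { prompt: 'What is the refund window?' } };
const entry = { type: 'query', ...args.queryOptions };
// entry => { type: 'query', prompt: 'What is the refund window?' }
```

Unlike the screenshot branch, this one does not guard on `args.queryOptions` being present, so a bare `'query'` format yields `{ type: 'query' }` with no prompt.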
@@ -197,6 +221,7 @@ function transformScrapeParams(args) {
      if (parsers)
          out.parsers = parsers;
      delete out.jsonOptions;
+     delete out.queryOptions;
      delete out.screenshotOptions;
      delete out.pdfOptions;
      return out;
@@ -214,6 +239,8 @@ const scrapeParamsSchema = z.object({
          'changeTracking',
          'branding',
          'json',
+         'query',
+         'audio',
      ]))
          .optional(),
      jsonOptions: z
@@ -222,6 +249,11 @@ const scrapeParamsSchema = z.object({
              schema: z.record(z.string(), z.any()).optional(),
          })
          .optional(),
+     queryOptions: z
+         .object({
+             prompt: z.string().max(10000),
+         })
+         .optional(),
      screenshotOptions: z
          .object({
              fullPage: z.boolean().optional(),
@@ -269,10 +301,22 @@ const scrapeParamsSchema = z.object({
      storeInCache: z.boolean().optional(),
      zeroDataRetention: z.boolean().optional(),
      maxAge: z.number().optional(),
+     lockdown: z.boolean().optional(),
      proxy: z.enum(['basic', 'stealth', 'enhanced', 'auto']).optional(),
+     profile: z
+         .object({
+             name: z.string(),
+             saveChanges: z.boolean().optional(),
+         })
+         .optional(),
  });
  server.addTool({
      name: 'firecrawl_scrape',
+     annotations: {
+         title: 'Scrape a URL',
+         readOnlyHint: SAFE_MODE,
+         openWorldHint: true,
+     },
      description: `
  Scrape content from a single URL with advanced options.
  This is the most powerful, fastest, and most reliable scraper tool; if it is available, you should default to it for any web scraping needs.
@@ -335,7 +379,18 @@ If JSON extraction returns empty, minimal, or just navigation content, the page
  }
  }
  \`\`\`
- **Usage Example (markdown format - ONLY when full content genuinely needed):**
+
+ **Prefer markdown format by default.** You can read and reason over the full page content directly — no need for an intermediate query step. Use markdown for questions about page content, factual lookups, and any task where you need to understand the page.
+
+ **Use JSON format when the user needs:**
+ - Structured data with specific fields (extract all products with name, price, description)
+ - Data in a specific schema for downstream processing
+
+ **Use query format only when:**
+ - The page is extremely long and you need a single targeted answer without processing the full content
+ - You want a quick factual answer and don't need to retain the page content
+
+ **Usage Example (markdown format - default for most tasks):**
  \`\`\`json
  {
    "name": "firecrawl_scrape",
@@ -358,6 +413,7 @@ If JSON extraction returns empty, minimal, or just navigation content, the page
  \`\`\`
  **Branding format:** Extracts comprehensive brand identity (colors, fonts, typography, spacing, logo, UI components) for design analysis or style replication.
  **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
+ **Lockdown mode:** Set \`lockdown: true\` to serve the request only from the existing index/cache without any outbound network request. Useful for air-gapped or compliance-constrained environments where the request URL itself is considered sensitive. Errors on cache miss. Billed at 5 credits.
  **Returns:** JSON structured data, markdown, branding profile, or other formats as specified.
  ${SAFE_MODE
      ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.'
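
For reference, a hedged sketch of scrape arguments that exercise the new `lockdown` and `profile` options; the URL, profile name, and `maxAge` value are placeholders, and the profile semantics are assumed from the schema shape:

```js
// Illustrative firecrawl_scrape arguments using the options added in this release.
const lockdownArgs = {
    url: 'https://example.com/policy', // placeholder URL
    formats: ['markdown'],
    lockdown: true,    // serve from the existing index/cache only; errors on cache miss
    maxAge: 86400000,  // accept cached data up to a day old
};
const profileArgs = {
    url: 'https://example.com/account', // placeholder URL
    profile: { name: 'my-profile', saveChanges: true }, // assumed: reuse named browser state
};
```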
@@ -369,7 +425,12 @@ ${SAFE_MODE
          const client = getClient(session);
          const transformed = transformScrapeParams(options);
          const cleaned = removeEmptyTopLevel(transformed);
-         log.info('Scraping URL', { url: String(url) });
+         if (cleaned.lockdown) {
+             log.info('Scraping URL (lockdown)');
+         }
+         else {
+             log.info('Scraping URL', { url: String(url) });
+         }
          const res = await client.scrape(String(url), {
              ...cleaned,
              origin: ORIGIN,
@@ -379,6 +440,11 @@ ${SAFE_MODE
  });
  server.addTool({
      name: 'firecrawl_map',
+     annotations: {
+         title: 'Map a website',
+         readOnlyHint: true,
+         openWorldHint: true,
+     },
      description: `
  Map a website to discover all indexed URLs on the site.

@@ -432,6 +498,11 @@ Map a website to discover all indexed URLs on the site.
  });
  server.addTool({
      name: 'firecrawl_search',
+     annotations: {
+         title: 'Search the web',
+         readOnlyHint: true,
+         openWorldHint: true,
+     },
      description: `
  Search the web and optionally extract content from search results. This is the most powerful web search tool available; if it is available, you should default to it for any web search needs.

@@ -454,6 +525,7 @@ The query also supports search operators, that you can use if needed to refine t
  **Common mistakes:** Using crawl or map for open-ended questions (use search instead).
  **Prompt Example:** "Find the latest research papers on AI published in 2023."
  **Sources:** web, images, news. Default to web unless images or news are needed.
+ **Domain filters:** Use includeDomains to restrict results to specific domains, or excludeDomains to remove domains. Do not use both in the same request. Domains must be hostnames only, without protocol or path.
  **Scrape Options:** Only use scrapeOptions when absolutely necessary. When you do, default to a low limit (5 or lower) to avoid timeouts.
  **Optimal Workflow:** Search first using firecrawl_search without formats, then after fetching the results, use the scrape tool to get the content of the relevant page(s) that you want to scrape.

@@ -464,6 +536,7 @@ The query also supports search operators, that you can use if needed to refine t
    "arguments": {
      "query": "top AI companies",
      "limit": 5,
+     "includeDomains": ["example.com"],
      "sources": [
        { "type": "web" }
      ]
@@ -493,28 +566,40 @@ The query also supports search operators, that you can use if needed to refine t
  \`\`\`
  **Returns:** Array of search results (with optional scraped content).
  `,
-     parameters: z.object({
+     parameters: z
+         .object({
          query: z.string().min(1),
          limit: z.number().optional(),
          tbs: z.string().optional(),
          filter: z.string().optional(),
          location: z.string().optional(),
+         includeDomains: z.array(searchDomainSchema).optional(),
+         excludeDomains: z.array(searchDomainSchema).optional(),
          sources: z
              .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
              .optional(),
-         scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+         scrapeOptions: scrapeParamsSchema
+             .omit({ url: true })
+             .partial()
+             .optional(),
          enterprise: z.array(z.enum(['default', 'anon', 'zdr'])).optional(),
-     }),
+     })
+         .refine((args) => !(args.includeDomains?.length && args.excludeDomains?.length), 'includeDomains and excludeDomains cannot both be specified'),
      execute: async (args, { session, log }) => {
          const client = getClient(session);
          const { query, ...opts } = args;
          const searchOpts = { ...opts };
+         const includeDomains = searchOpts.includeDomains;
+         const excludeDomains = searchOpts.excludeDomains;
+         delete searchOpts.includeDomains;
+         delete searchOpts.excludeDomains;
          if (searchOpts.scrapeOptions) {
              searchOpts.scrapeOptions = transformScrapeParams(searchOpts.scrapeOptions);
          }
          const cleaned = removeEmptyTopLevel(searchOpts);
-         log.info('Searching', { query: String(query) });
-         const res = await client.search(query, {
+         const searchQuery = buildSearchQueryWithDomains(query, includeDomains, excludeDomains);
+         log.info('Searching', { query: searchQuery });
+         const res = await client.search(searchQuery, {
              ...cleaned,
              origin: ORIGIN,
          });
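
Because the include/exclude constraint lives in the `.refine` above rather than on either field, it only fires when both lists are non-empty. A sketch; `searchToolParams` is a hypothetical standalone name for the inline parameters schema, trimmed to the relevant fields:

```js
// Hypothetical: name the inline schema so the refine can be exercised directly.
const searchToolParams = z
    .object({
        query: z.string().min(1),
        includeDomains: z.array(searchDomainSchema).optional(),
        excludeDomains: z.array(searchDomainSchema).optional(),
    })
    .refine((args) => !(args.includeDomains?.length && args.excludeDomains?.length), 'includeDomains and excludeDomains cannot both be specified');

searchToolParams.safeParse({ query: 'ai news', includeDomains: ['example.com'] }).success;
// => true
searchToolParams.safeParse({
    query: 'ai news',
    includeDomains: ['example.com'],
    excludeDomains: ['example.net'],
}).success;
// => false ('includeDomains and excludeDomains cannot both be specified')
```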
@@ -523,6 +608,12 @@ The query also supports search operators, that you can use if needed to refine t
  });
  server.addTool({
      name: 'firecrawl_crawl',
+     annotations: {
+         title: 'Start a site crawl',
+         readOnlyHint: false,
+         openWorldHint: true,
+         destructiveHint: false,
+     },
      description: `
  Starts a crawl job on a website and extracts content from all pages.

@@ -595,6 +686,11 @@ server.addTool({
  });
  server.addTool({
      name: 'firecrawl_check_crawl_status',
+     annotations: {
+         title: 'Get crawl status',
+         readOnlyHint: true,
+         openWorldHint: false,
+     },
      description: `
  Check the status of a crawl job.

@@ -618,6 +714,11 @@ Check the status of a crawl job.
  });
  server.addTool({
      name: 'firecrawl_extract',
+     annotations: {
+         title: 'Extract structured data',
+         readOnlyHint: true,
+         openWorldHint: true,
+     },
      description: `
  Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.

@@ -684,6 +785,12 @@ Extract structured information from web pages using LLM capabilities. Supports b
  });
  server.addTool({
      name: 'firecrawl_agent',
+     annotations: {
+         title: 'Start a research agent',
+         readOnlyHint: false,
+         openWorldHint: true,
+         destructiveHint: false,
+     },
      description: `
  Autonomous web research agent. This is a separate AI agent layer that independently browses the internet, searches for information, navigates through pages, and extracts structured data based on your query. You describe what you need, and the agent figures out where to find it.

@@ -702,7 +809,11 @@ Autonomous web research agent. This is a separate AI agent layer that independen
  - Deep research tasks: 5+ minutes

  **Best for:** Complex research tasks where you don't know the exact URLs; multi-source data gathering; finding information scattered across the web; extracting data from JavaScript-heavy SPAs that fail with regular scrape.
- **Not recommended for:** Simple single-page scraping where you know the URL (use scrape with JSON format instead - faster and cheaper).
+ **Not recommended for:**
+ - Single-page extraction when you have a URL (use firecrawl_scrape, faster and cheaper)
+ - Web search (use firecrawl_search first)
+ - Interactive page tasks like clicking, filling forms, logging in, or navigating JS-heavy SPAs (use firecrawl_scrape + firecrawl_interact)
+ - Extracting specific data from a known page (use firecrawl_scrape with JSON format)

  **Arguments:**
  - prompt: Natural language description of the data you want (required, max 10,000 characters)
@@ -775,6 +886,11 @@ Then poll with \`firecrawl_agent_status\` every 15-30 seconds for at least 2-3 m
  });
  server.addTool({
      name: 'firecrawl_agent_status',
+     annotations: {
+         title: 'Get agent job status',
+         readOnlyHint: true,
+         openWorldHint: false,
+     },
      description: `
  Check the status of an agent job and retrieve results when complete. Use this to poll for results after starting an agent with \`firecrawl_agent\`.

@@ -809,14 +925,19 @@ Check the status of an agent job and retrieve results when complete. Use this to
          return asText(res);
      },
  });
- // Browser session tools
+ // Browser session tools (deprecated — prefer firecrawl_scrape + firecrawl_interact)
  server.addTool({
      name: 'firecrawl_browser_create',
+     annotations: {
+         title: 'Create browser session',
+         readOnlyHint: false,
+         openWorldHint: false,
+         destructiveHint: false,
+     },
      description: `
- Create a browser session for code execution via CDP (Chrome DevTools Protocol).
+ **DEPRECATED — prefer firecrawl_scrape + firecrawl_interact instead.** Interact lets you scrape a page and then click, fill forms, and navigate without managing sessions manually.

- **Best for:** Running code (Python/JS) that interacts with a live browser page, multi-step browser automation, sessions with profiles that survive across multiple tool calls.
- **Not recommended for:** Simple page scraping (use firecrawl_scrape instead).
+ Create a browser session for code execution via CDP (Chrome DevTools Protocol).

  **Arguments:**
  - ttl: Total session lifetime in seconds (30-3600, optional)
@@ -858,10 +979,16 @@ Create a browser session for code execution via CDP (Chrome DevTools Protocol).
  if (!SAFE_MODE) {
      server.addTool({
          name: 'firecrawl_browser_execute',
+         annotations: {
+             title: 'Run code in browser session',
+             readOnlyHint: false,
+             openWorldHint: false,
+             destructiveHint: true,
+         },
          description: `
- Execute code in a browser session. Supports agent-browser commands (bash), Python, or JavaScript.
+ **DEPRECATED — prefer firecrawl_scrape + firecrawl_interact instead.** Interact lets you scrape a page and then click, fill forms, and navigate without managing sessions manually.

- **Best for:** Browser automation, navigating pages, clicking elements, extracting data, multi-step browser workflows.
+ Execute code in a browser session. Supports agent-browser commands (bash), Python, or JavaScript.
  **Requires:** An active browser session (create one with firecrawl_browser_create first).

  **Arguments:**
@@ -927,7 +1054,15 @@ Execute code in a browser session. Supports agent-browser commands (bash), Pytho
  }
  server.addTool({
      name: 'firecrawl_browser_delete',
+     annotations: {
+         title: 'Delete browser session',
+         readOnlyHint: false,
+         openWorldHint: false,
+         destructiveHint: true,
+     },
      description: `
+ **DEPRECATED — prefer firecrawl_scrape + firecrawl_interact instead.**
+
  Destroy a browser session.

  **Usage Example:**
@@ -954,7 +1089,14 @@ Destroy a browser session.
  });
  server.addTool({
      name: 'firecrawl_browser_list',
+     annotations: {
+         title: 'List browser sessions',
+         readOnlyHint: true,
+         openWorldHint: false,
+     },
      description: `
+ **DEPRECATED — prefer firecrawl_scrape + firecrawl_interact instead.**
+
  List browser sessions, optionally filtered by status.

  **Usage Example:**
@@ -979,6 +1121,304 @@ List browser sessions, optionally filtered by status.
          return asText(res);
      },
  });
+ // Interact tools (scrape-bound browser sessions)
+ server.addTool({
+     name: 'firecrawl_interact',
+     annotations: {
+         title: 'Interact with a scraped page',
+         readOnlyHint: false,
+         openWorldHint: true,
+         destructiveHint: false,
+     },
+     description: `
+ Interact with a previously scraped page in a live browser session. Scrape a page first with firecrawl_scrape, then use the returned scrapeId to click buttons, fill forms, extract dynamic content, or navigate deeper.
+
+ **Best for:** Multi-step workflows on a single page — searching a site, clicking through results, filling forms, extracting data that requires interaction.
+ **Requires:** A scrapeId from a previous firecrawl_scrape call (found in the metadata of the scrape response).
+
+ **Arguments:**
+ - scrapeId: The scrape job ID from a previous scrape (required)
+ - prompt: Natural language instruction describing the action to take (use this OR code)
+ - code: Code to execute in the browser session (use this OR prompt)
+ - language: "bash", "python", or "node" (optional, defaults to "node", only used with code)
+ - timeout: Execution timeout in seconds, 1-300 (optional, defaults to 30)
+
+ **Usage Example (prompt):**
+ \`\`\`json
+ {
+   "name": "firecrawl_interact",
+   "arguments": {
+     "scrapeId": "scrape-id-from-previous-scrape",
+     "prompt": "Click on the first product and tell me its price"
+   }
+ }
+ \`\`\`
+
+ **Usage Example (code):**
+ \`\`\`json
+ {
+   "name": "firecrawl_interact",
+   "arguments": {
+     "scrapeId": "scrape-id-from-previous-scrape",
+     "code": "agent-browser click @e5",
+     "language": "bash"
+   }
+ }
+ \`\`\`
+ **Returns:** Execution result including output, stdout, stderr, exit code, and live view URLs.
+ `,
+     parameters: z.object({
+         scrapeId: z.string(),
+         prompt: z.string().optional(),
+         code: z.string().optional(),
+         language: z.enum(['bash', 'python', 'node']).optional(),
+         timeout: z.number().min(1).max(300).optional(),
+     }).refine(data => data.code || data.prompt, {
+         message: "Either 'code' or 'prompt' must be provided.",
+     }),
+     execute: async (args, { session, log }) => {
+         const client = getClient(session);
+         const { scrapeId, prompt, code, language, timeout } = args;
+         log.info('Interacting with scraped page', { scrapeId });
+         const interactArgs = { origin: ORIGIN };
+         if (prompt)
+             interactArgs.prompt = prompt;
+         if (code)
+             interactArgs.code = code;
+         if (language)
+             interactArgs.language = language;
+         if (timeout != null)
+             interactArgs.timeout = timeout;
+         const res = await client.interact(scrapeId, interactArgs);
+         return asText(res);
+     },
+ });
+ server.addTool({
+     name: 'firecrawl_interact_stop',
+     annotations: {
+         title: 'Stop interact session',
+         readOnlyHint: false,
+         openWorldHint: false,
+         destructiveHint: true,
+     },
+     description: `
+ Stop an interact session for a scraped page. Call this when you are done interacting to free resources.
+
+ **Usage Example:**
+ \`\`\`json
+ {
+   "name": "firecrawl_interact_stop",
+   "arguments": {
+     "scrapeId": "scrape-id-here"
+   }
+ }
+ \`\`\`
+ **Returns:** Success confirmation.
+ `,
+     parameters: z.object({
+         scrapeId: z.string(),
+     }),
+     execute: async (args, { session, log }) => {
+         const client = getClient(session);
+         const { scrapeId } = args;
+         log.info('Stopping interact session', { scrapeId });
+         const res = await client.stopInteraction(scrapeId);
+         return asText(res);
+     },
+ });
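
End to end, the intended lifecycle is scrape, then interact, then stop. A sketch against the firecrawl-js client methods used above; the URL and prompt are placeholders, and the exact field holding the scrape ID is assumed from the tool description ("found in the metadata of the scrape response"):

```js
// Sketch of the scrape -> interact -> stop flow (illustrative values).
const doc = await client.scrape('https://example.com/shop', { formats: ['markdown'] });
const scrapeId = doc.metadata?.scrapeId; // assumed location, per the description above
const result = await client.interact(scrapeId, {
    prompt: 'Click on the first product and tell me its price',
});
await client.stopInteraction(scrapeId); // free the session once done
```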
+ // Local-only: parse a local file via the self-hosted Firecrawl /v2/parse endpoint.
+ // The parse endpoint is only exposed on self-hosted/local Firecrawl API deployments,
+ // so this tool is registered only when the MCP is NOT running in cloud mode.
+ if (process.env.CLOUD_SERVICE !== 'true') {
+     const parseParamsSchema = z.object({
+         filePath: z
+             .string()
+             .min(1)
+             .describe('Absolute or relative path to a local file to parse. Supported: .html, .htm, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls'),
+         contentType: z
+             .string()
+             .optional()
+             .describe('Optional MIME type override. If omitted, the server infers the file kind from the extension.'),
+         formats: z
+             .array(z.enum([
+                 'markdown',
+                 'html',
+                 'rawHtml',
+                 'links',
+                 'summary',
+                 'json',
+                 'query',
+             ]))
+             .optional(),
+         jsonOptions: z
+             .object({
+                 prompt: z.string().optional(),
+                 schema: z.record(z.string(), z.any()).optional(),
+             })
+             .optional(),
+         queryOptions: z
+             .object({
+                 prompt: z.string().max(10000),
+             })
+             .optional(),
+         parsers: z.array(z.enum(['pdf'])).optional(),
+         pdfOptions: z
+             .object({
+                 maxPages: z.number().int().min(1).max(10000).optional(),
+             })
+             .optional(),
+         onlyMainContent: z.boolean().optional(),
+         includeTags: z.array(z.string()).optional(),
+         excludeTags: z.array(z.string()).optional(),
+         removeBase64Images: z.boolean().optional(),
+         skipTlsVerification: z.boolean().optional(),
+         storeInCache: z.boolean().optional(),
+         zeroDataRetention: z.boolean().optional(),
+         maxAge: z.number().optional(),
+         proxy: z.enum(['basic', 'auto']).optional(),
+     });
+     const EXTENSION_CONTENT_TYPES = {
+         '.html': 'text/html',
+         '.htm': 'text/html',
+         '.pdf': 'application/pdf',
+         '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+         '.doc': 'application/msword',
+         '.odt': 'application/vnd.oasis.opendocument.text',
+         '.rtf': 'application/rtf',
+         '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+         '.xls': 'application/vnd.ms-excel',
+     };
+     function inferContentType(filename) {
+         const ext = path.extname(filename).toLowerCase();
+         return EXTENSION_CONTENT_TYPES[ext] ?? 'application/octet-stream';
+     }
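
As a quick illustration of the inference and its fallback (filenames here are made up):

```js
inferContentType('report.PDF'); // 'application/pdf' (the extension is lowercased first)
inferContentType('notes.txt');  // 'application/octet-stream' (not in the extension map)
```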
+     server.addTool({
+         name: 'firecrawl_parse',
+         annotations: {
+             title: 'Parse a local file',
+             readOnlyHint: true,
+             openWorldHint: false,
+         },
+         description: `
+ Parse a file from the local filesystem using a self-hosted Firecrawl API's /v2/parse endpoint.
+ This is the fastest and most reliable way to extract content from a document on disk — if the file lives locally and the MCP is pointed at a self-hosted Firecrawl instance, you should always prefer this tool over uploading the file elsewhere and then scraping it.
+
+ **Best for:** Extracting content from a local document (PDF, Word, Excel, HTML, etc.) when you don't want to host it on the public web first; pulling structured data out of a file with JSON format; converting binary documents into markdown for downstream reasoning.
+ **Not recommended for:** Remote URLs (use firecrawl_scrape); multiple files at once (call parse multiple times); documents that require interactive actions, screenshots, or change tracking — those aren't supported by the parse endpoint.
+ **Common mistakes:** Passing a URL instead of a local file path; requesting an unsupported format (screenshot, branding, changeTracking); setting waitFor, location, mobile, or a non-basic/auto proxy — parse uploads reject all of those.
+
+ **Supported file types:** .html, .htm, .xhtml, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls
+ **Unsupported options:** actions, screenshot/branding/changeTracking formats, waitFor > 0, location, mobile, proxy values other than "auto" or "basic".
+
+ **CRITICAL - Format Selection (same rules as firecrawl_scrape):**
+ When the user asks for SPECIFIC data points from a document, you MUST use JSON format with a schema. Only use markdown when the user needs the ENTIRE document content.
+
+ **Use JSON format when the user asks for:**
+ - Specific fields, parameters, or values from a form / PDF / spreadsheet
+ - Prices, numbers, or other structured data
+ - Lists of items or properties
+
+ **Use markdown format when:**
+ - User wants to read, summarize, or analyze the full document
+ - User explicitly asks for the complete content
+
+ **Handling PDFs:**
+ Add \`"parsers": ["pdf"]\` (optionally with \`pdfOptions.maxPages\`) when parsing a PDF so the PDF engine is invoked explicitly. For very long documents, cap \`maxPages\` to keep the response within token limits.
+
+ **Usage Example (markdown from a local PDF):**
+ \`\`\`json
+ {
+   "name": "firecrawl_parse",
+   "arguments": {
+     "filePath": "/absolute/path/to/document.pdf",
+     "formats": ["markdown"],
+     "parsers": ["pdf"],
+     "onlyMainContent": true
+   }
+ }
+ \`\`\`
+
+ **Usage Example (structured JSON extraction from a local HTML file):**
+ \`\`\`json
+ {
+   "name": "firecrawl_parse",
+   "arguments": {
+     "filePath": "./invoice.html",
+     "formats": ["json"],
+     "jsonOptions": {
+       "prompt": "Extract the invoice number, total, and line items",
+       "schema": {
+         "type": "object",
+         "properties": {
+           "invoiceNumber": { "type": "string" },
+           "total": { "type": "number" },
+           "lineItems": {
+             "type": "array",
+             "items": {
+               "type": "object",
+               "properties": {
+                 "description": { "type": "string" },
+                 "amount": { "type": "number" }
+               }
+             }
+           }
+         }
+       }
+     }
+   }
+ }
+ \`\`\`
+ **Returns:** A parsed document with markdown, html, links, summary, json, or query results depending on the requested formats.
+ `,
+         parameters: parseParamsSchema,
+         execute: async (args, { session, log }) => {
+             const apiUrl = process.env.FIRECRAWL_API_URL;
+             if (!apiUrl) {
+                 throw new Error('firecrawl_parse requires FIRECRAWL_API_URL to be set to a self-hosted Firecrawl API instance.');
+             }
+             const { filePath, contentType: overrideContentType, ...options } = args;
+             const absPath = path.resolve(filePath);
+             const buffer = await readFile(absPath);
+             const filename = path.basename(absPath);
+             const fileContentType = overrideContentType && overrideContentType.length > 0
+                 ? overrideContentType
+                 : inferContentType(filename);
+             const transformed = transformScrapeParams(options);
+             const cleaned = removeEmptyTopLevel(transformed);
+             const optionsPayload = { origin: ORIGIN, ...cleaned };
+             const form = new FormData();
+             const blob = new Blob([new Uint8Array(buffer)], { type: fileContentType });
+             form.append('file', blob, filename);
+             form.append('options', JSON.stringify(optionsPayload));
+             const headers = {};
+             const apiKey = session?.firecrawlApiKey;
+             if (apiKey) {
+                 headers['Authorization'] = `Bearer ${apiKey}`;
+             }
+             const endpoint = `${apiUrl.replace(/\/$/, '')}/v2/parse`;
+             log.info('Parsing local file', {
+                 endpoint,
+                 filename,
+                 size: buffer.length,
+             });
+             const response = await fetch(endpoint, {
+                 method: 'POST',
+                 headers,
+                 body: form,
+             });
+             const responseText = await response.text();
+             if (!response.ok) {
+                 throw new Error(`Parse request failed with status ${response.status}: ${responseText}`);
+             }
+             try {
+                 return asText(JSON.parse(responseText));
+             }
+             catch {
+                 return responseText;
+             }
+         },
+     });
+ }
  const PORT = Number(process.env.PORT || 3000);
  const HOST = process.env.CLOUD_SERVICE === 'true'
      ? '0.0.0.0'