npm - firecrawl-mcp - Versions diffs - 3.20.3 → 3.20.5 - Mend

firecrawl-mcp 3.20.3 → 3.20.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -223,6 +223,19 @@ const server = new FastMCP({
         const envCred = resolveCredentialFromEnv();
         if (process.env.CLOUD_SERVICE === 'true') {
             if (!headerCred) {
+                // Keyless free tier over the hosted MCP: serve it only when a forwarding
+                // secret is configured, we know the end-user's client IP (so the API can
+                // rate-limit per real IP, not the shared server IP), AND that IP still
+                // has free quota. If the IP is out of quota (or keyless is off), fall
+                // through to throw so FastMCP emits the OAuth 401 + WWW-Authenticate
+                // challenge — i.e. prompt the user to connect an account exactly when
+                // their free quota runs out.
+                const clientIp = extractClientIp(request);
+                if (process.env.KEYLESS_PROXY_SECRET &&
+                    clientIp &&
+                    (await keylessEligible(clientIp))) {
+                    return { firecrawlApiKey: undefined, research, keylessClientIp: clientIp };
+                }
                 throw new Error('Firecrawl credentials required: OAuth access token (Authorization: Bearer fco_...) or API key (x-firecrawl-api-key)');
             }
             return { firecrawlApiKey: headerCred, research };
@@ -233,8 +246,12 @@ const server = new FastMCP({
         if (!httpStreaming &&
             !process.env.FIRECRAWL_API_KEY &&
             !process.env.FIRECRAWL_API_URL) {
-            console.error('Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided');
-            process.exit(1);
+            // No credential and no self-hosted URL: run in keyless mode. scrape and
+            // search work for free (rate-limited per IP) against the Firecrawl cloud;
+            // every other tool needs an API key and will return Unauthorized.
+            console.error('No FIRECRAWL_API_KEY or FIRECRAWL_API_URL set — running in keyless mode. ' +
+                'firecrawl_scrape and firecrawl_search are free (rate-limited per IP) against the Firecrawl cloud; ' +
+                'other tools require an API key (get one free at https://firecrawl.dev).');
         }
         if (httpStreaming && !credential && !process.env.FIRECRAWL_API_URL) {
             console.error('HTTP MCP transport requires FIRECRAWL_API_URL and/or credentials (OAuth: Authorization Bearer fco_..., or FIRECRAWL_API_KEY / FIRECRAWL_OAUTH_TOKEN)');
@@ -559,7 +576,6 @@ ${SAFE_MODE
     parameters: scrapeParamsSchema,
     execute: async (args, { session, log }) => {
         const { url, ...options } = args;
-        const client = getClient(session);
         const transformed = transformScrapeParams(options);
         const cleaned = removeEmptyTopLevel(transformed);
         if (cleaned.lockdown) {
@@ -568,6 +584,15 @@ ${SAFE_MODE
         else {
             log.info('Scraping URL', { url: String(url) });
         }
+        if (isKeylessMode(session)) {
+            const json = await keylessPost('/v2/scrape', {
+                url: String(url),
+                ...cleaned,
+                origin: ORIGIN,
+            }, session);
+            return asText(json?.data ?? json);
+        }
+        const client = getClient(session);
         const res = await client.scrape(String(url), {
             ...cleaned,
             origin: ORIGIN,
@@ -724,7 +749,6 @@ The query also supports search operators, that you can use if needed to refine t
     })
         .refine((args) => !(args.includeDomains?.length && args.excludeDomains?.length), 'includeDomains and excludeDomains cannot both be specified'),
     execute: async (args, { session, log }) => {
-        const client = getClient(session);
         const { query, ...opts } = args;
         const searchOpts = { ...opts };
         const includeDomains = searchOpts.includeDomains;
@@ -737,16 +761,22 @@ The query also supports search operators, that you can use if needed to refine t
         const cleaned = removeEmptyTopLevel(searchOpts);
         const searchQuery = buildSearchQueryWithDomains(query, includeDomains, excludeDomains);
         log.info('Searching', { query: searchQuery });
+        const searchBody = {
+            query: searchQuery,
+            ...cleaned,
+            origin: ORIGIN,
+        };
+        if (isKeylessMode(session)) {
+            const json = await keylessPost('/v2/search', searchBody, session);
+            return asText(json ?? {});
+        }
         // Call /v2/search through the SDK's HTTP layer (auth + retries) instead
         // of `client.search()` so we preserve the full response envelope. The
         // high-level `search()` helper strips `id` and `creditsUsed`, which
         // breaks the `firecrawl_search_feedback` workflow that this server
         // explicitly tells the LLM to use after every search.
-        const httpRes = await client.http.post('/v2/search', {
-            query: searchQuery,
-            ...cleaned,
-            origin: ORIGIN,
-        });
+        const client = getClient(session);
+        const httpRes = await client.http.post('/v2/search', searchBody);
         return asText(httpRes?.data ?? {});
     },
 });
@@ -754,6 +784,74 @@ const DEFAULT_CLOUD_API_URL = 'https://api.firecrawl.dev';
 /**
  * Base URL used for direct API calls: `FIRECRAWL_API_URL` when it is set and
  * non-empty, otherwise `DEFAULT_CLOUD_API_URL`, with one trailing slash removed.
  */
 function resolveApiBaseUrl() {
     return (process.env.FIRECRAWL_API_URL || DEFAULT_CLOUD_API_URL).replace(/\/$/, '');
 }
+// Keyless free tier: when a session has no credential and we're targeting the
+// Firecrawl cloud (not self-hosted via FIRECRAWL_API_URL), scrape and search
+// are free, rate-limited per IP. On the multi-tenant CLOUD_SERVICE deployment
+// this applies only to secret-gated sessions that carry a forwarded client IP.
+// The cloud only grants this when NO Authorization header is sent, so we bypass
+// the SDK — which always attaches a Bearer header — and post directly.
+/**
+ * Best-effort end-user client IP from the incoming MCP request headers.
+ *
+ * Only the `x-forwarded-for` header is consulted. When the header value is an
+ * array its first element is used; the value is then split on commas and the
+ * first entry, trimmed, is returned.
+ *
+ * NOTE(review): the entry is not validated as an IP address here — confirm
+ * that whatever sits in front of this server sets the header reliably.
+ *
+ * @param request Incoming request object; may be nullish or have no `headers`.
+ * @returns The first forwarded-for entry, or `undefined` when the header is
+ *   absent, is not a string, or is empty after trimming.
+ */
+function extractClientIp(request) {
+    const xff = request?.headers?.['x-forwarded-for'];
+    const raw = Array.isArray(xff) ? xff[0] : xff;
+    const first = typeof raw === 'string' ? raw.split(',')[0].trim() : undefined;
+    return first || undefined; // an empty first entry ('') is normalized to undefined
+}
+/**
+ * Read-only check (no quota consumed) of whether a client IP can still use the
+ * keyless free tier, via the API's secret-gated eligibility endpoint. Fails
+ * closed: anything other than a clear "eligible: true" means fall through to the
+ * OAuth challenge rather than silently granting keyless.
+ *
+ * The request goes to `/v2/keyless/eligibility` under `resolveApiBaseUrl()`,
+ * with the client IP and `KEYLESS_PROXY_SECRET` sent as the
+ * `x-firecrawl-keyless-ip` and `x-firecrawl-keyless-secret` headers. No
+ * `method` is passed to `fetch`, so its default method is used.
+ *
+ * Resolves to `false` (never rejects) when the secret is unset, the response
+ * status is not ok, the body is not JSON, `eligible` is anything other than
+ * the boolean `true`, or `fetch` itself throws.
+ *
+ * NOTE(review): no timeout or abort signal is passed to `fetch`, so a stalled
+ * endpoint delays the caller for as long as the request stays open — confirm
+ * that is acceptable for the caller awaiting this check.
+ *
+ * @param clientIp Value forwarded verbatim in the `x-firecrawl-keyless-ip` header.
+ * @returns Promise resolving to `true` only on an explicit `eligible: true`.
+ */
+async function keylessEligible(clientIp) {
+    const secret = process.env.KEYLESS_PROXY_SECRET;
+    if (!secret)
+        return false;
+    try {
+        const response = await fetch(`${resolveApiBaseUrl()}/v2/keyless/eligibility`, {
+            headers: {
+                'x-firecrawl-keyless-ip': clientIp,
+                'x-firecrawl-keyless-secret': secret,
+            },
+        });
+        if (!response.ok)
+            return false;
+        const json = await response.json().catch(() => ({})); // unparseable body becomes {} and therefore "not eligible"
+        return json?.eligible === true; // strict check: truthy non-boolean values do not count
+    }
+    catch {
+        return false; // any thrown error (e.g. from fetch) is treated as "not eligible"
+    }
+}
+function isKeylessMode(session) { // Decides whether a tool call for this session takes the keyless (no API key) path.
+    if (session?.firecrawlApiKey) // a session that carries a credential is never keyless
+        return false;
+    if (process.env.CLOUD_SERVICE === 'true') {
+        // Hosted: keyless only for secret-gated sessions carrying the forwarded
+        // client IP (so the per-IP cap is meaningful, not the shared server IP).
+        return !!session?.keylessClientIp; // false for a nullish session or an empty/missing keylessClientIp
+    }
+    // Local/stdio against the cloud (not a self-hosted FIRECRAWL_API_URL).
+    return !process.env.FIRECRAWL_API_URL; // any non-empty FIRECRAWL_API_URL turns keyless mode off here
+}
+async function keylessPost(path, body, session) { // POSTs `body` as JSON to resolveApiBaseUrl() + path and resolves to the parsed JSON response.
+    const headers = { 'Content-Type': 'application/json' }; // no Authorization header is added anywhere in this function
+    // Forward the real client IP (secret-authenticated) when proxying keyless
+    // requests through the hosted MCP, so the API rate-limits per real IP.
+    if (session?.keylessClientIp && process.env.KEYLESS_PROXY_SECRET) {
+        headers['x-firecrawl-keyless-ip'] = session.keylessClientIp;
+        headers['x-firecrawl-keyless-secret'] = process.env.KEYLESS_PROXY_SECRET;
+    }
+    const response = await fetch(`${resolveApiBaseUrl()}${path}`, { // a rejection from fetch is not caught here and propagates to the caller
+        method: 'POST',
+        headers,
+        body: JSON.stringify(body),
+    });
+    const json = await response.json().catch(() => ({})); // a body that fails to parse as JSON is treated as {}
+    if (!response.ok) {
+        throw new Error(json?.error || `Firecrawl request failed (HTTP ${response.status})`); // prefers the response's `error` field when truthy, else a generic message with the status
+    }
+    return json; // the whole parsed body is returned; nothing is unwrapped here
+}
 const SEARCH_FEEDBACK_DISABLED = ['1', 'true', 'yes', 'on'].includes((process.env.FIRECRAWL_NO_SEARCH_FEEDBACK ||
     process.env.FIRECRAWL_DISABLE_SEARCH_FEEDBACK ||
     '')

package/dist/research.js CHANGED Viewed

@@ -14,9 +14,6 @@
  */
 import { z } from 'zod';
 const BASE = '/v2/research';
-function asText(data) {
-    return JSON.stringify(data, null, 2);
-}
 /** Append a value (or repeated array values) to a URLSearchParams instance. */
 function appendParam(params, key, value) {
     if (value == null)
@@ -35,6 +32,104 @@ function withQuery(path, params) {
     const qs = params.toString();
     return qs ? `${path}?${qs}` : path;
 }
+// --- result formatting (ported from research-index-front/src/agent_eval.ts) ---
+// Max authors to print per paper (with affiliations); the rest collapse to a
+// "+N more" tail so a large collaboration doesn't flood the context.
+const MAX_AUTHORS = 15;
+// Cap each abstract so a page of hits stays within the MCP output-token limit.
+const MAX_ABSTRACT_CHARS = 600;
+// Per-affiliation char cap — keeps one long org string (e.g. a full multi-dept
+// university address) from bloating the authors line.
+const MAX_AFFIL_CHARS = 60;
+// Hard ceiling on the whole authors line, as a final guard.
+const MAX_AUTHORS_LINE_CHARS = 400;
+/**
+ * Best display id for a paper: its arXiv id, falling back to the canonical id.
+ *
+ * @param p Paper record; must be non-nullish because `p.ids` is read directly.
+ * @returns `p.ids.arxiv[0]` when it is not null/undefined, otherwise
+ *   `p.paper_id`, otherwise the literal `'?'`.
+ */
+function displayId(p) {
+    return p.ids?.arxiv?.[0] ?? p.paper_id ?? '?';
+}
+/**
+ * Format the authors line, accepting either the string or structured form.
+ *
+ * String form: split on commas, each piece trimmed, empty pieces dropped.
+ * Structured form: handled with `length`, `slice` and `map`, reading `name`
+ * and an optional `affiliation` from each entry; a non-empty trimmed
+ * affiliation is appended in parentheses, cut to `MAX_AFFIL_CHARS`.
+ *
+ * At most `MAX_AUTHORS` entries are shown, joined with `'; '`; when there are
+ * more, a `; +N more` tail is appended. The finished line, including the
+ * `Authors: ` prefix, is cut to `MAX_AUTHORS_LINE_CHARS`.
+ *
+ * NOTE(review): the final cut is a plain `slice`, so on a very long line it
+ * can truncate an entry mid-way or drop the `+N more` tail.
+ *
+ * @param authors Comma-separated string, or a list of `{ name, affiliation }` entries.
+ * @returns The formatted line, or `null` when `authors` is falsy or contains
+ *   no entries.
+ */
+function fmtAuthors(authors) {
+    if (!authors)
+        return null;
+    let shown;
+    let total;
+    if (typeof authors === 'string') {
+        const names = authors
+            .split(',')
+            .map((s) => s.trim())
+            .filter(Boolean);
+        if (names.length === 0)
+            return null;
+        total = names.length;
+        shown = names.slice(0, MAX_AUTHORS);
+    }
+    else {
+        if (authors.length === 0)
+            return null;
+        total = authors.length;
+        shown = authors.slice(0, MAX_AUTHORS).map((a) => {
+            const aff = a.affiliation?.trim();
+            return aff ? `${a.name} (${aff.slice(0, MAX_AFFIL_CHARS)})` : a.name;
+        });
+    }
+    const extra = total > MAX_AUTHORS ? `; +${total - MAX_AUTHORS} more` : ''; // tail counts the entries that were not shown
+    return ('Authors: ' + shown.join('; ') + extra).slice(0, MAX_AUTHORS_LINE_CHARS);
+}
+/**
+ * Render ranked papers as `[id] title` / authors / abstract blocks.
+ *
+ * Each result becomes up to three lines: a header built from `displayId(r)`
+ * and `r.title` (`'(untitled)'` when the title is null/undefined), the
+ * `fmtAuthors` line when that returns non-null, and the abstract with every
+ * whitespace run collapsed to one space and cut to `MAX_ABSTRACT_CHARS`
+ * (`'(no abstract)'` when the abstract is falsy). Blocks are separated by a
+ * blank line.
+ *
+ * @param results List of paper records; may be nullish or empty.
+ * @returns The rendered text, or `'(no results)'` for a nullish/empty list.
+ */
+function fmtHits(results) {
+    if (!results || results.length === 0)
+        return '(no results)';
+    return results
+        .map((r) => {
+        const lines = [`[${displayId(r)}] ${r.title ?? '(untitled)'}`];
+        const authors = fmtAuthors(r.authors);
+        if (authors)
+            lines.push(authors);
+        lines.push((r.abstract || '(no abstract)')
+            .replace(/\s+/g, ' ')
+            .slice(0, MAX_ABSTRACT_CHARS));
+        return lines.join('\n');
+    })
+        .join('\n\n');
+}
+// Cap GitHub matched content so a page of results stays within the MCP
+// output-token limit. Higher than abstracts since issue/PR threads carry the
+// signal (repro steps, stack traces) the agent actually needs to verify.
+const MAX_GITHUB_CONTENT_CHARS = 1200;
+/**
+ * Render GitHub history/readme hits as header / url / body blocks — the same
+ * overall shape as `fmtHits`, but tuned for issues/PRs and readmes.
+ *
+ * Header: `[repo] README` when `resultType` is `'repo_readme'`; otherwise
+ * `[repo#number]` (the `#number` part only when `number` is not
+ * null/undefined), followed by `(pageType, N segments)` built from whichever
+ * of those two values are truthy. A missing `repo` is shown as `?`.
+ * URL line: `readmeUrl`, falling back to `url`; omitted when neither is set.
+ * Body: `contentMd` when truthy, otherwise `snippet`. It is trimmed at both
+ * ends but its internal newlines are left as-is (so tables/code survive), then
+ * cut to `MAX_GITHUB_CONTENT_CHARS`; `'(no content)'` when nothing is left.
+ * Blocks are separated by a blank line.
+ *
+ * @param results List of GitHub hit records; may be nullish or empty.
+ * @returns The rendered text, or `'(no results)'` for a nullish/empty list.
+ */
+function fmtGithub(results) {
+    if (!results || results.length === 0)
+        return '(no results)';
+    return results
+        .map((r) => {
+        const lines = [];
+        if (r.resultType === 'repo_readme') {
+            lines.push(`[${r.repo ?? '?'}] README`);
+        }
+        else {
+            const ref = r.number != null ? `#${r.number}` : '';
+            const meta = [
+                r.pageType,
+                r.segmentCount ? `${r.segmentCount} segments` : '',
+            ]
+                .filter(Boolean)
+                .join(', ');
+            lines.push(`[${r.repo ?? '?'}${ref}]${meta ? ` (${meta})` : ''}`);
+        }
+        const url = r.readmeUrl ?? r.url;
+        if (url)
+            lines.push(url);
+        const body = (r.contentMd || r.snippet || '').trim();
+        lines.push(body ? body.slice(0, MAX_GITHUB_CONTENT_CHARS) : '(no content)');
+        return lines.join('\n');
+    })
+        .join('\n\n');
+}
 /**
  * Only present these tools when the session has research enabled.
  * True only when `session.research` is strictly `true`; a nullish session
  * yields false.
  */
 const canAccess = (session) => session?.research === true;
 export function registerResearchTools(server, getClient) {
@@ -83,7 +178,7 @@ export function registerResearchTools(server, getClient) {
             appendParam(params, 'to', to);
             const client = getClient(session);
             const res = await client.http.get(withQuery(`${BASE}/papers`, params));
-            return asText(res.data);
+            return fmtHits(res.data?.results);
         },
     });
     // --- related_papers ---
@@ -127,7 +222,8 @@ export function registerResearchTools(server, getClient) {
             appendParam(params, 'anchor', anchors);
             const client = getClient(session);
             const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(primary)}/similar`, params));
-            return asText(res.data);
+            const note = res.data?.note ? `\nnote: ${res.data.note}` : '';
+            return `${fmtHits(res.data?.results)}\n(pool_size=${res.data?.pool_size ?? 0})${note}`;
         },
     });
     // --- read_paper ---
@@ -161,11 +257,13 @@ export function registerResearchTools(server, getClient) {
             appendParam(params, 'k', k);
             const client = getClient(session);
             const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(arxiv_id)}`, params));
-            return asText(res.data);
+            const passages = res.data?.passages ?? [];
+            return passages.length
+                ? passages.map((p) => p.text).join('\n---\n')
+                : '(no full-text passages available for this paper)';
         },
     });
     // --- search_github ---
-    // TODO: description pending — the user is writing this one.
     server.addTool({
         name: 'firecrawl_research_search_github',
         canAccess,
@@ -187,7 +285,7 @@ export function registerResearchTools(server, getClient) {
             appendParam(params, 'k', k);
             const client = getClient(session);
             const res = await client.http.get(withQuery(`${BASE}/github`, params));
-            return asText(res.data);
+            return fmtGithub(res.data?.results);
         },
     });
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "3.20.3",
+  "version": "3.20.5",
   "description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
   "type": "module",
   "mcpName": "io.github.firecrawl/firecrawl-mcp-server",