firecrawl-mcp 3.20.2 → 3.20.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6,7 +6,30 @@ import { readFile } from 'node:fs/promises';
6
6
  import path from 'node:path';
7
7
  import { z } from 'zod';
8
8
  import { registerMonitorTools } from './monitor.js';
9
+ import { registerResearchTools } from './research.js';
9
10
  dotenv.config({ debug: false, quiet: true });
11
/**
 * Report whether the research tools should be exposed for a session.
 *
 * Two independent switches turn research on:
 *  - the `FIRECRAWL_RESEARCH=true` environment variable (checked first, so it
 *    also covers transports that have no request object), or
 *  - a `?research=true` query parameter on the incoming MCP request URL.
 *
 * @param {{ url?: string } | undefined} request Incoming request, if any.
 * @returns {boolean} `true` when either switch is set to the exact string `true`.
 */
function isResearchEnabled(request) {
    const enabledByEnv = process.env.FIRECRAWL_RESEARCH === 'true';
    if (enabledByEnv) {
        return true;
    }
    const rawUrl = request?.url;
    if (!rawUrl) {
        return false;
    }
    let flag;
    try {
        // `request.url` may be path-only, so resolve it against a dummy origin.
        const parsed = new URL(rawUrl, 'http://localhost');
        flag = parsed.searchParams.get('research');
    }
    catch {
        // Unparsable URL: treat research as disabled rather than failing.
        return false;
    }
    return flag === 'true';
}
10
33
  function normalizeHeader(value) {
11
34
  if (value == null)
12
35
  return undefined;
@@ -187,6 +210,7 @@ const server = new FastMCP({
187
210
  protectedResourceMetadataUrl: getOAuthProtectedResourceMetadataUrl(),
188
211
  },
189
212
  authenticate: async (request) => {
213
+ const research = isResearchEnabled(request);
190
214
  // FastMCP invokes `authenticate(undefined)` for the stdio transport
191
215
  // because there is no HTTP request context. Without this null guard,
192
216
  // accessing `request.headers` throws a TypeError, FastMCP silently
@@ -201,7 +225,7 @@ const server = new FastMCP({
201
225
  if (!headerCred) {
202
226
  throw new Error('Firecrawl credentials required: OAuth access token (Authorization: Bearer fco_...) or API key (x-firecrawl-api-key)');
203
227
  }
204
- return { firecrawlApiKey: headerCred };
228
+ return { firecrawlApiKey: headerCred, research };
205
229
  }
206
230
  const credential = headerCred ?? envCred;
207
231
  // Self-hosted / stdio / HTTP streamable — headers supply MCP OAuth token when present
@@ -216,7 +240,7 @@ const server = new FastMCP({
216
240
  console.error('HTTP MCP transport requires FIRECRAWL_API_URL and/or credentials (OAuth: Authorization Bearer fco_..., or FIRECRAWL_API_KEY / FIRECRAWL_OAUTH_TOKEN)');
217
241
  process.exit(1);
218
242
  }
219
- return { firecrawlApiKey: credential };
243
+ return { firecrawlApiKey: credential, research };
220
244
  },
221
245
  // Lightweight health endpoint for LB checks
222
246
  health: {
@@ -1542,4 +1566,18 @@ else {
1542
1566
  };
1543
1567
  }
1544
1568
  registerMonitorTools(server);
1569
// Gating for the research tools. FastMCP only consults a tool's `canAccess`
// on the HTTP transport; over stdio every registered tool is exposed. The two
// cases are therefore handled differently:
//   - HTTP (CLOUD_SERVICE / SSE_LOCAL / HTTP_STREAMABLE_SERVER): register
//     unconditionally and let each tool's `canAccess` hide it unless the
//     session has research enabled (`FIRECRAWL_RESEARCH=true` env or
//     `?research=true` on the request).
//   - stdio (local): register only when `FIRECRAWL_RESEARCH=true`, because
//     `canAccess` cannot hide the tools there.
const isHttpTransport = ['CLOUD_SERVICE', 'SSE_LOCAL', 'HTTP_STREAMABLE_SERVER'].some((flagName) => process.env[flagName] === 'true');
if (isHttpTransport || process.env.FIRECRAWL_RESEARCH === 'true') {
    registerResearchTools(server, getClient);
}
1545
1583
  await server.start(args);
@@ -0,0 +1,193 @@
1
+ /**
2
+ * Firecrawl Research tools (experimental).
3
+ *
4
+ * Thin MCP wrappers over the `/v2/research/*` endpoints (arXiv papers + GitHub
5
+ * history/readmes). These tools are hidden unless research is enabled for the
6
+ * session — locally via `FIRECRAWL_RESEARCH=true`, or remotely via the
7
+ * `?research=true` query param on the MCP endpoint (see `isResearchEnabled` in
8
+ * index.ts, which sets `session.research`).
9
+ *
10
+ * The installed `@mendable/firecrawl-js` predates the SDK's `research` client,
11
+ * so we call the endpoints directly through the SDK's HTTP layer (auth +
12
+ * retries) via `client.http.get(...)`, mirroring how the search tool reaches
13
+ * `/v2/search`.
14
+ */
15
+ import { z } from 'zod';
16
// Common path prefix shared by every research endpoint.
const BASE = '/v2/research';
/**
 * Render an API payload as pretty-printed JSON for the tool response.
 *
 * @param {unknown} data Value to serialize.
 * @returns {string} JSON text indented by two spaces.
 */
function asText(data) {
    const indentWidth = 2;
    const serialized = JSON.stringify(data, null, indentWidth);
    return serialized;
}
20
/**
 * Append a value (or each element of an array) to a URLSearchParams instance.
 *
 * `null`/`undefined` values and values whose string form is empty are
 * skipped. The same rule applies to scalars and to array elements, so an
 * optional string argument passed as `''` does not produce a dangling
 * `key=` parameter. Falsy-but-meaningful values such as `0` and `false`
 * are kept.
 *
 * @param {URLSearchParams} params Target collection; mutated in place.
 * @param {string} key Query parameter name.
 * @param {unknown} value Scalar, or an array whose entries are each appended
 *   under the same `key` (repeated-parameter encoding).
 * @returns {void}
 */
function appendParam(params, key, value) {
    // Treat a scalar as a one-element list so both shapes share one skip rule.
    const candidates = Array.isArray(value) ? value : [value];
    for (const candidate of candidates) {
        if (candidate == null) {
            continue;
        }
        const text = String(candidate);
        if (text.length > 0) {
            params.append(key, text);
        }
    }
}
34
/**
 * Attach a serialized query string to a path.
 *
 * @param {string} path Endpoint path without a query string.
 * @param {URLSearchParams} params Parameters to serialize.
 * @returns {string} `path?query`, or `path` unchanged when there are no parameters.
 */
function withQuery(path, params) {
    const serialized = params.toString();
    if (!serialized) {
        return path;
    }
    return `${path}?${serialized}`;
}
38
/**
 * Visibility predicate shared by all research tools: a tool is shown only
 * when the session's `research` flag is strictly `true`.
 */
const canAccess = (session) => {
    const researchFlag = session?.research;
    return researchFlag === true;
};
40
/**
 * Register the experimental research tools on an MCP server.
 *
 * Each tool is a thin read-only wrapper that turns its arguments into query
 * parameters, issues a GET against a `/v2/research/*` endpoint through the
 * client's HTTP layer, and returns the response body as pretty-printed JSON.
 * Every tool carries the shared `canAccess` predicate.
 *
 * @param {object} server MCP server exposing `addTool(...)`.
 * @param {(session: unknown) => { http: { get(path: string): Promise<{ data: unknown }> } }} getClient
 *   Resolves the API client for a session.
 * @returns {void}
 */
export function registerResearchTools(server, getClient) {
    /**
     * Shared request path for all tools: build the query string from ordered
     * `[key, value]` pairs, GET the endpoint, and render the payload as text.
     */
    const fetchAsText = async (session, path, entries) => {
        const params = new URLSearchParams();
        for (const [key, value] of entries) {
            appendParam(params, key, value);
        }
        const client = getClient(session);
        const res = await client.http.get(withQuery(path, params));
        return asText(res.data);
    };
    // --- search_papers ---
    server.addTool({
        name: 'firecrawl_research_search_papers',
        canAccess,
        annotations: {
            title: 'Search arXiv papers',
            readOnlyHint: true,
            openWorldHint: true,
        },
        description: 'Primary entry point for finding arXiv papers by topic. Semantic (HyDE) search over arXiv ' +
            'abstracts; returns ranked papers with arXiv id, title, and abstract. The query should be a ' +
            'natural-language description of what you want. Run SEVERAL distinct framings of the question ' +
            '(sibling domains, rival methods, dataset/benchmark names) rather than one query — recall ' +
            'improves markedly with diverse framings. Returns up to `k` results (default 40).',
        parameters: z.object({
            query: z.string().min(1),
            k: z.number().int().min(1).max(500).optional(),
            authors: z
                .array(z.string())
                .optional()
                .describe('Author substring filter(s); ALL must match (case-insensitive).'),
            categories: z
                .array(z.string())
                .optional()
                .describe('arXiv category filter(s) (e.g. `cs.LG`); ALL must match.'),
            from: z
                .string()
                .optional()
                .describe('Inclusive lower bound on created/updated date (`YYYY-MM-DD`).'),
            to: z
                .string()
                .optional()
                .describe('Inclusive upper bound on created/updated date (`YYYY-MM-DD`).'),
        }),
        execute: async (args, { session }) => {
            const { query, k, authors, categories, from, to } = args;
            return fetchAsText(session, `${BASE}/papers`, [
                ['query', query],
                ['k', k],
                ['authors', authors],
                ['categories', categories],
                ['from', from],
                ['to', to],
            ]);
        },
    });
    // --- related_papers ---
    server.addTool({
        name: 'firecrawl_research_related_papers',
        canAccess,
        annotations: {
            title: 'Find related arXiv papers',
            readOnlyHint: true,
            openWorldHint: true,
        },
        description: 'Expand from anchor papers you have already found, via the citation graph, ranked and filtered ' +
            'to a natural-language `intent`. Pass arXiv ids of your strongest hits as `seed_ids`. Modes: ' +
            '`similar` (cocitation/coupling — papers in the same niche; the default), `citers` (papers ' +
            'that cite the anchors), `references` (papers the anchors cite). This reaches relevant papers ' +
            'that plain search misses, so use it on your best hits before finishing. A `similar` call ' +
            'already runs a DEEP multi-round expansion internally (re-seeding from each round’s best ' +
            'finds), so one call reaches the wider neighborhood — no need to chain many. Returns the ' +
            'candidates plus the pool size.',
        parameters: z.object({
            seed_ids: z.array(z.string()).min(1).max(10),
            intent: z.string().min(1),
            mode: z.enum(['similar', 'citers', 'references']).optional(),
            k: z.number().int().min(1).max(500).optional(),
            rerank: z
                .boolean()
                .optional()
                .describe('Apply an additional rerank over the fused candidates.'),
        }),
        execute: async (args, { session }) => {
            const { seed_ids, intent, mode, k, rerank } = args;
            // The schema admits empty strings, and an empty primary seed would
            // produce the path `/papers//similar`. Drop blank ids up front and
            // fail with a clear message when nothing usable is left.
            const seeds = seed_ids
                .map((id) => id.trim())
                .filter((id) => id.length > 0);
            if (seeds.length === 0) {
                throw new Error('seed_ids must contain at least one non-empty arXiv id');
            }
            // The endpoint takes a single primary seed in the path; any additional
            // seeds ride along as repeated `anchor` params.
            const [primary, ...anchors] = seeds;
            return fetchAsText(session, `${BASE}/papers/${encodeURIComponent(primary)}/similar`, [
                ['intent', intent],
                ['mode', mode],
                ['k', k],
                ['rerank', rerank],
                ['anchor', anchors],
            ]);
        },
    });
    // --- read_paper ---
    server.addTool({
        name: 'firecrawl_research_read_paper',
        canAccess,
        annotations: {
            title: 'Read an arXiv paper',
            readOnlyHint: true,
            openWorldHint: true,
        },
        description: 'Read the most relevant in-body (full-text) passages of ONE specific paper for a question. Use ' +
            'this to VERIFY whether a candidate actually satisfies a constraint before you include or ' +
            "reject it (e.g. 'does this paper actually use technique X / report a score on benchmark Y'). " +
            "Returns the best-matching passages, or a notice if the paper's full text is unavailable.",
        parameters: z.object({
            arxiv_id: z.string().min(1),
            question: z.string().min(1),
            k: z
                .number()
                .int()
                .min(1)
                .max(50)
                .optional()
                .describe('Number of passages to return (default 4).'),
        }),
        execute: async (args, { session }) => {
            const { arxiv_id, question, k } = args;
            // The tool-level `question` is sent to the endpoint as `query`.
            return fetchAsText(session, `${BASE}/papers/${encodeURIComponent(arxiv_id)}`, [
                ['query', question],
                ['k', k],
            ]);
        },
    });
    // --- search_github ---
    // TODO: the description below is provisional; final wording is still pending.
    server.addTool({
        name: 'firecrawl_research_search_github',
        canAccess,
        annotations: {
            title: 'Search GitHub history',
            readOnlyHint: true,
            openWorldHint: true,
        },
        description: 'Search GitHub issue/PR history and repository readmes. Returns ranked matches with repo, ' +
            'url, a short snippet, and (when available) the full matched content in markdown.',
        parameters: z.object({
            query: z.string().min(1),
            k: z.number().int().min(1).max(100).optional(),
        }),
        execute: async (args, { session }) => {
            const { query, k } = args;
            return fetchAsText(session, `${BASE}/github`, [
                ['query', query],
                ['k', k],
            ]);
        },
    });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl-mcp",
3
- "version": "3.20.2",
3
+ "version": "3.20.3",
4
4
  "description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
5
5
  "type": "module",
6
6
  "mcpName": "io.github.firecrawl/firecrawl-mcp-server",
@@ -15,7 +15,7 @@
15
15
  },
16
16
  "license": "MIT",
17
17
  "dependencies": {
18
- "@mendable/firecrawl-js": "4.24.0",
18
+ "@mendable/firecrawl-js": "4.25.2",
19
19
  "dotenv": "^17.2.2",
20
20
  "firecrawl-fastmcp": "^1.0.5",
21
21
  "typescript": "^5.9.2",