firecrawl-mcp 3.20.3 → 3.20.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/research.js +106 -8
- package/package.json +1 -1
package/dist/research.js
CHANGED
|
@@ -14,9 +14,6 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { z } from 'zod';
|
|
16
16
|
const BASE = '/v2/research';
|
|
17
|
-
function asText(data) {
|
|
18
|
-
return JSON.stringify(data, null, 2);
|
|
19
|
-
}
|
|
20
17
|
/** Append a value (or repeated array values) to a URLSearchParams instance. */
|
|
21
18
|
function appendParam(params, key, value) {
|
|
22
19
|
if (value == null)
|
|
@@ -35,6 +32,104 @@ function withQuery(path, params) {
|
|
|
35
32
|
const qs = params.toString();
|
|
36
33
|
return qs ? `${path}?${qs}` : path;
|
|
37
34
|
}
|
|
35
|
+
// --- result formatting (ported from research-index-front/src/agent_eval.ts) ---
// Max authors to print per paper (with affiliations); the rest collapse to a
// "+N more" tail so a large collaboration doesn't flood the context.
const MAX_AUTHORS = 15;
// Cap each abstract so a page of hits stays within the MCP output-token limit.
const MAX_ABSTRACT_CHARS = 600;
// Per-affiliation char cap — keeps one long org string (e.g. a full multi-dept
// university address) from bloating the authors line.
const MAX_AFFIL_CHARS = 60;
// Hard ceiling on the whole authors line, as a final guard.
const MAX_AUTHORS_LINE_CHARS = 400;

/**
 * Best display id for a paper: its arXiv id, falling back to the canonical id.
 *
 * @param {object} p - Paper record; may carry `ids.arxiv` (array) and `paper_id`.
 * @returns {string} First arXiv id, else `paper_id`, else `'?'`.
 */
function displayId(p) {
    return p?.ids?.arxiv?.[0] ?? p?.paper_id ?? '?';
}

/**
 * Collapse every whitespace run to a single space and trim the ends.
 *
 * @param {*} value - Any value; null/undefined become the empty string.
 * @returns {string} Single-line text.
 */
function collapseWhitespace(value) {
    return value == null ? '' : String(value).replace(/\s+/g, ' ').trim();
}

/**
 * Cut `text` to at most `max` UTF-16 units, ending with an ellipsis when cut
 * so the reader can tell a truncated value from a complete one.
 *
 * @param {string} text - Text to cap.
 * @param {number} max - Maximum length of the returned string.
 * @returns {string} `text` unchanged when it fits, else a clipped copy.
 */
function clipWithEllipsis(text, max) {
    if (text.length <= max)
        return text;
    let end = max - 1;
    // Don't leave a lone high surrogate behind when the cut lands inside a pair.
    const last = text.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff)
        end -= 1;
    return `${text.slice(0, end)}…`;
}

/**
 * Render one author as `name` or `name (affiliation)`.
 *
 * @param {string|object|null} author - A plain name or a `{ name, affiliation }` record.
 * @returns {string|null} Display text, or null when no usable name is present.
 */
function fmtAuthorEntry(author) {
    const name = typeof author === 'string'
        ? author.trim()
        : String(author?.name ?? '').trim();
    if (!name)
        return null;
    const aff = typeof author?.affiliation === 'string' ? author.affiliation.trim() : '';
    return aff ? `${name} (${aff.slice(0, MAX_AFFIL_CHARS)})` : name;
}

/**
 * Format the authors line, accepting either the string or structured form.
 *
 * At most MAX_AUTHORS entries are printed and the line never exceeds
 * MAX_AUTHORS_LINE_CHARS. Entries are dropped whole (never cut mid-name) and
 * everything not printed is counted in a trailing `; +N more`, so the tail
 * survives even when the character ceiling is what limits the line.
 *
 * @param {string|Array|null|undefined} authors - Comma-separated names, or an
 *   array of `{ name, affiliation }` records.
 * @returns {string|null} `Authors: …` line, or null when there is nothing to show.
 */
function fmtAuthors(authors) {
    let entries;
    if (typeof authors === 'string') {
        entries = authors
            .split(',')
            .map((s) => s.trim())
            .filter(Boolean);
    }
    else if (Array.isArray(authors)) {
        entries = authors.map(fmtAuthorEntry).filter(Boolean);
    }
    else {
        return null;
    }
    if (entries.length === 0)
        return null;
    const prefix = 'Authors: ';
    const total = entries.length;
    const tailFor = (hidden) => (hidden > 0 ? `; +${hidden} more` : '');
    let line = '';
    let shown = 0;
    for (const entry of entries.slice(0, MAX_AUTHORS)) {
        const candidate = shown === 0 ? prefix + entry : `${line}; ${entry}`;
        // Reserve room for the tail we would need if we stopped right after this entry.
        if (candidate.length + tailFor(total - shown - 1).length > MAX_AUTHORS_LINE_CHARS)
            break;
        line = candidate;
        shown += 1;
    }
    if (shown === 0) {
        // Even the first entry alone is over the ceiling: hard-cut it but keep the tail.
        const tail = tailFor(total - 1);
        return (prefix + entries[0]).slice(0, MAX_AUTHORS_LINE_CHARS - tail.length) + tail;
    }
    return line + tailFor(total - shown);
}

/**
 * Render ranked papers as `[id] title` / authors / abstract blocks.
 *
 * Title and abstract are flattened to one line each; a missing or blank title
 * becomes `(untitled)` and a missing or blank abstract becomes `(no abstract)`.
 * Abstracts longer than MAX_ABSTRACT_CHARS are clipped with an ellipsis.
 *
 * @param {Array|null|undefined} results - Paper records from the research API.
 * @returns {string} Blocks separated by a blank line, or `(no results)`.
 */
function fmtHits(results) {
    const papers = Array.isArray(results) ? results.filter((r) => r != null) : [];
    if (papers.length === 0)
        return '(no results)';
    return papers
        .map((r) => {
            const title = collapseWhitespace(r.title) || '(untitled)';
            const lines = [`[${displayId(r)}] ${title}`];
            const authors = fmtAuthors(r.authors);
            if (authors)
                lines.push(authors);
            const abstract = collapseWhitespace(r.abstract) || '(no abstract)';
            lines.push(clipWithEllipsis(abstract, MAX_ABSTRACT_CHARS));
            return lines.join('\n');
        })
        .join('\n\n');
}
|
|
95
|
+
// Cap GitHub matched content so a page of results stays within the MCP
// output-token limit. Higher than abstracts since issue/PR threads carry the
// signal (repro steps, stack traces) the agent actually needs to verify.
const MAX_GITHUB_CONTENT_CHARS = 1200;

/**
 * Cut a GitHub body to at most `max` UTF-16 units, ending with an ellipsis
 * when cut so truncated content is distinguishable from complete content.
 *
 * @param {string} text - Body text to cap.
 * @param {number} max - Maximum length of the returned string.
 * @returns {string} `text` unchanged when it fits, else a clipped copy.
 */
function clipGithubBody(text, max) {
    if (text.length <= max)
        return text;
    let end = max - 1;
    // Don't leave a lone high surrogate behind when the cut lands inside a pair.
    const last = text.charCodeAt(end - 1);
    if (last >= 0xd800 && last <= 0xdbff)
        end -= 1;
    return `${text.slice(0, end)}…`;
}

/**
 * Render GitHub history/readme hits as `[repo#number] (kind)` / url / body
 * blocks — the same shape as `fmtHits`, but tuned for issues/PRs and readmes.
 * Markdown content keeps its newlines (so tables/code survive). The body is
 * `contentMd` when it has non-blank text, otherwise `snippet`, otherwise
 * `(no content)`; it is capped at MAX_GITHUB_CONTENT_CHARS.
 *
 * @param {Array|null|undefined} results - Hit records from the research API.
 * @returns {string} Blocks separated by a blank line, or `(no results)`.
 */
function fmtGithub(results) {
    const hits = Array.isArray(results) ? results.filter((r) => r != null) : [];
    if (hits.length === 0)
        return '(no results)';
    return hits
        .map((r) => {
            const lines = [];
            const repo = r.repo || '?';
            if (r.resultType === 'repo_readme') {
                lines.push(`[${repo}] README`);
            }
            else {
                const ref = r.number != null ? `#${r.number}` : '';
                const meta = [
                    r.pageType,
                    r.segmentCount ? `${r.segmentCount} segments` : '',
                ]
                    .filter(Boolean)
                    .join(', ');
                lines.push(`[${repo}${ref}]${meta ? ` (${meta})` : ''}`);
            }
            // `||` rather than `??`: an empty readmeUrl must not hide a usable url.
            const url = r.readmeUrl || r.url;
            if (url)
                lines.push(url);
            // Trim before choosing, so a whitespace-only contentMd falls through to the snippet.
            const body = [r.contentMd, r.snippet]
                .map((v) => (typeof v === 'string' ? v.trim() : ''))
                .find(Boolean) ?? '';
            lines.push(body ? clipGithubBody(body, MAX_GITHUB_CONTENT_CHARS) : '(no content)');
            return lines.join('\n');
        })
        .join('\n\n');
}
|
|
38
133
|
/** Only present these tools when the session has research enabled. */
|
|
39
134
|
const canAccess = (session) => {
    // Strict comparison: only a literal `true` flag exposes the research tools;
    // a missing session, a missing flag, or any other value keeps them hidden.
    const researchFlag = session?.research;
    return researchFlag === true;
};
|
|
40
135
|
export function registerResearchTools(server, getClient) {
|
|
@@ -83,7 +178,7 @@ export function registerResearchTools(server, getClient) {
|
|
|
83
178
|
appendParam(params, 'to', to);
|
|
84
179
|
const client = getClient(session);
|
|
85
180
|
const res = await client.http.get(withQuery(`${BASE}/papers`, params));
|
|
86
|
-
return
|
|
181
|
+
return fmtHits(res.data?.results);
|
|
87
182
|
},
|
|
88
183
|
});
|
|
89
184
|
// --- related_papers ---
|
|
@@ -127,7 +222,8 @@ export function registerResearchTools(server, getClient) {
|
|
|
127
222
|
appendParam(params, 'anchor', anchors);
|
|
128
223
|
const client = getClient(session);
|
|
129
224
|
const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(primary)}/similar`, params));
|
|
130
|
-
|
|
225
|
+
const note = res.data?.note ? `\nnote: ${res.data.note}` : '';
|
|
226
|
+
return `${fmtHits(res.data?.results)}\n(pool_size=${res.data?.pool_size ?? 0})${note}`;
|
|
131
227
|
},
|
|
132
228
|
});
|
|
133
229
|
// --- read_paper ---
|
|
@@ -161,11 +257,13 @@ export function registerResearchTools(server, getClient) {
|
|
|
161
257
|
appendParam(params, 'k', k);
|
|
162
258
|
const client = getClient(session);
|
|
163
259
|
const res = await client.http.get(withQuery(`${BASE}/papers/${encodeURIComponent(arxiv_id)}`, params));
|
|
164
|
-
|
|
260
|
+
const passages = res.data?.passages ?? [];
|
|
261
|
+
return passages.length
|
|
262
|
+
? passages.map((p) => p.text).join('\n---\n')
|
|
263
|
+
: '(no full-text passages available for this paper)';
|
|
165
264
|
},
|
|
166
265
|
});
|
|
167
266
|
// --- search_github ---
|
|
168
|
-
// TODO: description pending — the user is writing this one.
|
|
169
267
|
server.addTool({
|
|
170
268
|
name: 'firecrawl_research_search_github',
|
|
171
269
|
canAccess,
|
|
@@ -187,7 +285,7 @@ export function registerResearchTools(server, getClient) {
|
|
|
187
285
|
appendParam(params, 'k', k);
|
|
188
286
|
const client = getClient(session);
|
|
189
287
|
const res = await client.http.get(withQuery(`${BASE}/github`, params));
|
|
190
|
-
return
|
|
288
|
+
return fmtGithub(res.data?.results);
|
|
191
289
|
},
|
|
192
290
|
});
|
|
193
291
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.20.3",
|
|
3
|
+
"version": "3.20.4",
|
|
4
4
|
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",
|