@kak4343/scholar-mcp 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.ja.md +162 -0
- package/README.md +156 -0
- package/dist/cache/disk_cache.d.ts +27 -0
- package/dist/cache/disk_cache.js +82 -0
- package/dist/cache/disk_cache.js.map +1 -0
- package/dist/cache/index.d.ts +47 -0
- package/dist/cache/index.js +67 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/cache/memory_cache.d.ts +17 -0
- package/dist/cache/memory_cache.js +36 -0
- package/dist/cache/memory_cache.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +166 -0
- package/dist/index.js.map +1 -0
- package/dist/notion/push.d.ts +43 -0
- package/dist/notion/push.js +190 -0
- package/dist/notion/push.js.map +1 -0
- package/dist/sources/arxiv.d.ts +5 -0
- package/dist/sources/arxiv.js +57 -0
- package/dist/sources/arxiv.js.map +1 -0
- package/dist/sources/pubmed.d.ts +15 -0
- package/dist/sources/pubmed.js +133 -0
- package/dist/sources/pubmed.js.map +1 -0
- package/dist/sources/semantic_scholar.d.ts +7 -0
- package/dist/sources/semantic_scholar.js +61 -0
- package/dist/sources/semantic_scholar.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +43 -0
- package/scripts/smoke_test.mjs +120 -0
- package/src/cache/disk_cache.ts +94 -0
- package/src/cache/index.ts +95 -0
- package/src/cache/memory_cache.ts +43 -0
- package/src/index.ts +212 -0
- package/src/notion/push.ts +237 -0
- package/src/sources/arxiv.ts +61 -0
- package/src/sources/pubmed.ts +143 -0
- package/src/sources/semantic_scholar.ts +67 -0
- package/src/types.ts +28 -0
- package/tsconfig.json +19 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
5
|
+
import { PubMedSearcher } from "./sources/pubmed.js";
|
|
6
|
+
import { ArxivSearcher } from "./sources/arxiv.js";
|
|
7
|
+
import { SemanticScholarSearcher } from "./sources/semantic_scholar.js";
|
|
8
|
+
import { SearchCache, buildCacheKey } from "./cache/index.js";
|
|
9
|
+
import { exportResultsToNotion } from "./notion/push.js";
|
|
10
|
+
import type { Source, SearchParams, SearchResult } from "./types.js";
|
|
11
|
+
|
|
12
|
+
const PUBMED_API_KEY = process.env.PUBMED_API_KEY;
|
|
13
|
+
const SEMANTIC_SCHOLAR_API_KEY = process.env.SEMANTIC_SCHOLAR_API_KEY;
|
|
14
|
+
|
|
15
|
+
const pubmed = new PubMedSearcher(PUBMED_API_KEY);
|
|
16
|
+
const arxiv = new ArxivSearcher();
|
|
17
|
+
const semantic = new SemanticScholarSearcher(SEMANTIC_SCHOLAR_API_KEY);
|
|
18
|
+
|
|
19
|
+
interface CachedPayload {
|
|
20
|
+
query: string;
|
|
21
|
+
sources_queried: Source[];
|
|
22
|
+
total_results: number;
|
|
23
|
+
results: SearchResult[];
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const searchCache = new SearchCache<CachedPayload>();
|
|
27
|
+
|
|
28
|
+
// MCP server instance exposing the scholar tools. The advertised version is
// kept in step with the published package version (0.3.0) so clients can
// tell which release they are talking to.
const server = new Server(
  { name: "scholar-mcp", version: "0.3.0" },
  { capabilities: { tools: {} } }
);
|
|
32
|
+
|
|
33
|
+
/** Descriptor advertised for the `scholar_search` tool. */
const scholarSearchTool = {
  name: "scholar_search",
  description:
    "Search PubMed, arXiv, and Semantic Scholar for scholarly papers. Returns title, authors, abstract, DOI, publication date, and venue. Supports date filtering and per-source filtering. Results are cached in memory (1h TTL) and on disk (7 days, ~/.scholar-mcp/cache/) keyed by sha256 of the request.",
  inputSchema: {
    type: "object" as const,
    properties: {
      query: {
        type: "string",
        description: "Search query (English recommended for accuracy)",
      },
      sources: {
        type: "array",
        items: { type: "string", enum: ["pubmed", "arxiv", "semantic_scholar"] },
        description: "Sources to search (default: all three)",
      },
      max_results: {
        type: "number",
        description: "Maximum results per source (default 10, max 50)",
      },
      date_from: { type: "string", description: "Filter by publication date (YYYY-MM-DD)" },
      date_to: { type: "string", description: "Filter by publication date (YYYY-MM-DD)" },
    },
    required: ["query"],
  },
};

/** Descriptor advertised for the `scholar_export_to_notion` tool. */
const scholarExportToNotionTool = {
  name: "scholar_export_to_notion",
  description:
    "Push scholar_search results into a Notion database. Compatible with the user's Knowledge DB schema: Title (title), Authors (rich_text), DOI (url), Source (select), Published (date), Venue (rich_text), Citation Count (number), Abstract (rich_text), Japanese Summary (rich_text, optional), Status (select). Reads NOTION_TOKEN from env or ~/.scholar-mcp/config.json. Set dry_run=true to build the page payloads without calling the Notion API.",
  inputSchema: {
    type: "object" as const,
    properties: {
      results: {
        type: "array",
        description: "SearchResult[] from scholar_search",
        items: { type: "object" },
      },
      notion_database_id: {
        type: "string",
        description: "Target Notion database id (UUID, with or without dashes)",
      },
      notion_token: {
        type: "string",
        description: "Optional Notion integration token (else NOTION_TOKEN env / ~/.scholar-mcp/config.json)",
      },
      include_japanese_summary: {
        type: "boolean",
        description: "Include japanese_summary field if present on the results (default false)",
      },
      dry_run: {
        type: "boolean",
        description: "If true, return the page payloads without calling Notion (default false)",
      },
    },
    required: ["results", "notion_database_id"],
  },
};

// Answer tools/list with the two tool descriptors defined above.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  return { tools: [scholarSearchTool, scholarExportToNotionTool] };
});
|
|
95
|
+
|
|
96
|
+
// Dispatch a tools/call request to the matching handler.
//
// Tool arguments arrive as untyped JSON and may be absent altogether, so the
// required fields are checked here before the value is cast to the handler's
// parameter type. Invalid input and unknown tool names are reported by
// throwing an Error.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const toolName = request.params.name;
  // A missing `arguments` object is treated as empty so the checks below
  // produce a clear message instead of a TypeError further down.
  const args: Record<string, unknown> = request.params.arguments ?? {};

  if (toolName === "scholar_search") {
    if (typeof args.query !== "string" || args.query.trim() === "") {
      throw new Error("scholar_search requires a non-empty string `query`");
    }
    return handleSearch(args as unknown as SearchParams);
  }

  if (toolName === "scholar_export_to_notion") {
    if (!Array.isArray(args.results)) {
      throw new Error("scholar_export_to_notion requires `results` to be an array");
    }
    if (typeof args.notion_database_id !== "string") {
      throw new Error("scholar_export_to_notion requires a string `notion_database_id`");
    }
    return handleExportToNotion(args as unknown as {
      results: SearchResult[];
      notion_database_id: string;
      notion_token?: string;
      include_japanese_summary?: boolean;
      dry_run?: boolean;
    });
  }

  throw new Error(`Unknown tool: ${toolName}`);
});
|
|
111
|
+
|
|
112
|
+
/**
 * Execute a `scholar_search` request.
 *
 * Looks the request up in `searchCache` first; on a miss, queries every
 * requested source in parallel, merges the results and returns them as a
 * JSON text payload.
 *
 * A source that throws is logged and contributes no results. When that
 * happens the merged payload is NOT written to the cache, so a transient
 * outage is not replayed as a cache hit, and the response carries a
 * `sources_failed` list so the caller can tell the result set is partial.
 *
 * @param args Search parameters from the tool call. `max_results` is
 *   truncated to an integer and clamped to 1..50; a non-numeric value falls
 *   back to 10.
 * @returns MCP tool result with a single text item containing the JSON body.
 */
async function handleSearch(args: SearchParams) {
  const sources: Source[] = (args.sources ?? ["pubmed", "arxiv", "semantic_scholar"]) as Source[];

  const requestedMax = Number(args.max_results ?? 10);
  const max_results = Number.isFinite(requestedMax)
    ? Math.min(Math.max(Math.trunc(requestedMax), 1), 50)
    : 10;

  const params: SearchParams = {
    query: args.query,
    max_results,
    date_from: args.date_from,
    date_to: args.date_to,
  };

  const cacheKey = buildCacheKey({
    tool: "scholar_search",
    query: args.query,
    sources,
    date_from: args.date_from,
    date_to: args.date_to,
    max_results,
  });

  const cached = searchCache.get(cacheKey);
  if (cached) {
    return {
      content: [
        {
          type: "text",
          text: JSON.stringify(
            { ...cached, cache_hit: true, cache_hit_rate: searchCache.hitRate() },
            null,
            2,
          ),
        },
      ],
    };
  }

  const searchers: Record<Source, () => Promise<SearchResult[]>> = {
    pubmed: () => pubmed.search(params),
    arxiv: () => arxiv.search(params),
    semantic_scholar: () => semantic.search(params),
  };

  // Sources whose lookup threw; used to decide whether the payload is
  // complete enough to cache.
  const failedSources: Source[] = [];
  const resultsArrays = await Promise.all(
    sources.map(async (s): Promise<SearchResult[]> => {
      try {
        return await searchers[s]();
      } catch (e) {
        console.error(`[scholar_search] ${s} failed:`, (e as Error).message);
        failedSources.push(s);
        return [];
      }
    }),
  );
  const allResults = resultsArrays.flat();

  const payload: CachedPayload = {
    query: args.query,
    sources_queried: sources,
    total_results: allResults.length,
    results: allResults,
  };

  // Only complete result sets are cached.
  if (failedSources.length === 0) {
    searchCache.set(cacheKey, payload);
  }

  return {
    content: [
      {
        type: "text",
        text: JSON.stringify(
          {
            ...payload,
            ...(failedSources.length > 0 ? { sources_failed: failedSources } : {}),
            cache_hit: false,
            cache_hit_rate: searchCache.hitRate(),
          },
          null,
          2,
        ),
      },
    ],
  };
}
|
|
185
|
+
|
|
186
|
+
/**
 * Execute a `scholar_export_to_notion` request: forward the five known
 * fields to `exportResultsToNotion` and wrap its summary as pretty-printed
 * JSON in a single MCP text item.
 */
async function handleExportToNotion(args: {
  results: SearchResult[];
  notion_database_id: string;
  notion_token?: string;
  include_japanese_summary?: boolean;
  dry_run?: boolean;
}) {
  const { results, notion_database_id, notion_token, include_japanese_summary, dry_run } = args;

  const summary = await exportResultsToNotion({
    results,
    notion_database_id,
    notion_token,
    include_japanese_summary,
    dry_run,
  });

  const text = JSON.stringify(summary, null, 2);
  return {
    content: [{ type: "text", text }],
  };
}
|
|
209
|
+
|
|
210
|
+
// Connect the server to a stdio transport. The start-up banner is written
// with console.error (stderr) and reports the 0.3 release line of the
// published package.
const transport = new StdioServerTransport();
await server.connect(transport);
console.error("[scholar-mcp] v0.3 running on stdio");
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import { Client } from "@notionhq/client";
|
|
2
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import type { SearchResult } from "../types.js";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Notion page schema used by scholar_export_to_notion.
|
|
9
|
+
*
|
|
10
|
+
* Compatible with the user's existing Knowledge DB (data_source_id
|
|
11
|
+
* 0a489d15-83e8-471d-ba1e-f04030473967) and the eye-ophthalmology weekly
|
|
12
|
+
* review push_to_notion.py helper.
|
|
13
|
+
*
|
|
14
|
+
* Properties (must be defined on the target database):
|
|
15
|
+
* Title - title
|
|
16
|
+
* Authors - rich_text
|
|
17
|
+
* DOI - url
|
|
18
|
+
* Source - select (pubmed / arxiv / semantic_scholar)
|
|
19
|
+
* Published - date
|
|
20
|
+
* Venue - rich_text
|
|
21
|
+
* Citation Count - number
|
|
22
|
+
* Abstract - rich_text (truncated to 2000 chars, full text in body)
|
|
23
|
+
* Japanese Summary - rich_text (only when include_japanese_summary)
|
|
24
|
+
* Status - select ("To Read")
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const ABSTRACT_PROPERTY_LIMIT = 2000; // Notion rich_text per-property hard limit
|
|
28
|
+
const BLOCK_TEXT_LIMIT = 2000; // Notion paragraph block hard limit
|
|
29
|
+
|
|
30
|
+
export interface ExportInput {
|
|
31
|
+
results: SearchResult[];
|
|
32
|
+
notion_database_id: string;
|
|
33
|
+
notion_token?: string;
|
|
34
|
+
include_japanese_summary?: boolean;
|
|
35
|
+
dry_run?: boolean;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
export interface ExportedPage {
|
|
39
|
+
result_index: number;
|
|
40
|
+
title: string;
|
|
41
|
+
page_id?: string;
|
|
42
|
+
page_url?: string;
|
|
43
|
+
status: "created" | "dry_run" | "error";
|
|
44
|
+
error?: string;
|
|
45
|
+
/** Page properties payload that was (or would be) sent to Notion. */
|
|
46
|
+
properties: Record<string, unknown>;
|
|
47
|
+
/** Children blocks that were (or would be) appended. */
|
|
48
|
+
children: Array<Record<string, unknown>>;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export interface ExportOutput {
|
|
52
|
+
database_id: string;
|
|
53
|
+
total: number;
|
|
54
|
+
created: number;
|
|
55
|
+
errors: number;
|
|
56
|
+
dry_run: boolean;
|
|
57
|
+
pages: ExportedPage[];
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
 * Resolve a Notion token from (in order): the explicit argument, the
 * NOTION_TOKEN environment variable, then `~/.scholar-mcp/config.json`
 * (`{ "notion_token": "..." }`).
 *
 * Each candidate is used only if it is a string that is non-empty after
 * trimming; anything else (including non-string values that slip through
 * from untyped tool arguments or a hand-edited config) is skipped rather
 * than throwing. A missing, unreadable or malformed config file is treated
 * as "no token".
 *
 * @param explicit Token supplied directly by the caller, if any.
 * @returns The trimmed token, or undefined when no source provided one.
 *   Callers in dry-run mode can ignore that, but real exports must fail.
 */
export function resolveNotionToken(explicit?: string): string | undefined {
  // Normalise one candidate: trimmed non-empty string, else undefined.
  const usable = (candidate: unknown): string | undefined => {
    if (typeof candidate !== "string") return undefined;
    const trimmed = candidate.trim();
    return trimmed === "" ? undefined : trimmed;
  };

  const fromArgument = usable(explicit);
  if (fromArgument) return fromArgument;

  const fromEnv = usable(process.env.NOTION_TOKEN);
  if (fromEnv) return fromEnv;

  const configPath = join(homedir(), ".scholar-mcp", "config.json");
  if (existsSync(configPath)) {
    try {
      const cfg: unknown = JSON.parse(readFileSync(configPath, "utf8"));
      if (cfg !== null && typeof cfg === "object") {
        const fromConfig = usable((cfg as { notion_token?: unknown }).notion_token);
        if (fromConfig) return fromConfig;
      }
    } catch {
      // ignore malformed config; treat as missing
    }
  }
  return undefined;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Build a Notion rich_text array holding `content` as a single text item,
 * truncated to at most `limit` UTF-16 code units.
 *
 * If the cut would fall between the two halves of a surrogate pair, the
 * truncation backs off by one code unit so the result never ends in a lone
 * high surrogate.
 *
 * @param content Text to wrap; an empty value yields an empty array.
 * @param limit Maximum length of the emitted text (defaults to
 *   ABSTRACT_PROPERTY_LIMIT).
 * @returns Zero or one rich_text items.
 */
function richText(
  content: string,
  limit: number = ABSTRACT_PROPERTY_LIMIT,
): Array<{ type: "text"; text: { content: string } }> {
  if (!content) return [];

  let end = Math.min(limit, content.length);
  if (end > 0 && end < content.length) {
    const lastKept = content.charCodeAt(end - 1);
    const firstDropped = content.charCodeAt(end);
    const splitsPair =
      lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) end -= 1;
  }

  return [{ type: "text", text: { content: content.slice(0, end) } }];
}
|
|
87
|
+
|
|
88
|
+
/**
 * Split `content` into Notion paragraph blocks of at most `limit` UTF-16
 * code units each, in order, so that concatenating the chunks reproduces
 * the input.
 *
 * A chunk boundary is moved back by one code unit when it would separate a
 * surrogate pair, so no block starts or ends with half a character. The only
 * case where a chunk exceeds `limit` is `limit === 1` with a surrogate pair,
 * which is emitted whole.
 *
 * @param content Text to split; an empty value yields no blocks.
 * @param limit Maximum chunk length (defaults to BLOCK_TEXT_LIMIT).
 * @returns Paragraph block objects, one per chunk.
 * @throws RangeError if `limit` is not a positive integer.
 */
function chunkParagraphBlocks(
  content: string,
  limit: number = BLOCK_TEXT_LIMIT,
): Array<Record<string, unknown>> {
  if (!content) return [];
  if (!Number.isInteger(limit) || limit < 1) {
    throw new RangeError("limit must be a positive integer");
  }

  const blocks: Array<Record<string, unknown>> = [];
  let start = 0;
  while (start < content.length) {
    let end = Math.min(start + limit, content.length);
    if (end < content.length) {
      const lastKept = content.charCodeAt(end - 1);
      const firstDropped = content.charCodeAt(end);
      const splitsPair =
        lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
      if (splitsPair) {
        // Back off so the pair starts the next chunk; if that would leave an
        // empty chunk, take the whole pair instead to guarantee progress.
        end = end - 1 > start ? end - 1 : end + 1;
      }
    }
    blocks.push({
      object: "block",
      type: "paragraph",
      paragraph: {
        rich_text: [{ type: "text", text: { content: content.slice(start, end) } }],
      },
    });
    start = end;
  }
  return blocks;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Build the Notion page payload (properties plus body blocks) for one
 * search result.
 *
 * Title, Authors, Source and Status are always set; DOI, Published, Venue,
 * Citation Count, Abstract and Japanese Summary are added only when the
 * result carries a value for them. Property text is truncated by
 * `richText`; whenever the abstract or the Japanese summary is longer than
 * ABSTRACT_PROPERTY_LIMIT, its full text is also appended as paragraph
 * blocks so nothing is lost.
 *
 * Results come from untyped tool input, so a missing or non-array
 * `authors` is treated as "no authors" and a non-string `doi` is ignored
 * instead of throwing.
 *
 * @param r Search result to convert.
 * @param databaseId Unused here; the caller places it in the page parent.
 * @param includeJapaneseSummary Whether to emit the Japanese Summary field.
 * @returns The properties record and the children block list.
 */
function buildPagePayload(
  r: SearchResult,
  databaseId: string,
  includeJapaneseSummary: boolean,
): { properties: Record<string, unknown>; children: Array<Record<string, unknown>> } {
  const authorNames = Array.isArray(r.authors) ? r.authors.join(", ") : "";

  const properties: Record<string, unknown> = {
    Title: { title: richText(r.title || "(untitled)") },
    Authors: { rich_text: richText(authorNames) },
    Source: { select: { name: r.source } },
    Status: { select: { name: "To Read" } },
  };

  if (typeof r.doi === "string" && r.doi) {
    properties.DOI = { url: r.doi.startsWith("http") ? r.doi : `https://doi.org/${r.doi}` };
  }
  if (r.published_date) {
    properties.Published = { date: { start: r.published_date } };
  }
  if (r.venue) {
    properties.Venue = { rich_text: richText(r.venue) };
  }
  if (typeof r.citation_count === "number") {
    properties["Citation Count"] = { number: r.citation_count };
  }
  if (r.abstract) {
    properties.Abstract = { rich_text: richText(r.abstract) };
  }

  // `japanese_summary` is not part of SearchResult; it is an optional extra
  // field callers may attach to a result.
  const rawSummary = (r as SearchResult & { japanese_summary?: unknown }).japanese_summary;
  const japaneseSummary =
    includeJapaneseSummary && typeof rawSummary === "string" && rawSummary ? rawSummary : undefined;
  if (japaneseSummary) {
    properties["Japanese Summary"] = { rich_text: richText(japaneseSummary) };
  }

  // Mirror the abstract and the Japanese summary as body blocks whenever the
  // property copy was truncated, so the cap on rich_text properties never
  // loses source text.
  const children: Array<Record<string, unknown>> = [];
  if (r.abstract && r.abstract.length > ABSTRACT_PROPERTY_LIMIT) {
    children.push(...chunkParagraphBlocks(r.abstract));
  }
  if (japaneseSummary && japaneseSummary.length > ABSTRACT_PROPERTY_LIMIT) {
    children.push(...chunkParagraphBlocks(japaneseSummary));
  }

  // (databaseId is consumed by the caller, not here)
  void databaseId;
  return { properties, children };
}
|
|
148
|
+
|
|
149
|
+
/**
 * Push search results to a Notion database.
 *
 * Strategy: build a page payload per result, then either return the
 * payloads unchanged (dry-run) or create each page in Notion sequentially.
 * Both payload construction and the Notion call run inside the per-result
 * error handler, so a single bad row — malformed input or a rejected
 * request — is recorded with status "error" and does not abort the batch.
 *
 * @param input Results, target database id, optional token and flags.
 * @returns Per-result outcomes plus created/error counters.
 * @throws Error if `notion_database_id` is blank, if `results` is not an
 *   array, or if a real (non dry-run) export has no Notion token available.
 */
export async function exportResultsToNotion(input: ExportInput): Promise<ExportOutput> {
  const dryRun = input.dry_run ?? false;
  const includeJp = input.include_japanese_summary ?? false;
  const databaseId = input.notion_database_id;

  if (!databaseId || !databaseId.trim()) {
    throw new Error("notion_database_id is required");
  }
  if (!Array.isArray(input.results)) {
    throw new Error("results must be an array");
  }

  let client: Client | undefined;
  if (!dryRun) {
    const token = resolveNotionToken(input.notion_token);
    if (!token) {
      throw new Error(
        "Notion token not found. Set NOTION_TOKEN env or ~/.scholar-mcp/config.json {\"notion_token\": \"...\"}"
      );
    }
    client = new Client({ auth: token });
  }

  const pages: ExportedPage[] = [];
  let created = 0;
  let errors = 0;

  for (let i = 0; i < input.results.length; i++) {
    const r = input.results[i];
    const title = typeof r?.title === "string" ? r.title : "";
    // Start empty so the error entry is still well-formed when the payload
    // itself could not be built.
    let properties: Record<string, unknown> = {};
    let children: Array<Record<string, unknown>> = [];

    try {
      ({ properties, children } = buildPagePayload(r, databaseId, includeJp));

      if (dryRun || !client) {
        pages.push({
          result_index: i,
          title,
          status: "dry_run",
          properties,
          children,
        });
        continue;
      }

      const resp = await client.pages.create({
        parent: { database_id: databaseId },
        // The Notion SDK's property typings are tighter than our generic
        // record; the runtime accepts our payload but the type checker
        // wants the concrete union, so cast at the boundary.
        properties: properties as Parameters<Client["pages"]["create"]>[0]["properties"],
        children: children as Parameters<Client["pages"]["create"]>[0]["children"],
      });
      const pageRes = resp as { id?: string; url?: string };
      pages.push({
        result_index: i,
        title,
        page_id: pageRes.id,
        page_url: pageRes.url,
        status: "created",
        properties,
        children,
      });
      created++;
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      pages.push({
        result_index: i,
        title,
        status: "error",
        error: msg,
        properties,
        children,
      });
      errors++;
    }
  }

  return {
    database_id: databaseId,
    total: input.results.length,
    created,
    errors,
    dry_run: dryRun,
    pages,
  };
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { XMLParser } from "fast-xml-parser";
|
|
2
|
+
import type { SearchParams, SearchResult, SourceSearcher } from "../types.js";
|
|
3
|
+
|
|
4
|
+
const BASE_URL = "https://export.arxiv.org/api/query";
|
|
5
|
+
const USER_AGENT = "scholar-mcp/0.2.1 (https://github.com/kak4343/scholar-mcp)";
|
|
6
|
+
|
|
7
|
+
/**
 * Searcher for the arXiv Atom query API.
 *
 * Results are requested newest-first (`sortBy=submittedDate`, descending).
 * When a date range is given it is sent to arXiv as a `submittedDate`
 * range inside `search_query`, so `max_results` counts entries inside the
 * range; filtering only after the fetch would return nothing for any range
 * older than the newest `max_results` matches. The same range is re-checked
 * locally against each entry's `published` date.
 */
export class ArxivSearcher implements SourceSearcher {
  /**
   * Query arXiv and return the parsed entries that fall inside the
   * requested date range.
   *
   * @throws Error when the HTTP response status is not OK.
   */
  async search(params: SearchParams): Promise<SearchResult[]> {
    const url = new URL(BASE_URL);
    url.searchParams.set("search_query", this.buildSearchQuery(params));
    url.searchParams.set("max_results", String(params.max_results ?? 10));
    url.searchParams.set("sortBy", "submittedDate");
    url.searchParams.set("sortOrder", "descending");

    const response = await fetch(url, { headers: { "User-Agent": USER_AGENT } });
    if (!response.ok) throw new Error(`arXiv search failed: ${response.status}`);
    const xml = await response.text();

    const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: "@_" });
    const parsed = parser.parse(xml);
    const entries = parsed.feed?.entry;
    if (!entries) return [];
    // The parser yields a bare object rather than an array for a single entry.
    const entryArray = Array.isArray(entries) ? entries : [entries];

    return entryArray
      .map((e: any) => this.parseEntry(e))
      .filter((item): item is SearchResult => {
        if (item === null) return false;
        if (params.date_from && item.published_date < params.date_from) return false;
        if (params.date_to && item.published_date > params.date_to) return false;
        return true;
      });
  }

  /**
   * Build the `search_query` value: `all:<query>`, plus a `submittedDate`
   * range when at least one bound is a well-formed YYYY-MM-DD date. A
   * missing or malformed bound is replaced by an open-ended default.
   */
  private buildSearchQuery(params: SearchParams): string {
    const base = `all:${params.query}`;
    const from = this.toSubmittedStamp(params.date_from, "0000");
    const to = this.toSubmittedStamp(params.date_to, "2359");
    if (!from && !to) return base;
    return `(${base}) AND submittedDate:[${from ?? "190001010000"} TO ${to ?? "209912312359"}]`;
  }

  /**
   * Convert YYYY-MM-DD into arXiv's YYYYMMDDHHMM stamp using the given
   * HHMM suffix; returns undefined for anything else.
   */
  private toSubmittedStamp(date: string | undefined, hhmm: string): string | undefined {
    if (typeof date !== "string") return undefined;
    const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(date);
    return m ? `${m[1]}${m[2]}${m[3]}${hhmm}` : undefined;
  }

  /**
   * Convert one parsed Atom `<entry>` into a SearchResult, or null when the
   * entry has no id.
   */
  private parseEntry(entry: any): SearchResult | null {
    const id = String(entry.id ?? "");
    if (!id) return null;
    // Take the part after "/abs/" and strip a trailing version suffix (vN);
    // fall back to the raw id when it has no "/abs/" segment.
    const arxiv_id = id.split("/abs/")[1]?.replace(/v\d+$/, "") ?? id;

    const title = String(entry.title ?? "").replace(/\s+/g, " ").trim();
    const abstract = String(entry.summary ?? "").replace(/\s+/g, " ").trim();
    const published_date = String(entry.published ?? "").substring(0, 10);

    const authorRaw = entry.author;
    const authorArray = Array.isArray(authorRaw) ? authorRaw : (authorRaw ? [authorRaw] : []);
    const authors = authorArray.map((a: any) => String(a.name ?? "")).filter(Boolean);

    const categoryRaw = entry.category;
    const categoryArray = Array.isArray(categoryRaw) ? categoryRaw : (categoryRaw ? [categoryRaw] : []);
    const venue = categoryArray.map((c: any) => c["@_term"]).filter(Boolean).join(", ");

    return {
      source: "arxiv",
      title,
      authors,
      abstract,
      arxiv_id,
      published_date,
      venue: venue || "arXiv preprint",
      url: id,
    };
  }
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import { XMLParser } from "fast-xml-parser";
|
|
2
|
+
import type { SearchParams, SearchResult, SourceSearcher } from "../types.js";
|
|
3
|
+
|
|
4
|
+
const BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
|
|
5
|
+
const USER_AGENT = "scholar-mcp/0.2.1 (https://github.com/kak4343/scholar-mcp)";
|
|
6
|
+
|
|
7
|
+
/**
 * Searcher for PubMed via the NCBI E-utilities: `esearch` resolves the
 * query to PMIDs, then `efetch` retrieves and parses the article records.
 */
export class PubMedSearcher implements SourceSearcher {
  private apiKey?: string;

  /** @param apiKey Optional NCBI API key, sent as `api_key` when present. */
  constructor(apiKey?: string) {
    this.apiKey = apiKey;
  }

  /** Run the two-step search; returns an empty list when nothing matches. */
  async search(params: SearchParams): Promise<SearchResult[]> {
    const pmids = await this.esearch(params);
    if (pmids.length === 0) return [];
    return this.efetch(pmids);
  }

  /**
   * Call esearch and return the matching PMIDs.
   * @throws Error when the HTTP response status is not OK.
   */
  private async esearch(params: SearchParams): Promise<string[]> {
    const url = new URL(`${BASE_URL}/esearch.fcgi`);
    url.searchParams.set("db", "pubmed");
    url.searchParams.set("term", this.buildTerm(params));
    url.searchParams.set("retmax", String(params.max_results ?? 10));
    url.searchParams.set("retmode", "json");
    if (this.apiKey) url.searchParams.set("api_key", this.apiKey);

    const r = await fetch(url, { headers: { "User-Agent": USER_AGENT } });
    if (!r.ok) throw new Error(`PubMed esearch failed: ${r.status}`);
    const data = await r.json() as { esearchresult?: { idlist?: string[] } };
    return data.esearchresult?.idlist ?? [];
  }

  /**
   * Build the esearch term: the raw query, plus a `[PDAT]` range when either
   * date bound is given. A missing bound defaults to 1900/01/01 or
   * 3000/01/01; dashes in the dates are converted to slashes.
   */
  private buildTerm(params: SearchParams): string {
    let term = params.query;
    if (params.date_from || params.date_to) {
      const from = (params.date_from ?? "1900/01/01").replace(/-/g, "/");
      const to = (params.date_to ?? "3000/01/01").replace(/-/g, "/");
      term += ` AND (${from}[PDAT] : ${to}[PDAT])`;
    }
    return term;
  }

  /**
   * Fetch the XML records for the given PMIDs and parse them.
   * @throws Error when the HTTP response status is not OK.
   */
  private async efetch(pmids: string[]): Promise<SearchResult[]> {
    const url = new URL(`${BASE_URL}/efetch.fcgi`);
    url.searchParams.set("db", "pubmed");
    url.searchParams.set("id", pmids.join(","));
    url.searchParams.set("retmode", "xml");
    if (this.apiKey) url.searchParams.set("api_key", this.apiKey);

    const r = await fetch(url, { headers: { "User-Agent": USER_AGENT } });
    if (!r.ok) throw new Error(`PubMed efetch failed: ${r.status}`);
    const xml = await r.text();

    const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: "@_" });
    const parsed = parser.parse(xml);
    const articles = parsed.PubmedArticleSet?.PubmedArticle;
    if (!articles) return [];
    // The parser yields a bare object rather than an array for a single article.
    const articleArray = Array.isArray(articles) ? articles : [articles];

    return articleArray
      .map((a: any) => this.parseArticle(a))
      .filter((item): item is SearchResult => item !== null);
  }

  /**
   * Convert one parsed `PubmedArticle` into a SearchResult, or null when it
   * lacks `MedlineCitation` or `Article`.
   */
  private parseArticle(article: any): SearchResult | null {
    const medlineCitation = article.MedlineCitation;
    if (!medlineCitation) return null;

    // Elements with attributes are parsed as objects whose text is under "#text".
    const pmid = String(medlineCitation.PMID?.["#text"] ?? medlineCitation.PMID ?? "");
    const articleData = medlineCitation.Article;
    if (!articleData) return null;

    const title = String(articleData.ArticleTitle?.["#text"] ?? articleData.ArticleTitle ?? "").trim();

    const abstractText = articleData.Abstract?.AbstractText;
    const abstract = this.flattenAbstract(abstractText);

    const authorList = articleData.AuthorList?.Author;
    const authors = this.parseAuthors(authorList);

    const journal = articleData.Journal?.Title ?? articleData.Journal?.ISOAbbreviation ?? "";
    const pubDate = articleData.Journal?.JournalIssue?.PubDate;
    const published_date = this.parseDate(pubDate);

    const doi = this.extractDoi(article);

    return {
      source: "pubmed",
      title,
      authors,
      abstract,
      pmid,
      doi,
      published_date,
      venue: journal,
      url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
    };
  }

  /**
   * Flatten an `AbstractText` value (string, object with "#text", or an
   * array of either) into one space-joined string.
   */
  private flattenAbstract(abstractText: any): string {
    if (!abstractText) return "";
    if (typeof abstractText === "string") return abstractText;
    if (Array.isArray(abstractText)) {
      return abstractText.map((t: any) => this.flattenAbstract(t)).join(" ");
    }
    if (abstractText["#text"]) return String(abstractText["#text"]);
    return "";
  }

  /**
   * Format authors as "LastName Initials" (ForeName when Initials is
   * absent). Entries with no personal name fall back to `CollectiveName`,
   * so group authors are kept; entries with no name at all are dropped.
   */
  private parseAuthors(authorList: any): string[] {
    if (!authorList) return [];
    const arr = Array.isArray(authorList) ? authorList : [authorList];
    return arr.map((a: any) => {
      const last = a.LastName ?? "";
      const initials = a.Initials ?? a.ForeName ?? "";
      const personal = `${last}${initials ? ` ${initials}` : ""}`.trim();
      return personal || String(a.CollectiveName ?? "").trim();
    }).filter(Boolean);
  }

  /**
   * Convert a `PubDate` element to YYYY-MM-DD. Missing month or day default
   * to "01"; month may be numeric or a three-letter English abbreviation.
   * When there is no `Year`, the first four-digit year (and a following
   * three-letter month, if any) is taken from the free-text `MedlineDate`
   * (e.g. "2020 Jan-Feb"). Returns "" when no year can be determined.
   */
  private parseDate(pubDate: any): string {
    if (!pubDate) return "";
    let year = pubDate.Year ?? "";
    let month = pubDate.Month ?? "01";
    const day = pubDate.Day ?? "01";

    if (!year && pubDate.MedlineDate) {
      const m = /(\d{4})(?:\s+([A-Za-z]{3}))?/.exec(String(pubDate.MedlineDate));
      if (m) {
        year = m[1];
        if (m[2]) month = m[2];
      }
    }

    const monthNum = isNaN(Number(month)) ? this.monthNameToNum(month) : String(month).padStart(2, "0");
    return year ? `${year}-${monthNum}-${String(day).padStart(2, "0")}` : "";
  }

  /** Map a three-letter month abbreviation to "01".."12"; unknown names give "01". */
  private monthNameToNum(month: string): string {
    const map: Record<string, string> = {
      Jan: "01", Feb: "02", Mar: "03", Apr: "04", May: "05", Jun: "06",
      Jul: "07", Aug: "08", Sep: "09", Oct: "10", Nov: "11", Dec: "12",
    };
    return map[month] ?? "01";
  }

  /** Return the ArticleId whose IdType attribute is "doi", if present. */
  private extractDoi(article: any): string | undefined {
    const ids = article.PubmedData?.ArticleIdList?.ArticleId;
    if (!ids) return undefined;
    const arr = Array.isArray(ids) ? ids : [ids];
    const doiEntry = arr.find((i: any) => i["@_IdType"] === "doi");
    return doiEntry ? String(doiEntry["#text"] ?? doiEntry) : undefined;
  }
}
|