@kak4343/scholar-mcp 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.ja.md +162 -0
- package/README.md +156 -0
- package/dist/cache/disk_cache.d.ts +27 -0
- package/dist/cache/disk_cache.js +82 -0
- package/dist/cache/disk_cache.js.map +1 -0
- package/dist/cache/index.d.ts +47 -0
- package/dist/cache/index.js +67 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/cache/memory_cache.d.ts +17 -0
- package/dist/cache/memory_cache.js +36 -0
- package/dist/cache/memory_cache.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +166 -0
- package/dist/index.js.map +1 -0
- package/dist/notion/push.d.ts +43 -0
- package/dist/notion/push.js +190 -0
- package/dist/notion/push.js.map +1 -0
- package/dist/sources/arxiv.d.ts +5 -0
- package/dist/sources/arxiv.js +57 -0
- package/dist/sources/arxiv.js.map +1 -0
- package/dist/sources/pubmed.d.ts +15 -0
- package/dist/sources/pubmed.js +133 -0
- package/dist/sources/pubmed.js.map +1 -0
- package/dist/sources/semantic_scholar.d.ts +7 -0
- package/dist/sources/semantic_scholar.js +61 -0
- package/dist/sources/semantic_scholar.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +43 -0
- package/scripts/smoke_test.mjs +120 -0
- package/src/cache/disk_cache.ts +94 -0
- package/src/cache/index.ts +95 -0
- package/src/cache/memory_cache.ts +43 -0
- package/src/index.ts +212 -0
- package/src/notion/push.ts +237 -0
- package/src/sources/arxiv.ts +61 -0
- package/src/sources/pubmed.ts +143 -0
- package/src/sources/semantic_scholar.ts +67 -0
- package/src/types.ts +28 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { SearchParams, SearchResult, SourceSearcher } from "../types.js";
|
|
2
|
+
/**
 * Type declaration for the Semantic Scholar searcher. The class is declared
 * as implementing the shared SourceSearcher contract.
 */
export declare class SemanticScholarSearcher implements SourceSearcher {
|
|
3
|
+
/** Optional API key; private, so the declaration does not expose its type. */
private apiKey?;
|
|
4
|
+
/** @param apiKey Optional API key string. */
constructor(apiKey?: string);
|
|
5
|
+
/** Runs a search for `params` and resolves to an array of SearchResult records. */
search(params: SearchParams): Promise<SearchResult[]>;
|
|
6
|
+
/** Private helper; its signature is not exposed in this declaration. */
private parsePaper;
|
|
7
|
+
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
const BASE_URL = "https://api.semanticscholar.org/graph/v1";
// Keep the advertised version in step with the "version" field of package.json.
const USER_AGENT = "scholar-mcp/0.3.0 (https://github.com/kak4343/scholar-mcp)";
const FIELDS = "title,authors,abstract,year,publicationDate,externalIds,citationCount,venue,url";
/** Default page size used when the caller does not pass `max_results`. */
const DEFAULT_LIMIT = 10;
/** The relevance-search endpoint rejects `limit` values above 100. */
const MAX_LIMIT = 100;

/**
 * Normalises a requested result count into the range the API accepts.
 * Missing or non-finite values fall back to DEFAULT_LIMIT; everything else
 * is truncated to an integer and clamped to 1..MAX_LIMIT.
 */
function clampLimit(requested) {
    const value = Number(requested ?? DEFAULT_LIMIT);
    if (!Number.isFinite(value)) {
        return DEFAULT_LIMIT;
    }
    return Math.min(Math.max(Math.trunc(value), 1), MAX_LIMIT);
}

/**
 * Searcher backed by the Semantic Scholar Graph API `/paper/search` endpoint.
 */
export class SemanticScholarSearcher {
    apiKey;

    /**
     * @param apiKey Optional API key; when set it is sent as the `x-api-key` header.
     */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }

    /**
     * Queries `/paper/search` and converts each returned paper into a result record.
     *
     * @param params Search parameters: `query`, plus optional `max_results`,
     *   `date_from` and `date_to`.
     * @returns Parsed results; entries that cannot be parsed (no paper id,
     *   or not an object) are dropped.
     * @throws Error "Semantic Scholar rate limit exceeded" on HTTP 429, or
     *   "Semantic Scholar search failed: <status>" on any other non-OK status.
     */
    async search(params) {
        const url = new URL(`${BASE_URL}/paper/search`);
        url.searchParams.set("query", params.query);
        url.searchParams.set("limit", String(clampLimit(params.max_results)));
        url.searchParams.set("fields", FIELDS);
        if (params.date_from || params.date_to) {
            // A missing bound is filled in so the range is always closed.
            const from = params.date_from ?? "1900-01-01";
            const to = params.date_to ?? new Date().toISOString().substring(0, 10);
            url.searchParams.set("publicationDateOrYear", `${from}:${to}`);
        }
        const headers = {
            "User-Agent": USER_AGENT,
            ...(this.apiKey ? { "x-api-key": this.apiKey } : {}),
        };
        const response = await fetch(url, { headers });
        if (!response.ok) {
            if (response.status === 429)
                throw new Error("Semantic Scholar rate limit exceeded");
            throw new Error(`Semantic Scholar search failed: ${response.status}`);
        }
        const payload = await response.json();
        // Tolerate a missing or malformed `data` field instead of throwing a TypeError.
        const papers = Array.isArray(payload?.data) ? payload.data : [];
        return papers
            .map((paper) => this.parsePaper(paper))
            .filter((result) => result !== null);
    }

    /**
     * Converts one raw API paper object into a result record.
     *
     * @param paper Raw entry from the response's `data` array.
     * @returns The result record, or `null` when the entry is not an object
     *   or has no `paperId`.
     */
    parsePaper(paper) {
        if (paper === null || typeof paper !== "object")
            return null;
        const id = String(paper.paperId ?? "");
        if (!id)
            return null;
        const externalIds = paper.externalIds;
        const rawAuthors = Array.isArray(paper.authors) ? paper.authors : [];
        const authors = rawAuthors
            // Null author entries are skipped rather than crashing the whole parse.
            .map((author) => String(author?.name ?? ""))
            .filter(Boolean);
        // Fall back to January 1st of the publication year when no full date is given.
        const published_date = String(paper.publicationDate ?? (paper.year ? `${paper.year}-01-01` : ""));
        return {
            source: "semantic_scholar",
            title: String(paper.title ?? "").trim(),
            authors,
            abstract: String(paper.abstract ?? "").trim(),
            doi: externalIds?.DOI ? String(externalIds.DOI) : undefined,
            pmid: externalIds?.PubMed ? String(externalIds.PubMed) : undefined,
            arxiv_id: externalIds?.ArXiv ? String(externalIds.ArXiv) : undefined,
            semantic_scholar_id: id,
            published_date,
            venue: String(paper.venue ?? ""),
            citation_count: typeof paper.citationCount === "number" ? paper.citationCount : undefined,
            url: String(paper.url ?? `https://www.semanticscholar.org/paper/${id}`),
        };
    }
}
|
|
61
|
+
//# sourceMappingURL=semantic_scholar.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic_scholar.js","sourceRoot":"","sources":["../../src/sources/semantic_scholar.ts"],"names":[],"mappings":"AAEA,MAAM,QAAQ,GAAG,0CAA0C,CAAC;AAC5D,MAAM,UAAU,GAAG,4DAA4D,CAAC;AAChF,MAAM,MAAM,GAAG,iFAAiF,CAAC;AAEjG,MAAM,OAAO,uBAAuB;IAC1B,MAAM,CAAU;IAExB,YAAY,MAAe;QACzB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,MAAoB;QAC/B,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,GAAG,QAAQ,eAAe,CAAC,CAAC;QAChD,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC5C,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,CAAC;QAChE,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACvC,IAAI,MAAM,CAAC,SAAS,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACvC,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,IAAI,YAAY,CAAC;YAC9C,MAAM,EAAE,GAAG,MAAM,CAAC,OAAO,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACvE,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,uBAAuB,EAAE,GAAG,IAAI,IAAI,EAAE,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,MAAM,OAAO,GAA2B,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC;QACrE,IAAI,IAAI,CAAC,MAAM;YAAE,OAAO,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;QAEpD,MAAM,CAAC,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;QACxC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;YACV,IAAI,CAAC,CAAC,MAAM,KAAK,GAAG;gBAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;YAC9E,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;QACjE,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,IAAI,EAAsB,CAAC;QAChD,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAC5G,CAAC;IAEO,UAAU,CAAC,KAAU;QAC3B,MAAM,EAAE,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;QACvC,IAAI,CAAC,EAAE;YAAE,OAAO,IAAI,CAAC;QAErB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QAC/C,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACrD,MAAM,OAAO,GAAG,CAAC,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,MAAM,CAAC,
CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC5F,MAAM,cAAc,GAAG,MAAM,CAAC,KAAK,CAAC,eAAe,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAClG,MAAM,GAAG,GAAG,KAAK,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC/E,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACtF,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACxF,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;QACxC,MAAM,cAAc,GAAG,OAAO,KAAK,CAAC,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC;QACjG,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,yCAAyC,EAAE,EAAE,CAAC,CAAC;QAE/E,OAAO;YACL,MAAM,EAAE,kBAAkB;YAC1B,KAAK;YACL,OAAO;YACP,QAAQ;YACR,GAAG;YACH,IAAI;YACJ,QAAQ;YACR,mBAAmB,EAAE,EAAE;YACvB,cAAc;YACd,KAAK;YACL,cAAc;YACd,GAAG;SACJ,CAAC;IACJ,CAAC;CACF"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/** Identifier of a literature source; one of the three string literals below. */
export type Source = "pubmed" | "arxiv" | "semantic_scholar";
|
|
2
|
+
/**
 * One paper record as returned by a SourceSearcher.
 *
 * `source`, `title`, `authors`, `abstract`, `published_date` and `url` are
 * always present; the identifier fields (`doi`, `arxiv_id`, `pmid`,
 * `semantic_scholar_id`), `venue` and `citation_count` are optional.
 */
export interface SearchResult {
|
|
3
|
+
source: Source;
|
|
4
|
+
title: string;
|
|
5
|
+
authors: string[];
|
|
6
|
+
abstract: string;
|
|
7
|
+
doi?: string;
|
|
8
|
+
arxiv_id?: string;
|
|
9
|
+
pmid?: string;
|
|
10
|
+
semantic_scholar_id?: string;
|
|
11
|
+
published_date: string;
|
|
12
|
+
venue?: string;
|
|
13
|
+
citation_count?: number;
|
|
14
|
+
url: string;
|
|
15
|
+
}
|
|
16
|
+
/**
 * Parameters accepted by SourceSearcher.search. Only `query` is required;
 * `sources`, `max_results`, `date_from` and `date_to` are optional.
 * NOTE(review): the date fields look like YYYY-MM-DD strings — confirm
 * against the searcher implementations.
 */
export interface SearchParams {
|
|
17
|
+
query: string;
|
|
18
|
+
sources?: Source[];
|
|
19
|
+
max_results?: number;
|
|
20
|
+
date_from?: string;
|
|
21
|
+
date_to?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
 * Contract implemented by each source-specific searcher: a single
 * asynchronous `search` that resolves to an array of SearchResult records.
 */
export interface SourceSearcher {
|
|
24
|
+
search(params: SearchParams): Promise<SearchResult[]>;
|
|
25
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@kak4343/scholar-mcp",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Unified MCP server for PubMed + arXiv + Semantic Scholar scholarly search, with two-tier caching and Notion export.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"scholar-mcp": "dist/index.js"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"start": "node dist/index.js",
|
|
13
|
+
"dev": "tsc --watch",
|
|
14
|
+
"test": "node --test dist/tests/*.test.js"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"mcp",
|
|
18
|
+
"model-context-protocol",
|
|
19
|
+
"pubmed",
|
|
20
|
+
"arxiv",
|
|
21
|
+
"semantic-scholar",
|
|
22
|
+
"scholarly-search",
|
|
23
|
+
"claude",
|
|
24
|
+
"anthropic"
|
|
25
|
+
],
|
|
26
|
+
"author": "",
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
30
|
+
"@notionhq/client": "^2.3.0",
|
|
31
|
+
"better-sqlite3": "^12.10.0",
|
|
32
|
+
"fast-xml-parser": "^4.5.0",
|
|
33
|
+
"lru-cache": "^11.5.1"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
37
|
+
"@types/node": "^22.0.0",
|
|
38
|
+
"typescript": "^5.6.0"
|
|
39
|
+
},
|
|
40
|
+
"engines": {
|
|
41
|
+
"node": ">=18.0.0"
|
|
42
|
+
}
|
|
43
|
+
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* scholar-mcp v0.2 smoke test.
|
|
4
|
+
*
|
|
5
|
+
* Validates:
|
|
6
|
+
* 1. SearchCache: first lookup is a miss, second lookup is a hit, hit rate
|
|
7
|
+
* reflects the 1-of-2 ratio.
|
|
8
|
+
* 2. Cache key determinism: same inputs in different order produce the
|
|
9
|
+
* same sha256 key.
|
|
10
|
+
* 3. exportResultsToNotion dry-run: builds the expected Notion property
|
|
11
|
+
* payload without touching the network.
|
|
12
|
+
*
|
|
13
|
+
* Run: `npm run build && node scripts/smoke_test.mjs`
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
17
|
+
import { tmpdir } from "node:os";
|
|
18
|
+
import { join } from "node:path";
|
|
19
|
+
|
|
20
|
+
import { SearchCache, buildCacheKey } from "../dist/cache/index.js";
|
|
21
|
+
import { exportResultsToNotion } from "../dist/notion/push.js";
|
|
22
|
+
|
|
23
|
+
// Scratch directory holding the throw-away SQLite cache file for this run.
const tmp = mkdtempSync(join(tmpdir(), "scholar-mcp-smoke-"));
const dbPath = join(tmp, "scholar_cache.db");

let failures = 0;

/**
 * Records the outcome of one check: logs "ok" or "FAIL" to stderr and counts
 * failures so the process exit code can reflect them.
 */
function check(cond, msg) {
  if (cond) {
    console.error(` ok : ${msg}`);
  } else {
    console.error(` FAIL: ${msg}`);
    failures++;
  }
}

// Every cache opened below is registered here so the teardown can close it
// even when a later step throws.
const openCaches = [];

try {
  console.error("[1/3] Cache key determinism");
  const k1 = buildCacheKey({
    tool: "scholar_search",
    query: "diabetic retinopathy",
    sources: ["pubmed", "arxiv", "semantic_scholar"],
    max_results: 10,
  });
  const k2 = buildCacheKey({
    tool: "scholar_search",
    query: "DIABETIC RETINOPATHY",
    sources: ["semantic_scholar", "arxiv", "pubmed"],
    max_results: 10,
  });
  check(k1 === k2, "same inputs (different case/order) hash to same key");
  check(k1.length === 64, "sha256 produces 64-hex-char digest");

  console.error("[2/3] SearchCache hit/miss");
  const cache = new SearchCache({ diskPath: dbPath });
  openCaches.push(cache);
  const key = buildCacheKey({
    tool: "scholar_search",
    query: "test query",
    sources: ["pubmed"],
    max_results: 5,
  });
  const hit0 = cache.get(key);
  check(hit0 === undefined, "first lookup is a miss");
  cache.set(key, { results: [{ source: "pubmed", title: "Test Paper" }] });
  const hit1 = cache.get(key);
  check(hit1 !== undefined, "second lookup is a hit");
  check(cache.hitRate() === 0.5, `hit rate is 0.5 (got ${cache.hitRate()})`);

  // disk persistence: new in-memory cache, same disk path -> still hits
  const cache2 = new SearchCache({ diskPath: dbPath });
  openCaches.push(cache2);
  const hit2 = cache2.get(key);
  check(hit2 !== undefined, "disk cache persists across SearchCache instances");

  console.error("[3/3] exportResultsToNotion dry-run schema");
  const sampleResult = {
    source: "pubmed",
    title: "Sample paper on diabetic retinopathy",
    authors: ["Yamada T", "Suzuki K"],
    abstract: "Background. Methods. Results. Conclusions.",
    doi: "10.1000/example.2026.001",
    pmid: "12345678",
    published_date: "2026-01-15",
    venue: "JAMA Ophthalmology",
    citation_count: 42,
    url: "https://pubmed.ncbi.nlm.nih.gov/12345678/",
  };
  const dry = await exportResultsToNotion({
    results: [sampleResult],
    notion_database_id: "0a489d15-83e8-471d-ba1e-f04030473967",
    include_japanese_summary: false,
    dry_run: true,
  });

  check(dry?.dry_run === true, "dry_run flag echoed in output");
  check(dry?.total === 1, "total = 1");
  check(dry?.created === 0, "created = 0 in dry run");
  check(dry?.pages?.length === 1, "one page payload returned");
  const p = dry?.pages?.[0];
  // Optional chaining keeps a missing property from aborting the run: the
  // check is reported as FAIL and the remaining checks still execute.
  const props = p?.properties;
  check(p?.status === "dry_run", "page status = dry_run");
  check(props?.Title !== undefined, "Title property present");
  check(props?.Authors !== undefined, "Authors property present");
  check(props?.DOI !== undefined && props.DOI.url?.includes("doi.org") === true, "DOI normalised to URL");
  check(props?.Source?.select?.name === "pubmed", "Source select = pubmed");
  check(props?.Published?.date?.start === "2026-01-15", "Published date passed through");
  check(props?.Venue !== undefined, "Venue rich_text present");
  check(props?.["Citation Count"]?.number === 42, "Citation Count = 42");
  check(props?.Abstract !== undefined, "Abstract rich_text present");
  check(props?.Status?.select?.name === "To Read", "Status defaults to 'To Read'");
  // Require `props` itself so this check cannot pass vacuously.
  check(
    props !== undefined && props["Japanese Summary"] === undefined,
    "Japanese Summary omitted when not requested",
  );
} catch (err) {
  // An unexpected exception counts as a failure but must not skip teardown.
  failures++;
  const detail = err instanceof Error ? (err.stack ?? err.message) : String(err);
  console.error(` FAIL: unexpected error: ${detail}`);
} finally {
  // teardown: close database handles first, then remove the scratch directory
  for (const opened of openCaches) {
    try {
      opened.close();
    } catch {
      // best effort: a failed close must not prevent directory cleanup
    }
  }
  rmSync(tmp, { recursive: true, force: true });
}

if (failures === 0) {
  console.error("\nAll smoke checks passed.");
  process.exit(0);
} else {
  console.error(`\n${failures} check(s) failed.`);
  process.exit(1);
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import Database from "better-sqlite3";
|
|
2
|
+
import { mkdirSync } from "node:fs";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { dirname, join } from "node:path";
|
|
5
|
+
|
|
6
|
+
/**
 * Persistent SQLite-backed cache for scholar_search results.
 *
 * Default location: ~/.scholar-mcp/cache/scholar_cache.db
 * Default TTL: 7 days
 *
 * Schema:
 *   key      TEXT PRIMARY KEY   -- sha256 of the canonical request
 *   value    TEXT NOT NULL      -- JSON-serialised payload
 *   created  INTEGER NOT NULL   -- epoch ms of insertion
 *   expires  INTEGER NOT NULL   -- epoch ms after which the row is stale
 *
 * All SQL statements are prepared once in the constructor and reused, rather
 * than being re-compiled on every call.
 */
export class DiskCache<T extends object = object> {
  private db: Database.Database;
  private ttlMs: number;
  private readonly selectStmt: Database.Statement<[string, number], { value: string }>;
  private readonly upsertStmt: Database.Statement<[string, string, number, number]>;
  private readonly deleteStmt: Database.Statement<[string]>;
  private readonly pruneStmt: Database.Statement<[number]>;
  private readonly countStmt: Database.Statement<[number], { c: number }>;

  /**
   * Opens (creating if necessary) the cache database and its schema.
   *
   * @param dbPath Database file; defaults to ~/.scholar-mcp/cache/scholar_cache.db.
   *   The parent directory is created when missing.
   * @param ttlMs Lifetime of each entry in milliseconds (default 7 days).
   */
  constructor(dbPath?: string, ttlMs = 7 * 24 * 60 * 60 * 1000) {
    const resolved = dbPath ?? join(homedir(), ".scholar-mcp", "cache", "scholar_cache.db");
    mkdirSync(dirname(resolved), { recursive: true });
    this.db = new Database(resolved);
    this.ttlMs = ttlMs;
    try {
      this.init();
      this.selectStmt = this.db.prepare<[string, number], { value: string }>(
        "SELECT value FROM cache WHERE key = ? AND expires > ?"
      );
      this.upsertStmt = this.db.prepare<[string, string, number, number]>(
        "INSERT OR REPLACE INTO cache (key, value, created, expires) VALUES (?, ?, ?, ?)"
      );
      this.deleteStmt = this.db.prepare<[string]>("DELETE FROM cache WHERE key = ?");
      this.pruneStmt = this.db.prepare<[number]>("DELETE FROM cache WHERE expires <= ?");
      this.countStmt = this.db.prepare<[number], { c: number }>(
        "SELECT COUNT(*) AS c FROM cache WHERE expires > ?"
      );
    } catch (err) {
      // Do not leak the open handle when the schema or a statement cannot be set up.
      this.db.close();
      throw err;
    }
  }

  /** Switches the journal to WAL mode and creates the table and expiry index if absent. */
  private init(): void {
    this.db.pragma("journal_mode = WAL");
    this.db.exec(`
CREATE TABLE IF NOT EXISTS cache (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
created INTEGER NOT NULL,
expires INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_cache_expires ON cache(expires);
`);
  }

  /**
   * Looks up a non-expired entry.
   *
   * @returns The parsed payload, or `undefined` when the key is absent,
   *   expired, or its stored JSON cannot be parsed. An unparseable row is
   *   deleted so it is not counted by `size` or re-read on later lookups.
   */
  get(key: string): T | undefined {
    const row = this.selectStmt.get(key, Date.now());
    if (!row) return undefined;
    try {
      return JSON.parse(row.value) as T;
    } catch {
      this.deleteStmt.run(key);
      return undefined;
    }
  }

  /** Stores `value` as JSON under `key`, replacing any existing row and restarting its TTL. */
  set(key: string, value: T): void {
    const now = Date.now();
    this.upsertStmt.run(key, JSON.stringify(value), now, now + this.ttlMs);
  }

  /** True when `get(key)` would return a value. */
  has(key: string): boolean {
    return this.get(key) !== undefined;
  }

  /** Removes the row for `key`, if any. */
  delete(key: string): void {
    this.deleteStmt.run(key);
  }

  /** Remove all expired rows. Call periodically to keep the DB small. */
  prune(): number {
    const result = this.pruneStmt.run(Date.now());
    return Number(result.changes);
  }

  /** Removes every row, expired or not. */
  clear(): void {
    this.db.exec("DELETE FROM cache");
  }

  /** Number of rows that have not yet expired. */
  get size(): number {
    const row = this.countStmt.get(Date.now());
    return row?.c ?? 0;
  }

  /** Closes the underlying database connection. */
  close(): void {
    this.db.close();
  }
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import type { Source } from "../types.js";
|
|
3
|
+
import { MemoryCache } from "./memory_cache.js";
|
|
4
|
+
import { DiskCache } from "./disk_cache.js";
|
|
5
|
+
|
|
6
|
+
export { MemoryCache } from "./memory_cache.js";
|
|
7
|
+
export { DiskCache } from "./disk_cache.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Fields that identify one cacheable request. The order of `sources` does
 * not matter: the list is sorted before hashing, so ["arxiv", "pubmed"] and
 * ["pubmed", "arxiv"] resolve to the same cache entry.
 */
export interface CacheKeyInput {
  tool: string;
  query: string;
  sources: Source[];
  date_from?: string;
  date_to?: string;
  max_results?: number;
}

/**
 * Derives the cache key for a request.
 *
 * The query is trimmed and lower-cased, the sources are sorted (on a copy, so
 * the caller's array is untouched) and absent optional fields become `null`.
 * The resulting object is JSON-serialised and hashed.
 *
 * @returns The SHA-256 digest as a 64-character hex string.
 */
export function buildCacheKey(input: CacheKeyInput): string {
  const {
    tool,
    query,
    sources,
    date_from = null,
    date_to = null,
    max_results = null,
  } = input;

  const normalisedQuery = query.trim().toLowerCase();
  const orderedSources = sources.slice().sort();

  // Property order is fixed here because it determines the serialised form.
  const serialised = JSON.stringify({
    tool,
    query: normalisedQuery,
    sources: orderedSources,
    date_from,
    date_to,
    max_results,
  });

  const hasher = createHash("sha256");
  hasher.update(serialised);
  return hasher.digest("hex");
}
|
|
33
|
+
|
|
34
|
+
/**
 * Two-tier cache: lookups try the in-memory tier first and fall back to the
 * disk tier. A value found only on disk is copied into memory before being
 * returned, so the next lookup for that key is served from memory.
 *
 * Hit and miss counters are kept so callers can report a cache_hit_rate metric.
 */
export class SearchCache<T extends object = object> {
  readonly memory: MemoryCache<T>;
  readonly disk: DiskCache<T>;
  private hits = 0;
  private misses = 0;

  /**
   * @param opts Optional overrides forwarded to the two tiers: capacity and
   *   TTL for the memory cache, file path and TTL for the disk cache.
   */
  constructor(
    opts: {
      memoryMaxEntries?: number;
      memoryTtlMs?: number;
      diskPath?: string;
      diskTtlMs?: number;
    } = {}
  ) {
    const { memoryMaxEntries, memoryTtlMs, diskPath, diskTtlMs } = opts;
    this.memory = new MemoryCache<T>(memoryMaxEntries, memoryTtlMs);
    this.disk = new DiskCache<T>(diskPath, diskTtlMs);
  }

  /**
   * Returns the cached value for `key`, or `undefined` when neither tier has
   * it. Each call increments exactly one of the hit/miss counters.
   */
  get(key: string): T | undefined {
    let found = this.memory.get(key);
    if (found === undefined) {
      found = this.disk.get(key);
      if (found === undefined) {
        this.misses += 1;
        return undefined;
      }
      // Promote the disk hit into the memory tier.
      this.memory.set(key, found);
    }
    this.hits += 1;
    return found;
  }

  /** Writes `value` to the memory tier and then to the disk tier. */
  set(key: string, value: T): void {
    this.memory.set(key, value);
    this.disk.set(key, value);
  }

  /** Fraction of lookups that were hits since construction or the last reset; 0 when none were made. */
  hitRate(): number {
    const lookups = this.hits + this.misses;
    if (lookups === 0) {
      return 0;
    }
    return this.hits / lookups;
  }

  /** Snapshot of the counters together with the derived hit rate. */
  stats(): { hits: number; misses: number; rate: number } {
    const { hits, misses } = this;
    return { hits, misses, rate: this.hitRate() };
  }

  /** Zeroes both counters; cached values are untouched. */
  resetStats(): void {
    this.misses = 0;
    this.hits = 0;
  }

  /** Closes the disk tier. */
  close(): void {
    this.disk.close();
  }
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { LRUCache } from "lru-cache";
|
|
2
|
+
|
|
3
|
+
/**
 * In-process LRU cache for scholar_search results; a thin wrapper around
 * `LRUCache` keyed by string.
 *
 * Defaults:
 * - capacity of 1000 entries
 * - time-to-live of 1 hour
 */
export class MemoryCache<T extends object = object> {
  private entries: LRUCache<string, T>;

  /**
   * @param maxEntries Value passed as the LRU `max` option (default 1000).
   * @param ttlMs Value passed as the LRU `ttl` option, in milliseconds (default 1 hour).
   */
  constructor(maxEntries = 1000, ttlMs = 60 * 60 * 1000) {
    const options = { max: maxEntries, ttl: ttlMs };
    this.entries = new LRUCache<string, T>(options);
  }

  /** Returns the value stored under `key`, or `undefined` when the LRU has none. */
  get(key: string): T | undefined {
    const value = this.entries.get(key);
    return value;
  }

  /** Stores `value` under `key`. */
  set(key: string, value: T): void {
    this.entries.set(key, value);
  }

  /** Reports whether the LRU currently holds `key`. */
  has(key: string): boolean {
    const present = this.entries.has(key);
    return present;
  }

  /** Removes `key`; returns the underlying LRU's result for the deletion. */
  delete(key: string): boolean {
    const removed = this.entries.delete(key);
    return removed;
  }

  /** Empties the cache. */
  clear(): void {
    this.entries.clear();
  }

  /** Entry count as reported by the underlying LRU. */
  get size(): number {
    const count = this.entries.size;
    return count;
  }
}
|