@syndash/research-vault-mcp 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -34
- package/bin/research-vault-mcp.mjs +37 -21
- package/dist/server.js +1154 -0
- package/package.json +6 -4
- package/src/ingest/html.ts +42 -0
- package/src/server.ts +11 -6
package/dist/server.js
ADDED
|
@@ -0,0 +1,1154 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
var __require = import.meta.require;
|
|
3
|
+
|
|
4
|
+
// src/vault.ts
|
|
5
|
+
import { readFileSync, readdirSync, existsSync, statSync } from "fs";
|
|
6
|
+
import { join, basename } from "path";
|
|
7
|
+
import { homedir } from "os";
|
|
8
|
+
// Vault layout. Every location is derived from VAULT_ROOT, which the
// VAULT_ROOT environment variable overrides when it is set.
const VAULT_ROOT = process.env.VAULT_ROOT ?? `${homedir()}/Documents/Evensong/research-vault`;
// Root of the markdown notes scanned by scanKnowledge().
const KNOWLEDGE_DIR = join(VAULT_ROOT, "knowledge");
// Root of the raw queue scanned by scanRaw().
const RAW_DIR = join(VAULT_ROOT, "raw");
// JSON file holding the retention ("decay") score records.
const DECAY_PATH = join(VAULT_ROOT, ".meta", "decay-scores.json");
// Markdown taxonomy file stored next to the category folders.
const TAXONOMY_PATH = join(KNOWLEDGE_DIR, "_taxonomy.md");
|
|
13
|
+
/**
 * Reduce a file name or stored item id to its bare slug.
 *
 * Applied in order: drop a leading 8-digit / 4-digit stamp
 * (`DDDDDDDD-DDDD-` or `DDDDDDDD--DDDD-`), drop a leading run of 10 or more
 * digits followed by one or two dashes, drop a trailing `.md`.
 *
 * @param {string} raw file name or item id
 * @returns {string} the normalized id
 */
function normalizeId(raw) {
  const strippedParts = [/^\d{8}--?\d{4}-/, /^(\d{10,})--?/, /\.md$/];
  return strippedParts.reduce((id, pattern) => id.replace(pattern, ""), raw);
}
|
|
16
|
+
/**
 * Load the retention ("decay") score records stored at DECAY_PATH.
 *
 * Callers in this file use array methods on the result (`map`, `filter`,
 * spread), while the note-writing path in the same bundle stores the file as
 * an object keyed by item id. Both shapes are accepted here: an array is
 * returned as-is, a keyed object is flattened to its values (the key is used
 * as `itemId` when a record lacks one).
 *
 * @returns {Array<object>} score records; empty when the file is missing,
 *   unreadable, malformed, or holds anything other than an array/object.
 */
function loadDecayScores() {
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(DECAY_PATH, "utf-8"));
  } catch {
    // Best effort: a vault without scores simply has none.
    return [];
  }
  if (Array.isArray(parsed)) {
    return parsed;
  }
  if (parsed !== null && typeof parsed === "object") {
    return Object.entries(parsed)
      .filter(([, record]) => record !== null && typeof record === "object")
      .map(([key, record]) => ({ itemId: key, ...record }));
  }
  return [];
}
|
|
23
|
+
/**
 * Read the taxonomy markdown stored at TAXONOMY_PATH.
 *
 * @returns {string} the file content, or "" when it cannot be read
 */
function loadTaxonomy() {
  let markdown = "";
  try {
    markdown = readFileSync(TAXONOMY_PATH, "utf-8");
  } catch {
    // Best effort: an unreadable or absent taxonomy is reported as empty.
  }
  return markdown;
}
|
|
30
|
+
/**
 * Collect display metadata for one markdown file.
 *
 * The title is the first level-1 heading (`# ...`) found within the first 30
 * lines. When there is none, or the file cannot be read, the normalized file
 * name is used instead.
 *
 * @param {string} filePath path of the markdown file
 * @returns {{title: string, modified: string, size: number}} `modified` is the
 *   ISO-8601 mtime ("" on failure) and `size` the byte size (0 on failure)
 */
function loadFileMeta(filePath) {
  const fallbackTitle = normalizeId(basename(filePath));
  try {
    const content = readFileSync(filePath, "utf-8");
    // Split on LF or CRLF: `.` matches "\r", so splitting on "\n" alone left a
    // trailing carriage return in the titles of CRLF-encoded files.
    const head = content.split(/\r?\n/, 30);
    let title = "";
    for (const line of head) {
      const heading = line.match(/^#\s+(.+)/);
      if (heading) {
        title = heading[1].trim();
        break;
      }
    }
    const stats = statSync(filePath);
    return {
      title: title || fallbackTitle,
      modified: stats.mtime.toISOString(),
      size: stats.size
    };
  } catch {
    return { title: fallbackTitle, modified: "", size: 0 };
  }
}
|
|
53
|
+
/**
 * Enumerate the markdown notes under KNOWLEDGE_DIR.
 *
 * Two layouts are recognized: `<category>/<note>.md` (category `<category>`)
 * and `<category>/<sub>/<note>.md` (category `<category>/<sub>`). Top-level
 * names starting with "_" are skipped.
 *
 * Entries that cannot be listed or stat'ed (dangling symlinks, permission
 * errors, files removed mid-scan) are skipped, so one bad entry no longer
 * aborts the whole scan.
 *
 * @returns {Array<{id: string, title: string, category: string, path: string,
 *   modified: string, size: number}>}
 */
function scanKnowledge() {
  const entries = [];
  const listDir = (dir) => {
    try {
      return readdirSync(dir);
    } catch {
      return [];
    }
  };
  const statOrNull = (target) => {
    try {
      return statSync(target);
    } catch {
      return null;
    }
  };
  const addNote = (fileName, path, category) => {
    const meta = loadFileMeta(path);
    entries.push({
      id: normalizeId(fileName),
      title: meta.title,
      category,
      path,
      modified: meta.modified,
      size: meta.size
    });
  };

  for (const cat of listDir(KNOWLEDGE_DIR)) {
    if (cat.startsWith("_")) {
      continue;
    }
    const catPath = join(KNOWLEDGE_DIR, cat);
    if (!statOrNull(catPath)?.isDirectory()) {
      continue;
    }
    for (const sub of listDir(catPath)) {
      const subPath = join(catPath, sub);
      const subStat = statOrNull(subPath);
      if (subStat === null) {
        continue;
      }
      if (subStat.isDirectory()) {
        for (const file of listDir(subPath)) {
          if (file.endsWith(".md")) {
            addNote(file, join(subPath, file), `${cat}/${sub}`);
          }
        }
      } else if (sub.endsWith(".md")) {
        addNote(sub, subPath, cat);
      }
    }
  }
  return entries;
}
|
|
97
|
+
/**
 * List raw documents (`.md`, `.pdf`, `.txt`) waiting in RAW_DIR.
 *
 * Files directly inside `_inbox` are reported by bare file name; files inside
 * `YYYY-MM` folders are reported as `YYYY-MM/<file>`. Other entries are
 * ignored.
 *
 * Each directory is listed independently, so an entry that cannot be read
 * (for example a plain file that merely looks like a month folder) is skipped
 * instead of silently discarding everything that follows it.
 *
 * @returns {string[]} pending raw document names
 */
function scanRaw() {
  const pending = [];
  const isRawDocument = (name) => /\.(md|pdf|txt)$/.test(name);
  const listDir = (dir) => {
    try {
      return readdirSync(dir);
    } catch {
      return [];
    }
  };

  for (const entry of listDir(RAW_DIR)) {
    if (entry === "_inbox") {
      pending.push(...listDir(join(RAW_DIR, entry)).filter(isRawDocument));
    } else if (/^\d{4}-\d{2}$/.test(entry)) {
      const monthFiles = listDir(join(RAW_DIR, entry)).filter(isRawDocument);
      pending.push(...monthFiles.map((f) => `${entry}/${f}`));
    }
  }
  return pending;
}
|
|
119
|
+
/**
 * Decay records as a list, whatever shape loadDecayScores() hands back.
 * Accepts an array or an object keyed by item id; records without a string
 * `itemId` cannot be matched to a note and are dropped.
 */
function listDecayScores() {
  const loaded = loadDecayScores();
  let records = [];
  if (Array.isArray(loaded)) {
    records = loaded;
  } else if (loaded !== null && typeof loaded === "object") {
    records = Object.entries(loaded).map(([key, record]) =>
      record !== null && typeof record === "object" ? { itemId: key, ...record } : null
    );
  }
  return records.filter((record) => typeof record?.itemId === "string");
}

/**
 * Read-only MCP tools over the vault. Every tool is
 * `{ name, description, inputSchema, call }`; `call` resolves to a text result
 * whose `text` is a JSON document.
 */
var vaultTools = [
  {
    name: "vault_search",
    description: "Search the Research Vault knowledge base. Returns analyzed papers with retention scores.",
    inputSchema: {
      type: "object",
      properties: {
        query: { type: "string", description: "Search query (matches title, category)" },
        category: { type: "string", description: 'Filter by category (e.g., "ai-agents/benchmarking")' },
        limit: { type: "number", description: "Max results (default 10)" }
      }
    },
    // Arguments default to {} so the tool can be invoked without a payload,
    // like vault_batch_analyze.
    call: async ({ query, category, limit = 10 } = {}) => {
      let items = scanKnowledge();
      const scoreMap = new Map(listDecayScores().map((s) => [normalizeId(s.itemId), s]));
      if (category) {
        // Exact category or any sub-category below it.
        items = items.filter((item) => item.category === category || item.category.startsWith(category + "/"));
      }
      if (query) {
        const q = query.toLowerCase();
        items = items.filter(
          (item) =>
            item.title.toLowerCase().includes(q) ||
            item.id.toLowerCase().includes(q) ||
            item.category.toLowerCase().includes(q)
        );
      }
      const results = items.slice(0, limit).map((item) => {
        // Score ids may have been stored with single dashes; try both forms.
        const sid = item.id.replace(/--/g, "-");
        const score = scoreMap.get(item.id) || scoreMap.get(sid);
        return {
          id: item.id,
          title: item.title,
          category: item.category,
          score: score?.score ?? null,
          summaryLevel: score?.summaryLevel ?? null,
          nextReview: score?.nextReviewAt ?? null,
          accessCount: score?.accessCount ?? 0,
          modified: item.modified
        };
      });
      return {
        content: [{
          type: "text",
          text: JSON.stringify({ query, category, results, total: results.length }, null, 2)
        }]
      };
    }
  },
  {
    name: "vault_status",
    description: "Get Research Vault health \u2014 item counts by decay level, top/bottom retention.",
    inputSchema: { type: "object", properties: {} },
    call: async () => {
      const scores = listDecayScores();
      const entries = scanKnowledge();
      const countLevel = (level) => scores.filter((s) => s.summaryLevel === level).length;
      const sorted = [...scores].sort((a, b) => b.score - a.score);
      // Resolve a score record to its note title, falling back to the raw id.
      const titleFor = (s) => {
        const sid = s.itemId.replace(/--/g, "-");
        const entry = entries.find(
          (e) => normalizeId(e.id) === normalizeId(s.itemId) || normalizeId(e.id) === normalizeId(sid)
        );
        return entry?.title || s.itemId;
      };
      const top5 = sorted.slice(0, 5).map((s) => ({
        itemId: s.itemId,
        score: s.score,
        accesses: s.accessCount,
        title: titleFor(s)
      }));
      const bottom5 = sorted.slice(-5).reverse().map((s) => ({
        itemId: s.itemId,
        score: s.score,
        // Records written without a lastAccess used to crash the whole report.
        lastAccess: typeof s.lastAccess === "string" ? s.lastAccess.slice(0, 10) : null,
        title: titleFor(s)
      }));
      const pending = scanRaw();
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            total: entries.length,
            analyzed: scores.length,
            deep: countLevel("deep"),
            shallow: countLevel("shallow"),
            dormant: countLevel("none"),
            pending_raw: pending.length,
            top5,
            bottom5
          }, null, 2)
        }]
      };
    }
  },
  {
    name: "vault_batch_analyze",
    description: "Check batch analyze status and pending papers in the raw queue.",
    inputSchema: {
      type: "object",
      properties: {
        count: { type: "number", description: "Preview N papers (default 5)" }
      }
    },
    call: async ({ count = 5 } = {}) => {
      const pending = scanRaw();
      const entries = scanKnowledge();
      const analyzedIds = new Set(entries.map((e) => normalizeId(e.id)));
      // scanRaw() reports month-folder files as "YYYY-MM/<file>" and keeps the
      // .pdf/.txt extension; note ids carry neither. Compare on the bare file
      // name, with and without its raw extension, so analyzed papers are
      // actually recognized.
      const isAnalyzed = (rawName) => {
        const file = basename(rawName);
        return (
          analyzedIds.has(normalizeId(file)) ||
          analyzedIds.has(normalizeId(file.replace(/\.(md|pdf|txt)$/, "")))
        );
      };
      const unanalyzed = pending.filter((p) => !isAnalyzed(p));
      if (unanalyzed.length === 0) {
        return { content: [{ type: "text", text: JSON.stringify({ message: "Queue empty \u2014 all papers analyzed", analyzed: entries.length }) }] };
      }
      return {
        content: [{
          type: "text",
          text: JSON.stringify({
            message: `${unanalyzed.length} papers pending analysis`,
            pending: unanalyzed.length,
            preview: unanalyzed.slice(0, count),
            hint: "cd ~/Desktop/research-vault && bun run scripts/batch-analyze.ts --count N"
          }, null, 2)
        }]
      };
    }
  },
  {
    name: "vault_taxonomy",
    description: "Get the Research Vault taxonomy \u2014 all categories and counts.",
    inputSchema: { type: "object", properties: {} },
    call: async () => {
      const taxonomy = loadTaxonomy();
      const entries = scanKnowledge();
      const catCounts = {};
      for (const e of entries) {
        catCounts[e.category] = (catCounts[e.category] || 0) + 1;
      }
      return {
        content: [{
          type: "text",
          text: JSON.stringify({ taxonomy, categories: catCounts }, null, 2)
        }]
      };
    }
  }
];
|
|
255
|
+
|
|
256
|
+
// src/vault_write.ts
|
|
257
|
+
import { readFileSync as readFileSync3, writeFileSync as writeFileSync2, existsSync as existsSync3, statSync as statSync2, mkdirSync as mkdirSync2, unlinkSync, realpathSync, readdirSync as readdirSync2 } from "fs";
|
|
258
|
+
import { join as join3, dirname, basename as basename2, resolve as pathResolve } from "path";
|
|
259
|
+
import { homedir as homedir2 } from "os";
|
|
260
|
+
|
|
261
|
+
// src/vault_jobs.ts
|
|
262
|
+
import { readFileSync as readFileSync2, writeFileSync, existsSync as existsSync2, mkdirSync } from "fs";
|
|
263
|
+
import { join as join2 } from "path";
|
|
264
|
+
import { createHash, randomUUID } from "crypto";
|
|
265
|
+
// File names used inside <vaultRoot>/.meta.
var JOBS_FILE = "ingest-jobs.json";
var CHECKSUMS_FILE = "checksums.json";

/**
 * JSON-file backed store for ingest jobs, kept in `<vaultRoot>/.meta`.
 *
 * Jobs are persisted as one object keyed by job id. Every operation re-reads
 * the file and writes the whole document back, so there is no in-memory cache.
 * Lookups only consider the document's own keys: ids such as "constructor" or
 * "toString" must not resolve to members inherited from Object.prototype.
 */
class IngestJobStore {
  vaultRoot;
  metaDir;

  /** @param {string} vaultRoot vault root; its `.meta` folder is created if absent */
  constructor(vaultRoot) {
    this.vaultRoot = vaultRoot;
    this.metaDir = join2(this.vaultRoot, ".meta");
    if (!existsSync2(this.metaDir)) {
      mkdirSync(this.metaDir, { recursive: true });
    }
  }

  jobsPath() {
    return join2(this.metaDir, JOBS_FILE);
  }

  checksumsPath() {
    return join2(this.metaDir, CHECKSUMS_FILE);
  }

  /** @returns {object} the jobs document; {} when missing, unreadable or not an object */
  loadJobs() {
    try {
      const parsed = JSON.parse(readFileSync2(this.jobsPath(), "utf-8"));
      // A file containing `null` or a primitive would crash every writer.
      return parsed !== null && typeof parsed === "object" ? parsed : {};
    } catch {
      return {};
    }
  }

  saveJobs(jobs) {
    writeFileSync(this.jobsPath(), JSON.stringify(jobs, null, 2), "utf-8");
  }

  /** @returns {object} the checksum document; {} when missing or unreadable */
  loadChecksums() {
    try {
      return JSON.parse(readFileSync2(this.checksumsPath(), "utf-8"));
    } catch {
      return {};
    }
  }

  saveChecksums(store) {
    writeFileSync(this.checksumsPath(), JSON.stringify(store, null, 2), "utf-8");
  }

  /**
   * Persist a new job in state "queued".
   * @param {{source: string, value: string, category?: string}} input
   * @returns {Promise<object>} the created job (category defaults to "inbox")
   */
  async createJob(input) {
    const jobs = this.loadJobs();
    const now = new Date().toISOString();
    const job = {
      jobId: randomUUID(),
      source: input.source,
      value: input.value,
      category: input.category ?? "inbox",
      status: "queued",
      rawPath: null,
      metadata: null,
      createdAt: now,
      updatedAt: now
    };
    jobs[job.jobId] = job;
    this.saveJobs(jobs);
    return job;
  }

  /** @returns {Promise<object|null>} the stored job, or null for an unknown id */
  async getJob(jobId) {
    const jobs = this.loadJobs();
    return Object.hasOwn(jobs, jobId) ? jobs[jobId] ?? null : null;
  }

  /**
   * Merge `updates` into a stored job and refresh `updatedAt`.
   * @returns {Promise<object|null>} the updated job, or null for an unknown id
   */
  async updateJob(jobId, updates) {
    const jobs = this.loadJobs();
    if (!Object.hasOwn(jobs, jobId) || !jobs[jobId]) {
      return null;
    }
    jobs[jobId] = { ...jobs[jobId], ...updates, updatedAt: new Date().toISOString() };
    this.saveJobs(jobs);
    return jobs[jobId];
  }

  /** @returns {Promise<object[]>} every stored job */
  async getAllJobs() {
    return Object.values(this.loadJobs());
  }
}
|
|
337
|
+
/**
 * Compute the SHA-256 digest of a file's bytes.
 *
 * Uses `Bun.file` when running under Bun (as the bundle is built for) and
 * falls back to the synchronous fs read otherwise, so the helper no longer
 * throws a ReferenceError on runtimes without the `Bun` global.
 *
 * @param {string} filePath file to hash
 * @returns {Promise<string>} lowercase hex digest
 */
async function computeChecksum(filePath) {
  const bytes =
    typeof Bun !== "undefined"
      ? Buffer.from(await Bun.file(filePath).arrayBuffer())
      : readFileSync2(filePath);
  return createHash("sha256").update(bytes).digest("hex");
}
|
|
344
|
+
|
|
345
|
+
// src/ingest/arxiv.ts
|
|
346
|
+
// Query endpoint used by fetchArxivMetadata().
var ARXIV_API = "https://export.arxiv.org/api/query";

/**
 * Extract a new-style arXiv identifier (`YYMM.NNNNN`, optional `vN` suffix).
 *
 * Accepted inputs: the bare id (surrounding whitespace ignored), an
 * `arxiv.org/abs/...` URL, an `arxiv.org/pdf/...` URL (with or without a
 * trailing `.pdf`), and the `arXiv:<id>` citation form.
 *
 * @param {string} value user-supplied id or URL
 * @returns {string|null} the identifier, or null when none is found
 */
function parseArxivId(value) {
  const input = String(value ?? "").trim();
  if (/^\d{4}\.\d{4,}(v\d+)?$/.test(input)) {
    return input;
  }
  const match = input.match(/(?:arxiv\.org\/(?:abs|pdf)\/|abs\/?|arxiv:\s*)(\d{4}\.\d{4,}(?:v\d+)?)/i);
  return match ? match[1] : null;
}
|
|
354
|
+
/**
 * Request the arXiv API record for one identifier and parse the response.
 *
 * @param {string} id arXiv identifier, passed as the `id_list` query value
 * @returns {Promise<object>} whatever parseArxivXml() extracts from the body
 * @throws {Error} "ArXiv API error: <status>" on a non-2xx response
 */
async function fetchArxivMetadata(id) {
  const response = await fetch(`${ARXIV_API}?id_list=${id}`);
  if (response.ok) {
    return parseArxivXml(await response.text());
  }
  throw new Error(`ArXiv API error: ${response.status}`);
}
|
|
362
|
+
/**
 * Extract paper metadata from an arXiv API (Atom) response.
 *
 * The feed carries its own `<title>` ahead of the entries, so every field is
 * read from inside the first `<entry>` element; matching against the whole
 * document returned the feed title instead of the paper title. Input without
 * any `<entry>` is scanned as a whole, as before. The five predefined XML
 * entities are decoded in the extracted text.
 *
 * @param {string} xml response body
 * @returns {{title: string|null, authors: string[]|null, abstract: string|null,
 *   arxivId: null, categories: string[]|null}} `arxivId` is always null here;
 *   the caller fills it in.
 */
function parseArxivXml(xml) {
  const entryMatch = xml.match(/<entry[^>]*>([\s\S]*?)<\/entry>/i);
  const scope = entryMatch ? entryMatch[1] : xml;

  // "&amp;" is decoded last so "&amp;lt;" yields the text "&lt;", not "<".
  const decodeEntities = (text) =>
    text
      .replace(/&lt;/g, "<")
      .replace(/&gt;/g, ">")
      .replace(/&quot;/g, '"')
      .replace(/&(?:apos|#39);/g, "'")
      .replace(/&amp;/g, "&");
  const clean = (fragment) => decodeEntities(fragment.replace(/\s+/g, " ").trim());

  const titleMatch = scope.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  const summaryMatch = scope.match(/<summary[^>]*>([\s\S]*?)<\/summary>/i);
  const authors = [...scope.matchAll(/<author>[\s\S]*?<name>([\s\S]*?)<\/name>[\s\S]*?<\/author>/gi)].map(
    (m) => clean(m[1])
  );
  const categories = [...scope.matchAll(/<category[^>]*term="([^"]+)"/gi)].map((m) => m[1]);

  return {
    title: titleMatch ? clean(titleMatch[1]) : null,
    authors: authors.length ? authors : null,
    abstract: summaryMatch ? clean(summaryMatch[1]) : null,
    arxivId: null,
    categories: categories.length ? categories : null
  };
}
|
|
385
|
+
|
|
386
|
+
// src/ingest/html.ts
|
|
387
|
+
/**
 * Reject URLs that must not be fetched on behalf of a tool caller.
 *
 * Allowed: http/https URLs whose host is not a cloud metadata endpoint, a
 * loopback name/address, a private (RFC 1918) IPv4 address, a link-local
 * address, or an IPv6 unique-local/link-local address.
 *
 * The private-range pattern previously anchored truncated forms (`10.<n>`,
 * `192.168.<n>`), so real addresses such as 10.0.0.5 or 192.168.1.1 passed.
 *
 * NOTE(review): only the literal host is inspected. Host names that resolve to
 * internal addresses and HTTP redirects are not covered by this check.
 *
 * @param {string} url URL to validate
 * @returns {void}
 * @throws {Error} describing the first rule the URL violates
 */
function validateUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error(`Invalid URL: ${url}`);
  }
  const scheme = parsed.protocol.toLowerCase();
  if (scheme !== "http:" && scheme !== "https:") {
    throw new Error(`URL scheme not allowed: ${scheme}. Only http/https permitted.`);
  }
  // URL normalizes numeric hosts to dotted-quad form and wraps IPv6 in [].
  const hostname = parsed.hostname.toLowerCase();
  if (hostname === "169.254.169.254" || hostname === "metadata.google.internal") {
    throw new Error(`Cloud metadata endpoint blocked: ${hostname}`);
  }
  const isLoopback =
    hostname === "localhost" ||
    hostname.endsWith(".localhost") ||
    hostname === "0.0.0.0" ||
    /^127\.\d+\.\d+\.\d+$/.test(hostname) ||
    hostname === "::1" ||
    hostname === "[::1]";
  if (isLoopback) {
    throw new Error(`Localhost not permitted: ${hostname}`);
  }
  const isPrivateV4 = /^(10\.\d+\.\d+\.\d+|172\.(1[6-9]|2\d|3[01])\.\d+\.\d+|192\.168\.\d+\.\d+)$/.test(hostname);
  // fc00::/7 (unique local) — the IPv6 counterpart of the private ranges.
  const isPrivateV6 = /^\[f[cd][0-9a-f]{2}:/.test(hostname);
  if (isPrivateV4 || isPrivateV6) {
    throw new Error(`Private IP not permitted: ${hostname}`);
  }
  // 169.254.0.0/16 and fe80::/10.
  if (hostname.startsWith("169.254.") || /^\[fe[89ab][0-9a-f]:/.test(hostname)) {
    throw new Error(`Link-local IP blocked: ${hostname}`);
  }
}
|
|
413
|
+
/**
 * Download a page and reduce it to plain text.
 *
 * Steps: validate the URL, fetch it, drop script/style/nav/footer/header/aside
 * elements and comments, turn block endings and `<br>` into newlines, strip
 * the remaining tags, decode a handful of entities, collapse runs of blank
 * lines and trim.
 *
 * Entities are decoded with `&amp;` last: decoding it first turned escaped
 * text such as `&amp;lt;` into `<` instead of the literal `&lt;`.
 *
 * @param {string} url page to fetch (must pass validateUrl)
 * @returns {Promise<string>} extracted text
 * @throws {Error} from validateUrl, or "HTTP <status> fetching <url>"
 */
async function fetchHtml(url) {
  validateUrl(url);
  const res = await fetch(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 research-vault-mcp/1.1.0",
      Accept: "text/html"
    }
  });
  if (!res.ok) {
    throw new Error(`HTTP ${res.status} fetching ${url}`);
  }
  const html = await res.text();

  let text = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<nav[\s\S]*?<\/nav>/gi, "")
    .replace(/<footer[\s\S]*?<\/footer>/gi, "")
    .replace(/<header[\s\S]*?<\/header>/gi, "")
    .replace(/<aside[\s\S]*?<\/aside>/gi, "")
    .replace(/<!--[\s\S]*?-->/g, "");
  // <br> is a void element: it never appears as a closing tag, so it needs
  // its own rule to become a line break.
  text = text.replace(/<br\s*\/?>/gi, "\n").replace(/<\/(p|div|br|h[1-6]|li|tr)>/gi, "\n");
  text = text.replace(/<[^>]+>/g, "");
  text = text
    .replace(/&nbsp;/g, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, "&");
  text = text.replace(/\n{3,}/g, "\n\n").trim();
  return text;
}
|
|
434
|
+
|
|
435
|
+
// src/vault_write.ts
|
|
436
|
+
// Write-side view of the vault layout. Every path hangs off VAULT_ROOT2, which
// the VAULT_ROOT environment variable overrides when it is set.
const VAULT_ROOT2 = process.env.VAULT_ROOT ?? `${homedir2()}/Documents/Evensong/research-vault`;
const META_DIR2 = join3(VAULT_ROOT2, ".meta");
// Notes written by saveNote() go below this folder.
const KNOWLEDGE_DIR2 = join3(VAULT_ROOT2, "knowledge");
// Ingested source material goes below this folder.
const RAW_DIR2 = join3(VAULT_ROOT2, "raw");
const DECAY_PATH2 = join3(META_DIR2, "decay-scores.json");
const CHECKSUMS_PATH = join3(META_DIR2, "checksums.json");
|
|
441
|
+
/**
 * Create directory `p`, including missing parents, unless something already
 * exists at that path.
 *
 * @param {string} p directory path
 */
function ensureDir(p) {
  if (existsSync3(p)) {
    return;
  }
  mkdirSync2(p, { recursive: true });
}
|
|
445
|
+
/**
 * Resolve `target` below `root` and refuse anything that lands outside it.
 *
 * Both the root and the target are canonicalized before they are compared:
 *  - the root itself may be reached through a symlink; comparing the raw root
 *    with a realpath'd target rejected every existing file in that case;
 *  - a target that does not exist yet is resolved through its deepest existing
 *    ancestor, so a symlinked directory cannot be used to create files outside
 *    the root.
 *
 * @param {string} root directory the result must stay inside
 * @param {string} target path relative to `root`
 * @returns {string} canonical absolute path of the target
 * @throws {Error} "Path traversal detected: target outside vault root"
 */
function safePath(root, target) {
  // realpath of the deepest ancestor that exists, plus the missing remainder.
  const canonicalize = (p) => {
    let existing = pathResolve(p);
    const missing = [];
    for (;;) {
      try {
        return join3(realpathSync(existing), ...missing);
      } catch {
        const parent = dirname(existing);
        if (parent === existing) {
          // Nothing on the path could be resolved; fall back to the lexical form.
          return pathResolve(p);
        }
        missing.unshift(basename2(existing));
        existing = parent;
      }
    }
  };
  // Compare with forward slashes and without a trailing separator.
  const comparable = (p) => p.replace(/\\/g, "/").replace(/\/$/, "");

  const resolved = canonicalize(join3(root, target));
  const rootNorm = comparable(canonicalize(root));
  const resolvedNorm = comparable(resolved);
  if (resolvedNorm !== rootNorm && !resolvedNorm.startsWith(rootNorm + "/")) {
    throw new Error("Path traversal detected: target outside vault root");
  }
  return resolved;
}
|
|
460
|
+
/**
 * Strip naming decorations from a file name or item id, in this order: a
 * leading 8-digit / 4-digit stamp (`DDDDDDDD-DDDD-`, one or two dashes after
 * the first group), a leading run of 10+ digits followed by one or two dashes,
 * and a trailing `.md`.
 *
 * @param {string} raw file name or item id
 * @returns {string} the bare id
 */
function normalizeId2(raw) {
  const withoutDateStamp = raw.replace(/^\d{8}--?\d{4}-/, "");
  const withoutNumericPrefix = withoutDateStamp.replace(/^(\d{10,})--?/, "");
  return withoutNumericPrefix.replace(/\.md$/, "");
}
|
|
463
|
+
/**
 * Parse the decay-score document at DECAY_PATH2.
 *
 * @returns {*} the parsed JSON, or {} when the file cannot be read or parsed
 */
function loadDecayScores2() {
  let scores = {};
  try {
    scores = JSON.parse(readFileSync3(DECAY_PATH2, "utf-8"));
  } catch {
    // Best effort: start from an empty document.
  }
  return scores;
}

/**
 * Write the decay-score document to DECAY_PATH2 as 2-space indented JSON,
 * creating the containing directory first.
 *
 * @param {*} scores document to persist
 */
function saveDecayScores(scores) {
  const targetDir = dirname(DECAY_PATH2);
  ensureDir(targetDir);
  writeFileSync2(DECAY_PATH2, JSON.stringify(scores, null, 2), "utf-8");
}
|
|
474
|
+
/**
 * Parse the checksum document at CHECKSUMS_PATH.
 *
 * @returns {*} the parsed JSON, or {} when the file cannot be read or parsed
 */
function loadChecksums() {
  let store = {};
  try {
    store = JSON.parse(readFileSync3(CHECKSUMS_PATH, "utf-8"));
  } catch {
    // Best effort: no checksums recorded yet.
  }
  return store;
}

/**
 * Write the checksum document to CHECKSUMS_PATH as 2-space indented JSON,
 * creating the containing directory first.
 *
 * @param {*} store document to persist
 */
function saveChecksums(store) {
  const targetDir = dirname(CHECKSUMS_PATH);
  ensureDir(targetDir);
  writeFileSync2(CHECKSUMS_PATH, JSON.stringify(store, null, 2), "utf-8");
}
|
|
485
|
+
var jobStore = new IngestJobStore(VAULT_ROOT2);
|
|
486
|
+
/**
 * Fetch arXiv metadata for an id/URL and store it as
 * `raw/<category>/arxiv-<id>.meta.json`, tracking progress in the job store.
 *
 * Changes over the previous version:
 *  - the destination goes through safePath(), so a `category` such as
 *    "../../x" can no longer write outside the raw directory;
 *  - a failure after the job was created marks the job "failed" instead of
 *    leaving it in "fetching" forever (the error is still rethrown);
 *  - the job returned reflects the final stored state (rawPath, metadata)
 *    rather than the snapshot taken at creation.
 *
 * @param {string} value arXiv id or URL
 * @param {string} [category] raw/ sub-directory (defaults to "inbox")
 * @returns {Promise<object>} the job record
 * @throws {Error} for an invalid id, an unsafe category, or a fetch/write failure
 */
async function ingestArxiv(value, category) {
  const id = parseArxivId(value);
  if (!id) {
    throw new Error(`Invalid ArXiv ID: ${value}`);
  }
  // Validate the destination before anything is recorded or fetched.
  const metaPath = safePath(RAW_DIR2, join3(category ?? "inbox", `arxiv-${id}.meta.json`));
  const job = await jobStore.createJob({ source: "arxiv", value: id, category });
  try {
    await jobStore.updateJob(job.jobId, { status: "fetching" });
    const metadata = await fetchArxivMetadata(id);
    metadata.arxivId = id;
    ensureDir(dirname(metaPath));
    writeFileSync2(metaPath, JSON.stringify(metadata, null, 2), "utf-8");
    const hash = await computeChecksum(metaPath);
    const checksums = loadChecksums();
    checksums[metaPath] = { sha256: hash, writtenAt: new Date().toISOString() };
    saveChecksums(checksums);
    const updated = await jobStore.updateJob(job.jobId, { status: "queued", rawPath: metaPath, metadata });
    return updated ?? job;
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // Recording the failure is best effort; the original error matters more.
    await jobStore.updateJob(job.jobId, { status: "failed", error: message }).catch(() => {});
    throw e;
  }
}
|
|
504
|
+
/**
 * Queue a web page for ingestion and return immediately; the download runs in
 * the background and reports through the job store ("queued" with `rawPath`
 * on success, "failed" with `error` otherwise).
 *
 * Changes over the previous version:
 *  - `category` is validated with safePath() up front and the destination is
 *    built through it, so it cannot point outside the raw directory;
 *  - a failure while recording a failed job no longer surfaces as an unhandled
 *    promise rejection;
 *  - the job returned carries the current "fetching" status rather than the
 *    creation-time snapshot.
 *
 * @param {string} value page URL
 * @param {string} [category] raw/ sub-directory (defaults to "inbox")
 * @returns {Promise<object>} the job record at the time the download starts
 * @throws {Error} when `category` escapes the raw directory
 */
async function ingestUrl(value, category) {
  const targetDir = category ?? "inbox";
  // Reject unsafe categories before a job exists or any request is made.
  safePath(RAW_DIR2, targetDir);
  const job = await jobStore.createJob({ source: "url", value, category });
  const started = await jobStore.updateJob(job.jobId, { status: "fetching" });

  // Deliberately not awaited: callers poll the job for the outcome.
  void (async () => {
    try {
      const text = await fetchHtml(value);
      const safeName = value.replace(/[^a-z0-9]/gi, "_").slice(0, 64);
      const rawPath = safePath(RAW_DIR2, join3(targetDir, `${Date.now()}--${safeName}.html`));
      ensureDir(dirname(rawPath));
      writeFileSync2(rawPath, text, "utf-8");
      const hash = await computeChecksum(rawPath);
      const checksums = loadChecksums();
      checksums[rawPath] = { sha256: hash, writtenAt: new Date().toISOString() };
      saveChecksums(checksums);
      await jobStore.updateJob(job.jobId, { status: "queued", rawPath });
    } catch (e) {
      const message = e instanceof Error ? e.message : String(e);
      try {
        await jobStore.updateJob(job.jobId, { status: "failed", error: message });
      } catch {
        // Nothing left to report to: the job store itself is unavailable.
      }
    }
  })();

  return started ?? job;
}
|
|
525
|
+
/**
 * Copy a local file into `raw/<category>/<timestamp>--<name>` and record it as
 * a queued job with its checksum.
 *
 * Changes over the previous version:
 *  - the destination goes through safePath(), so `category` cannot direct the
 *    copy outside the raw directory;
 *  - the job returned includes the stored `rawPath` instead of the snapshot
 *    taken at creation (where it was still null).
 *
 * @param {string} value path of the file to ingest
 * @param {string} [category] raw/ sub-directory (defaults to "inbox")
 * @returns {Promise<object>} the job record
 * @throws {Error} "File not found: <value>", or for an unsafe category
 */
async function ingestFile(value, category) {
  if (!existsSync3(value)) {
    throw new Error(`File not found: ${value}`);
  }
  // Validate the destination before anything is recorded or copied.
  const destPath = safePath(RAW_DIR2, join3(category ?? "inbox", `${Date.now()}--${basename2(value)}`));
  const job = await jobStore.createJob({ source: "file", value, category });
  ensureDir(dirname(destPath));
  writeFileSync2(destPath, readFileSync3(value));
  const hash = await computeChecksum(destPath);
  const checksums = loadChecksums();
  checksums[destPath] = { sha256: hash, writtenAt: new Date().toISOString() };
  saveChecksums(checksums);
  const updated = await jobStore.updateJob(job.jobId, { status: "queued", rawPath: destPath });
  return updated ?? job;
}
|
|
541
|
+
/**
 * Write a note as `knowledge/<category>/<timestamp>--<title-slug>.md`, then
 * register an initial decay record and a checksum for it.
 *
 * The decay document is updated in whatever shape it already has. When it is
 * an array of records the new record is appended (or replaces one with the
 * same `itemId`); assigning a string key on an array would be silently dropped
 * when the document is serialized. When it is an object the record is stored
 * under its id. Anything else is replaced by a fresh object.
 *
 * @param {{title: string, content: string, category: string,
 *   summaryLevel?: string}} input
 * @returns {Promise<{id: string, path: string, writtenAt: string}>}
 * @throws {Error} when the category resolves outside the knowledge directory
 */
async function saveNote(input) {
  const safeTitle = input.title.replace(/[^a-z0-9]/gi, "-").slice(0, 32);
  const id = `${Date.now()}--${safeTitle}`;
  const filePath = safePath(KNOWLEDGE_DIR2, join3(input.category, `${id}.md`));
  ensureDir(dirname(filePath));
  writeFileSync2(filePath, `# ${input.title}\n\n${input.content}\n`, "utf-8");

  const now = new Date().toISOString();
  const record = {
    itemId: id,
    score: 0.5,
    lastAccess: now,
    accessCount: 0,
    summaryLevel: input.summaryLevel ?? "none",
    nextReviewAt: now,
    difficulty: 0.5
  };
  const loaded = loadDecayScores2();
  const scores = loaded !== null && typeof loaded === "object" ? loaded : {};
  if (Array.isArray(scores)) {
    const existing = scores.findIndex((s) => s?.itemId === id);
    if (existing === -1) {
      scores.push(record);
    } else {
      scores[existing] = record;
    }
  } else {
    scores[id] = record;
  }
  saveDecayScores(scores);

  const hash = await computeChecksum(filePath);
  const checksums = loadChecksums();
  checksums[filePath] = { sha256: hash, writtenAt: now };
  saveChecksums(checksums);
  return { id, path: filePath, writtenAt: now };
}
|
|
568
|
+
/**
 * Read one vault entry, addressed either by a path relative to the vault root
 * or by note id (`path` wins when both are given).
 *
 * @param {{id?: string, path?: string}} input
 * @returns {{id: string, title: string, category: string, content: string,
 *   modified: string, size: number}} `title` is the first `# ` heading or the
 *   normalized file name; `category` is the entry's directory relative to the
 *   vault root ("" when the file sits directly in the root)
 * @throws {Error} "id or path required", "Entry not found: <id>", or whatever
 *   safePath / the file reads throw
 */
function getEntry(input) {
  const locate = () => {
    if (input.path) {
      return safePath(VAULT_ROOT2, input.path);
    }
    if (!input.id) {
      throw new Error("id or path required");
    }
    const match = scanKnowledge2().find((e) => normalizeId2(e.id) === normalizeId2(input.id));
    if (!match) {
      throw new Error(`Entry not found: ${input.id}`);
    }
    return match.path;
  };

  const filePath = locate();
  const content = readFileSync3(filePath, "utf-8");
  const stats = statSync2(filePath);
  const relPath = filePath.replace(VAULT_ROOT2 + "/", "");
  const bareName = normalizeId2(basename2(filePath));
  const heading = content.match(/^#\s+(.+)/m);
  const parentSegments = relPath.split("/").slice(0, -1);
  return {
    id: bareName,
    title: heading?.[1] ?? bareName,
    category: relPath.includes("/") ? parentSegments.join("/") : "",
    content,
    modified: stats.mtime.toISOString(),
    size: stats.size
  };
}
|
|
592
|
+
/**
 * Delete one vault entry, addressed by a path relative to the vault root or by
 * note id, together with its decay record and checksum.
 *
 * The decay record is looked up under both the file's full id (file name
 * without `.md`, which is how saveNote() keys it) and the normalized id.
 * Removing only the normalized key never matched records created by
 * saveNote(), leaving orphaned scores behind. Array-shaped decay documents are
 * filtered by `itemId` using the same two ids.
 *
 * @param {{id?: string, path?: string}} input
 * @returns {{deleted: true, path: string}}
 * @throws {Error} "id or path required", "Entry not found: <id>", or whatever
 *   safePath / unlinkSync throw
 */
function deleteEntry(input) {
  let filePath;
  if (input.path) {
    filePath = safePath(VAULT_ROOT2, input.path);
  } else if (input.id) {
    const entry = scanKnowledge2().find((e) => normalizeId2(e.id) === normalizeId2(input.id));
    if (!entry) {
      throw new Error(`Entry not found: ${input.id}`);
    }
    filePath = entry.path;
  } else {
    throw new Error("id or path required");
  }
  unlinkSync(filePath);

  const fileName = basename2(filePath);
  const obsoleteIds = new Set([fileName.replace(/\.md$/, ""), normalizeId2(fileName)]);
  const loaded = loadDecayScores2();
  let scores;
  if (Array.isArray(loaded)) {
    scores = loaded.filter((s) => !obsoleteIds.has(s?.itemId));
  } else {
    scores = loaded !== null && typeof loaded === "object" ? loaded : {};
    for (const key of obsoleteIds) {
      delete scores[key];
    }
  }
  saveDecayScores(scores);

  const checksums = loadChecksums();
  delete checksums[filePath];
  saveChecksums(checksums);
  return { deleted: true, path: filePath };
}
|
|
614
|
+
/**
 * Enumerate markdown notes under KNOWLEDGE_DIR2 for id lookups.
 *
 * Both `<category>/<note>.md` and `<category>/<sub>/<note>.md` are reported
 * (category `<category>` and `<category>/<sub>` respectively). The nested
 * level was previously ignored here although the search scanner in this file
 * lists it, so notes returned by a search could not be fetched or deleted by
 * id. Top-level names starting with "_" and anything that cannot be listed or
 * stat'ed are skipped.
 *
 * @returns {Array<{id: string, title: string, category: string, path: string,
 *   modified: string, size: number}>} `title` is the normalized file name
 */
function scanKnowledge2() {
  const entries = [];
  const listDir = (dir) => {
    try {
      return readdirSync2(dir);
    } catch {
      return [];
    }
  };
  const statOrNull = (target) => {
    try {
      return statSync2(target);
    } catch {
      return null;
    }
  };
  const addNote = (fileName, path, category, stats) => {
    entries.push({
      id: normalizeId2(fileName),
      title: normalizeId2(fileName),
      category,
      path,
      modified: stats.mtime.toISOString(),
      size: stats.size
    });
  };

  for (const cat of listDir(KNOWLEDGE_DIR2)) {
    if (cat.startsWith("_")) {
      continue;
    }
    const catPath = join3(KNOWLEDGE_DIR2, cat);
    if (!statOrNull(catPath)?.isDirectory()) {
      continue;
    }
    for (const name of listDir(catPath)) {
      const childPath = join3(catPath, name);
      const childStat = statOrNull(childPath);
      if (childStat === null) {
        continue;
      }
      if (childStat.isDirectory()) {
        for (const file of listDir(childPath)) {
          if (!file.endsWith(".md")) {
            continue;
          }
          const filePath = join3(childPath, file);
          const fileStat = statOrNull(filePath);
          if (fileStat !== null) {
            addNote(file, filePath, `${cat}/${name}`, fileStat);
          }
        }
      } else if (name.endsWith(".md")) {
        addNote(name, childPath, cat, childStat);
      }
    }
  }
  return entries;
}
|
|
645
|
+
/**
 * Write-side MCP tools: raw ingest, note save, entry read, entry delete.
 *
 * Every `call` has the same envelope: the operation's result is returned as a
 * JSON text item, and any thrown value is converted into a text item carrying
 * the message plus `isError: true`. The envelope is shared through `guarded`.
 */
var vaultWriteTools = (() => {
  const success = (payload) => ({ content: [{ type: "text", text: JSON.stringify(payload) }] });
  const failure = (e) => ({
    content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
    isError: true
  });
  const guarded = (operation) => async (args) => {
    try {
      return success(await operation(args));
    } catch (e) {
      return failure(e);
    }
  };
  // Anything that is not "arxiv" or "url" is handled as a file ingest.
  const ingestBySource = (args) => {
    const category = args.category ?? "inbox";
    switch (args.source) {
      case "arxiv":
        return ingestArxiv(args.value, category);
      case "url":
        return ingestUrl(args.value, category);
      default:
        return ingestFile(args.value, category);
    }
  };
  const idOrPathSchema = () => ({
    type: "object",
    properties: {
      id: { type: "string" },
      path: { type: "string" }
    }
  });

  return [
    {
      name: "vault_raw_ingest",
      description: "Fire-and-forget ingest of URL/file/ArXiv to raw vault layer. Returns jobId for async progress polling.",
      inputSchema: {
        type: "object",
        properties: {
          source: { type: "string", enum: ["url", "file", "arxiv"] },
          value: { type: "string", description: "URL / absolute file path / ArXiv ID or URL" },
          category: { type: "string", description: 'raw/ subdirectory, default "inbox"' },
          priority: { type: "string", enum: ["high", "low"], default: "low" },
          arxivMetadata: { type: "boolean", description: "ArXiv: fetch metadata before storing, default true" }
        },
        required: ["source", "value"]
      },
      call: guarded(ingestBySource)
    },
    {
      name: "vault_note_save",
      description: "Write a structured note to the knowledge layer.",
      inputSchema: {
        type: "object",
        properties: {
          title: { type: "string" },
          content: { type: "string" },
          category: { type: "string" },
          tags: { type: "array", items: { type: "string" } },
          summaryLevel: { type: "string", enum: ["deep", "shallow", "none"] }
        },
        required: ["title", "content", "category"]
      },
      call: guarded((args) => saveNote(args))
    },
    {
      name: "vault_get",
      description: "Read full content of a vault entry by id or path.",
      inputSchema: idOrPathSchema(),
      call: guarded((args) => getEntry(args))
    },
    {
      name: "vault_delete",
      description: "Delete a vault entry (raw or knowledge).",
      inputSchema: idOrPathSchema(),
      call: guarded((args) => deleteEntry(args))
    }
  ];
})();
|
|
739
|
+
|
|
740
|
+
// src/amplify.ts
|
|
741
|
+
// Base URL of the Amplify API.
const AMPLIFY_BASE = "https://prod-api.vanderbilt.ai";
// Holds `{ apiKey }` once configureAmplify() has been called; null until then.
let config = null;

/**
 * Store the API key used for subsequent Amplify requests.
 *
 * @param {string} apiKey bearer token
 */
function configureAmplify(apiKey) {
  config = { apiKey };
}

/**
 * Build the request headers for an authenticated JSON call.
 *
 * @returns {{Authorization: string, "Content-Type": string}}
 * @throws {Error} when no (non-empty) API key has been configured
 */
function getHeaders() {
  const apiKey = config?.apiKey;
  if (apiKey) {
    return {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json"
    };
  }
  throw new Error("Amplify API key not configured. Call configureAmplify() first.");
}
|
|
754
|
+
/**
 * Converts anything thrown inside an Amplify tool into the tool error result
 * shape (`isError: true` with an "Error: …" text item).
 *
 * @param {unknown} e - The thrown value; need not be an Error instance.
 */
function amplifyErrorResult(e) {
  const message = e instanceof Error ? e.message : String(e);
  return { content: [{ type: "text", text: `Error: ${message}` }], isError: true };
}

/**
 * Calls an Amplify endpoint that answers with JSON and returns the
 * pretty-printed payload as a tool result. Never throws: failures (missing
 * API key, network error, non-2xx status, invalid JSON) become error results.
 *
 * @param {string} path - Endpoint path appended to AMPLIFY_BASE.
 * @param {object} [payload] - When given, sent as a JSON POST body; otherwise
 *   the request uses fetch's default method.
 */
async function amplifyJsonTool(path, payload) {
  try {
    const init = payload === undefined
      ? { headers: getHeaders() }
      : { method: "POST", headers: getHeaders(), body: JSON.stringify(payload) };
    const res = await fetch(`${AMPLIFY_BASE}${path}`, init);
    if (!res.ok)
      throw new Error(`HTTP ${res.status}`);
    const data = await res.json();
    return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
  } catch (e) {
    return amplifyErrorResult(e);
  }
}

/**
 * Extracts the text carried by one line of the chat event stream.
 *
 * @param {string} line - A single line (without its trailing newline).
 * @returns {{text: *, isContent: boolean} | null} `isContent` is true when the
 *   text came from `data.content`; false when the whole `data` member was used
 *   as a fallback. Null for non-`data: ` lines, unparseable JSON, or empty data.
 */
function parseAmplifySseLine(line) {
  if (!line.startsWith("data: "))
    return null;
  let payload;
  try {
    payload = JSON.parse(line.slice(6));
  } catch {
    // Lines whose payload is not JSON are skipped, as they always were.
    return null;
  }
  const data = payload?.data;
  if (!data)
    return null;
  if (data.content)
    return { text: data.content, isContent: true };
  return { text: typeof data === "string" ? data : JSON.stringify(data), isContent: false };
}

/**
 * Reads a response body to the end, invoking `onPiece` for every parsed event
 * line. Bytes are buffered until a newline arrives, so an event split across
 * two network reads is parsed exactly once instead of being dropped.
 *
 * @param {ReadableStream | null | undefined} body - The fetch response body.
 * @param {(piece: {text: *, isContent: boolean}) => void} onPiece
 * @throws {Error} "No response body" when the response carries no stream.
 */
async function readAmplifySse(body, onPiece) {
  const reader = body?.getReader();
  if (!reader)
    throw new Error("No response body");
  const decoder = new TextDecoder();
  let pending = "";
  const consume = (line) => {
    const piece = parseAmplifySseLine(line);
    if (piece)
      onPiece(piece);
  };
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done)
        break;
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      // The last element is an incomplete line (or ""); keep it for the next read.
      pending = lines.pop();
      for (const line of lines)
        consume(line);
    }
    // Flush bytes still held by the decoder plus a final line with no newline.
    pending += decoder.decode();
    if (pending)
      consume(pending);
  } catch (e) {
    // Stop the download when reading or a consumer fails part-way through.
    await reader.cancel().catch(() => {});
    throw e;
  }
}

// Tool definitions backed by the Amplify HTTP API. Each `call` resolves to a
// tool result and reports failures as `isError` results rather than throwing.
var amplifyTools = [
  {
    name: "amplify_list_models",
    description: "List available models on Vanderbilt Amplify. Returns model IDs, context windows, providers, and pricing tiers.",
    inputSchema: { type: "object", properties: {} },
    call: async () => amplifyJsonTool("/available_models")
  },
  {
    name: "amplify_chat",
    description: "Send a streaming chat message to Amplify. Returns Claude/GPT/Mistral responses via SSE.",
    inputSchema: {
      type: "object",
      required: ["message"],
      properties: {
        message: { type: "string", description: "User message" },
        modelId: { type: "string", description: "Model ID (from amplify_list_models)" },
        systemPrompt: { type: "string", description: "Optional system prompt override" },
        temperature: { type: "number", description: "Temperature (0-2, default 0.7)" },
        maxTokens: { type: "number", description: "Max output tokens (default 4000)" },
        stream: { type: "boolean", description: "If true, yield chunks via onProgress callback instead of waiting for complete response (default false)" }
      }
    },
    // Sends ONE chat request. With `stream` and an `onProgress` callback, each
    // `data.content` piece is forwarded as `{ type: "chunk", text }` while it
    // arrives and the result text is "(streamed)"; otherwise the full reply text
    // is returned (or "(no response)" when nothing was received).
    call: async ({ message, modelId, systemPrompt, temperature = 0.7, maxTokens = 4000, stream = false }, onProgress) => {
      try {
        const messages = [{ role: "user", content: message }];
        if (systemPrompt) {
          messages.unshift({ role: "system", content: systemPrompt });
        }
        const body = {
          data: {
            model: modelId || "gpt-4o",
            temperature,
            max_tokens: maxTokens,
            messages
          }
        };
        const res = await fetch(`${AMPLIFY_BASE}/chat`, {
          method: "POST",
          headers: getHeaders(),
          body: JSON.stringify(body)
        });
        if (!res.ok) {
          const err = await res.text();
          throw new Error(`HTTP ${res.status}: ${err}`);
        }
        const reportProgress = Boolean(stream) && typeof onProgress === "function";
        let fullText = "";
        await readAmplifySse(res.body, (piece) => {
          fullText += piece.text;
          // Only genuine content pieces are streamed; fallback data is not.
          if (reportProgress && piece.isContent)
            onProgress({ type: "chunk", text: piece.text });
        });
        if (reportProgress) {
          return { content: [{ type: "text", text: "(streamed)" }] };
        }
        return {
          content: [{ type: "text", text: fullText || "(no response)" }]
        };
      } catch (e) {
        return amplifyErrorResult(e);
      }
    }
  },
  {
    name: "amplify_files_query",
    description: "Query uploaded files on Amplify using semantic search. Returns relevant file chunks.",
    inputSchema: {
      type: "object",
      required: ["query"],
      properties: {
        query: { type: "string", description: "Search query" },
        limit: { type: "number", description: "Max results (default 5)" }
      }
    },
    call: async ({ query, limit = 5 }) => amplifyJsonTool("/files/query", { query, limit })
  },
  {
    name: "amplify_files_list",
    description: "List tags/categories of uploaded files on Amplify.",
    inputSchema: { type: "object", properties: {} },
    call: async () => amplifyJsonTool("/files/tags/list")
  },
  {
    name: "amplify_assistants_list",
    description: "List your Amplify assistants.",
    inputSchema: { type: "object", properties: {} },
    call: async () => amplifyJsonTool("/assistant/list")
  }
];
|
|
948
|
+
|
|
949
|
+
// src/server.ts
|
|
950
|
+
/**
 * Turns the MCP_PORT environment value into a usable TCP port.
 *
 * @param {string | undefined} raw - Raw environment value.
 * @param {number} [fallback=8765] - Port used when `raw` is unset or invalid.
 * @returns {number} An integer in 0..65535. A value that is present but not a
 *   plain decimal port is reported on stderr and replaced by `fallback`
 *   instead of silently becoming NaN.
 */
function resolveMcpPort(raw, fallback = 8765) {
  if (raw === undefined)
    return fallback;
  const text = String(raw).trim();
  if (/^\d+$/.test(text)) {
    const port = Number.parseInt(text, 10);
    if (port <= 65535)
      return port;
  }
  console.error(`[MCP] Ignoring invalid MCP_PORT ${JSON.stringify(raw)}; using ${fallback}`);
  return fallback;
}

// Interface the HTTP transport binds to. Defaults to all interfaces as before;
// set MCP_HOST (e.g. "127.0.0.1") to restrict it.
var HOST = process.env.MCP_HOST || "0.0.0.0";
// "stdio" selects the stdin/stdout transport; any other value starts the HTTP server.
var TRANSPORT = process.env.MCP_TRANSPORT ?? "stdio";
var PORT = resolveMcpPort(process.env.MCP_PORT);
|
|
953
|
+
// Every tool the server exposes, in registration order:
// vaultTools, then vaultWriteTools, then amplifyTools.
var allTools = [...vaultTools, ...vaultWriteTools, ...amplifyTools];

// Lookup table from tool name to tool definition, used by tools/call.
// A later tool with the same name replaces an earlier one.
var toolMap = new Map();
for (const tool of allTools) {
  toolMap.set(tool.name, tool);
}

// Open SSE sessions keyed by session id.
var sessions = new Map();
|
|
960
|
+
/**
 * Assembles a JSON-RPC 2.0 response envelope.
 *
 * @param {*} id - Id of the request being answered.
 * @param {*} [result] - Success payload; leave undefined for errors.
 * @param {{code: number, message: string}} [error] - Error payload; leave undefined on success.
 * @returns {{jsonrpc: "2.0", id: *, result: *, error: *}} Both `result` and
 *   `error` keys are always present; whichever is undefined disappears when
 *   the envelope is serialized with JSON.stringify.
 */
function makeResponse(id, result, error) {
  const envelope = { jsonrpc: "2.0" };
  envelope.id = id;
  envelope.result = result;
  envelope.error = error;
  return envelope;
}
|
|
963
|
+
/**
 * Creates the identifier for a new SSE session.
 *
 * @returns {string} A fresh value from `crypto.randomUUID()`.
 */
function generateSessionId() {
  const sessionId = crypto.randomUUID();
  return sessionId;
}
|
|
966
|
+
/**
 * Dispatches one decoded JSON-RPC message to the matching MCP handler.
 *
 * Handles `initialize`, `tools/list`, `tools/call` and `ping`; every other
 * method is answered with -32601 (method not found).
 *
 * @param {*} req - The decoded message (`{ method, id, params }`).
 * @returns {Promise<object | null>} The response envelope to send back, or
 *   null for `notifications/*` messages, which must not be answered.
 *   Payloads that are not a single object (null, primitives, arrays/batches)
 *   are answered with -32600 (invalid request) instead of throwing.
 */
async function handleRequest(req) {
  if (req === null || typeof req !== "object" || Array.isArray(req)) {
    return makeResponse(null, undefined, { code: -32600, message: "Invalid Request" });
  }
  const { method, id, params } = req;
  // Notifications are one-way: never reply, whichever notification it is.
  if (typeof method === "string" && method.startsWith("notifications/")) {
    return null;
  }
  switch (method) {
    case "initialize":
      return makeResponse(id, {
        protocolVersion: "2024-11-05",
        capabilities: {
          tools: { listChanged: false }
        },
        serverInfo: {
          name: "research-vault-mcp",
          version: "1.0.0"
        }
      });
    case "tools/list":
      // Expose only the descriptive fields; the `call` implementation stays private.
      return makeResponse(id, {
        tools: allTools.map((t) => ({
          name: t.name,
          description: t.description,
          inputSchema: t.inputSchema
        }))
      });
    case "tools/call": {
      // `params` may be absent; treat that as "no tool named" rather than crashing.
      const { name, arguments: args } = params ?? {};
      // NOTE(review): this writes the complete tool arguments to stderr.
      console.error("[DEBUG] tools/call:", name, JSON.stringify(args));
      const tool = toolMap.get(name);
      if (!tool) {
        return makeResponse(id, undefined, { code: -32602, message: `Unknown tool: ${name}` });
      }
      try {
        const result = await tool.call(args || {});
        return makeResponse(id, { content: result.content, isError: result.isError });
      } catch (e) {
        const reason = e instanceof Error ? e.message : String(e);
        return makeResponse(id, undefined, { code: -32603, message: `Tool error: ${reason}` });
      }
    }
    case "ping":
      return makeResponse(id, {});
    default:
      return makeResponse(id, undefined, { code: -32601, message: `Method not found: ${method}` });
  }
}
|
|
1011
|
+
/**
 * Serves newline-delimited JSON-RPC over stdin/stdout until stdin ends.
 *
 * Each non-blank input line is parsed and passed to handleRequest; the
 * response (if any) is written to stdout as one JSON line. Messages are
 * processed one at a time, in arrival order.
 *
 * Lines that are not valid JSON are answered with -32700 and `id: null`.
 * A failure while handling an otherwise valid message is answered with
 * -32603 carrying the request id, rather than being reported as a parse error.
 *
 * @returns {Promise<void>} Resolves once the input stream is exhausted.
 */
async function handleStdioTransport() {
  const rl = await import("readline");
  const rli = rl.createInterface({ input: process.stdin, crlfDelay: Infinity });
  const writer = Bun.stdout.writer();
  const describe = (e) => (e instanceof Error ? e.message : String(e));
  const send = (obj) => {
    writer.write(`${JSON.stringify(obj)}\n`);
    // Flush per message so the peer is never left waiting on a buffered reply.
    writer.flush();
  };
  for await (const line of rli) {
    if (!line.trim())
      continue;
    let req;
    try {
      req = JSON.parse(line);
    } catch (e) {
      send({ jsonrpc: "2.0", id: null, error: { code: -32700, message: `Parse error: ${describe(e)}` } });
      continue;
    }
    try {
      const result = await handleRequest(req);
      if (result)
        send(result);
    } catch (e) {
      send({ jsonrpc: "2.0", id: req?.id ?? null, error: { code: -32603, message: `Internal error: ${describe(e)}` } });
    }
  }
}
|
|
1033
|
+
// HTTP transport. Only started when TRANSPORT is not "stdio"; `server` stays
// undefined otherwise. Routes:
//   GET  /sse        opens an event stream and announces the per-session POST endpoint
//   POST /messages   accepts one JSON-RPC message; the reply is pushed over that session's stream
//   GET  /health     JSON status snapshot
//   POST /configure  stores the Amplify API key
// NOTE(review): no route here performs any authentication, including
// /configure — confirm the listener is only reachable by trusted clients.
var server;
if (TRANSPORT !== "stdio") {
  const describeError = (e) => (e instanceof Error ? e.message : String(e));
  server = Bun.serve({
    port: PORT,
    hostname: HOST,
    async fetch(req) {
      const url = new URL(req.url);
      if (url.pathname === "/sse" && req.method === "GET") {
        const sessionId = generateSessionId();
        // Replaced in start(); lets cancel() share the same teardown.
        let closeSession = () => {};
        const stream = new ReadableStream({
          start(controller) {
            const encoder = new TextEncoder();
            const send = (data) => {
              try {
                controller.enqueue(encoder.encode(data));
              } catch {
                // Best effort: the stream is already closed, nothing to deliver to.
              }
            };
            // First event tells the client where to POST its messages.
            send(`event: endpoint\ndata: /messages?sessionId=${sessionId}\n\n`);
            let open = true;
            const heartbeat = setInterval(() => {
              try {
                controller.enqueue(encoder.encode(": heartbeat\n\n"));
              } catch {
                // Enqueue fails once the client is gone; drop the session.
                closeSession();
              }
            }, 15000);
            closeSession = () => {
              if (!open)
                return;
              open = false;
              clearInterval(heartbeat);
              sessions.delete(sessionId);
              console.error(`[SSE] Session ${sessionId} disconnected`);
            };
            sessions.set(sessionId, { send, heartbeat });
            console.error(`[SSE] Session ${sessionId} connected`);
            req.signal.addEventListener("abort", () => closeSession());
          },
          cancel() {
            // Consumer cancelled the stream: stop the heartbeat timer as well.
            closeSession();
          }
        });
        return new Response(stream, {
          status: 200,
          headers: {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
            Connection: "keep-alive",
            "X-Accel-Buffering": "no"
          }
        });
      }
      if (url.pathname === "/messages" && req.method === "POST") {
        const sessionId = url.searchParams.get("sessionId");
        const session = sessionId ? sessions.get(sessionId) : undefined;
        if (!session) {
          return Response.json({ error: "Invalid or missing sessionId" }, { status: 400 });
        }
        let body;
        try {
          body = await req.json();
        } catch (e) {
          return Response.json({ jsonrpc: "2.0", id: null, error: { code: -32700, message: `Parse error: ${describeError(e)}` } }, { status: 400 });
        }
        try {
          const result = await handleRequest(body);
          if (result) {
            session.send(`event: message\ndata: ${JSON.stringify(result)}\n\n`);
          }
          // The JSON-RPC reply travels over the SSE stream; the POST is only acknowledged.
          return new Response(null, { status: 202 });
        } catch (e) {
          // The body parsed fine, so this is a handler failure, not a parse error.
          return Response.json({ jsonrpc: "2.0", id: body?.id ?? null, error: { code: -32603, message: `Internal error: ${describeError(e)}` } }, { status: 500 });
        }
      }
      if (url.pathname === "/health" && req.method === "GET") {
        return Response.json({
          status: "ok",
          tools: allTools.length,
          vault_tools: vaultTools.length,
          amplify_tools: amplifyTools.length,
          sse_sessions: sessions.size,
          uptime: process.uptime()
        });
      }
      if (url.pathname === "/configure" && req.method === "POST") {
        try {
          const { apiKey } = await req.json();
          // Reject missing, empty and non-string keys alike.
          if (typeof apiKey !== "string" || !apiKey)
            throw new Error("apiKey required");
          configureAmplify(apiKey);
          return Response.json({ status: "configured" });
        } catch (e) {
          return Response.json({ error: describeError(e) }, { status: 400 });
        }
      }
      return Response.json({ error: "Not found" }, { status: 404 });
    }
  });
}
|
|
1128
|
+
/**
 * Draws text rows inside a box made of double-line box-drawing characters.
 *
 * The box is sized to the longest row, so the right border stays aligned
 * whatever the host, port or tool counts are.
 *
 * @param {(string | null)[]} rows - Text rows; `null` inserts a horizontal divider.
 * @returns {string} The box, with one blank line before and after it.
 */
function renderStartupBanner(rows) {
  const longest = Math.max(...rows.map((row) => (row === null ? 0 : row.length)));
  const inner = longest + 2;
  const bar = "\u2550".repeat(inner);
  const body = rows.map((row) => (row === null
    ? `\u2560${bar}\u2563`
    : `\u2551 ${row.padEnd(inner - 1)}\u2551`));
  return ["", `\u2554${bar}\u2557`, ...body, `\u255A${bar}\u255D`, ""].join("\n");
}

if (TRANSPORT === "stdio") {
  // stdout carries the protocol in this mode, so status text goes to stderr.
  console.error("[MCP] Running in stdio mode (stdin/stdout JSON-RPC)");
  await handleStdioTransport();
  // stdin has ended: nothing left to serve.
  process.exit(0);
} else {
  console.log(renderStartupBanner([
    "Research Vault MCP Server \u2014 MCP SSE Transport",
    null,
    `SSE:      http://${HOST}:${PORT}/sse`,
    `Messages: http://${HOST}:${PORT}/messages`,
    `Health:   http://${HOST}:${PORT}/health`,
    null,
    `Tools:    ${allTools.length} (${vaultTools.length} vault, ${amplifyTools.length} amplify)`
  ]));
}
|
|
1145
|
+
// Ctrl-C: stop every session heartbeat, forget the sessions, stop the HTTP
// server if one was started, then exit with status 0.
process.on("SIGINT", () => {
  console.log("\nShutting down...");
  sessions.forEach((session) => {
    clearInterval(session.heartbeat);
  });
  sessions.clear();
  if (server != null) {
    server.stop();
  }
  process.exit(0);
});
|