@syndash/research-vault-mcp 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs'
2
+ import { join } from 'path'
3
+ import { createHash, randomUUID } from 'crypto'
4
+ import type { IngestJob, RawIngestInput, ChecksumStore } from './types.js'
5
+
6
// File name of the JSON document holding all ingest jobs; it is joined onto the store's `.meta` directory.
+ const JOBS_FILE = 'ingest-jobs.json'
7
// File name of the JSON checksum document; it is joined onto the same `.meta` directory.
+ const CHECKSUMS_FILE = 'checksums.json'
8
+
9
/**
 * JSON-file-backed store for ingest jobs.
 *
 * All jobs live in a single JSON object, keyed by `jobId`, stored in the
 * `.meta` directory under the vault root. Every read re-parses the file and
 * every mutation rewrites it in full. The file I/O is synchronous even though
 * the public methods return promises.
 */
export class IngestJobStore {
  private metaDir: string

  /**
   * @param vaultRoot Directory that holds the `.meta` folder. The folder is
   *   created here when it does not exist yet.
   */
  constructor(private vaultRoot: string) {
    this.metaDir = join(this.vaultRoot, '.meta')
    if (!existsSync(this.metaDir)) mkdirSync(this.metaDir, { recursive: true })
  }

  private jobsPath() { return join(this.metaDir, JOBS_FILE) }
  private checksumsPath() { return join(this.metaDir, CHECKSUMS_FILE) }

  /**
   * Reads a JSON object from `path` into a prototype-less map.
   *
   * A missing or unparsable file, or one whose top-level value is not a plain
   * object (`null`, an array, a scalar), yields an empty map. The map has no
   * prototype so that lookups with keys such as `constructor` or `toString`
   * cannot resolve to inherited `Object.prototype` members.
   */
  private readJsonObject<T extends object>(path: string): T {
    const target = Object.create(null) as T
    try {
      const parsed: unknown = JSON.parse(readFileSync(path, 'utf-8'))
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        Object.assign(target, parsed)
      }
    } catch {
      // Best effort: an absent or corrupt file is treated as an empty store.
    }
    return target
  }

  private loadJobs(): Record<string, IngestJob> {
    return this.readJsonObject<Record<string, IngestJob>>(this.jobsPath())
  }

  private saveJobs(jobs: Record<string, IngestJob>) {
    writeFileSync(this.jobsPath(), JSON.stringify(jobs, null, 2), 'utf-8')
  }

  // The checksum helpers below are not called by any method of this class.
  private loadChecksums(): ChecksumStore {
    return this.readJsonObject<ChecksumStore>(this.checksumsPath())
  }

  private saveChecksums(store: ChecksumStore) {
    writeFileSync(this.checksumsPath(), JSON.stringify(store, null, 2), 'utf-8')
  }

  /**
   * Creates and persists a new job in the `queued` state.
   *
   * @param input Source descriptor; `category` defaults to `'inbox'`.
   * @returns The stored job, including its freshly generated `jobId`.
   */
  async createJob(input: RawIngestInput): Promise<IngestJob> {
    const jobs = this.loadJobs()
    const now = new Date().toISOString()
    const job: IngestJob = {
      jobId: randomUUID(),
      source: input.source,
      value: input.value,
      category: input.category ?? 'inbox',
      status: 'queued',
      rawPath: null,
      metadata: null,
      createdAt: now,
      updatedAt: now
    }
    jobs[job.jobId] = job
    this.saveJobs(jobs)
    return job
  }

  /** Returns the job stored under `jobId`, or `null` when there is none. */
  async getJob(jobId: string): Promise<IngestJob | null> {
    return this.loadJobs()[jobId] ?? null
  }

  /**
   * Shallow-merges `updates` into an existing job and persists the result.
   *
   * `jobId` and `createdAt` are always kept from the stored record, so a patch
   * cannot make the record disagree with the key it is stored under.
   * `updatedAt` is always refreshed.
   *
   * @returns The updated job, or `null` when `jobId` is unknown.
   */
  async updateJob(jobId: string, updates: Partial<IngestJob>): Promise<IngestJob | null> {
    const jobs = this.loadJobs()
    const job = jobs[jobId]
    if (!job) return null
    const next: IngestJob = {
      ...job,
      ...updates,
      jobId: job.jobId,
      createdAt: job.createdAt,
      updatedAt: new Date().toISOString()
    }
    jobs[jobId] = next
    this.saveJobs(jobs)
    return next
  }

  /** Returns every stored job, in the key order of the underlying JSON object. */
  async getAllJobs(): Promise<IngestJob[]> {
    return Object.values(this.loadJobs())
  }
}
76
+
77
/**
 * Computes the SHA-256 digest of a file's contents.
 *
 * The file is read with `readFileSync` rather than the Bun-only `Bun.file`
 * global, so the helper runs under Node.js as well as Bun.
 *
 * @param filePath Path of the file to hash.
 * @returns The digest as a hex string.
 * @throws Rejects with the underlying file-system error when the file cannot be read.
 */
export async function computeChecksum(filePath: string): Promise<string> {
  const bytes = readFileSync(filePath)
  return createHash('sha256').update(bytes).digest('hex')
}
84
+
85
/**
 * Checks a file against a previously recorded digest.
 *
 * @param filePath Path of the file to hash.
 * @param expected Digest to compare with; the comparison is an exact string match.
 * @returns `true` when the freshly computed digest equals `expected`.
 */
export async function verifyChecksum(filePath: string, expected: string): Promise<boolean> {
  return (await computeChecksum(filePath)) === expected
}
@@ -0,0 +1,347 @@
1
+ import { readFileSync, writeFileSync, existsSync, statSync, mkdirSync, unlinkSync, realpathSync, readdirSync } from 'fs'
2
+ import { join, dirname, basename, resolve as pathResolve } from 'path'
3
+ import { homedir } from 'os'
4
+ import { IngestJobStore, computeChecksum } from './vault_jobs.js'
5
+ import { parseArxivId, fetchArxivMetadata } from './ingest/arxiv.js'
6
+ import { fetchHtml } from './ingest/html.js'
7
+ import type { VaultEntry, RawIngestInput, NoteSaveInput, VaultGetInput, VaultDeleteInput, DecayScore } from './types.js'
8
+
9
// Vault root: the VAULT_ROOT environment variable when defined, otherwise a fixed folder under the user's home directory.
+ const VAULT_ROOT = process.env.VAULT_ROOT ?? `${homedir()}/Documents/Evensong/research-vault`
10
// Directory scanned for category folders of `.md` notes.
+ const KNOWLEDGE_DIR = join(VAULT_ROOT, 'knowledge')
11
// Directory that receives ingested raw material, one sub-folder per category.
+ const RAW_DIR = join(VAULT_ROOT, 'raw')
12
// JSON file mapping note ids to their decay-score records.
+ const DECAY_PATH = join(VAULT_ROOT, '.meta', 'decay-scores.json')
13
// JSON file mapping written file paths to their SHA-256 digest and write time.
+ const CHECKSUMS_PATH = join(VAULT_ROOT, '.meta', 'checksums.json')
14
+
15
/**
 * Makes sure `p` exists, creating it (and any missing parents) as a directory
 * when nothing is there yet. An existing path of any kind is left untouched.
 */
function ensureDir(p: string) {
  const alreadyPresent = existsSync(p)
  if (alreadyPresent) return
  mkdirSync(p, { recursive: true })
}
18
+
19
/**
 * Resolves `target` relative to `root` and guarantees the result stays inside `root`.
 *
 * Both the root and the target are canonicalised the same way before they are
 * compared: symlinks are resolved on the deepest ancestor that exists, and the
 * components that do not exist yet are appended unchanged. This means
 *  - a root that is itself reached through a symlink no longer causes every
 *    existing file to be rejected, and
 *  - a not-yet-existing file below a symlinked directory that points outside
 *    the root is rejected instead of slipping through a purely lexical check.
 *
 * NOTE(review): a dangling symlink as the final path component still cannot be
 * resolved and is treated as a missing component.
 *
 * @param root Directory that must contain the result.
 * @param target Path relative to `root` (may contain `..` segments).
 * @returns The canonical absolute path.
 * @throws Error when the canonical path is outside `root`.
 */
function safePath(root: string, target: string): string {
  const canonical = (p: string): string => {
    const absolute = pathResolve(p)
    const missing: string[] = []
    let probe = absolute
    for (;;) {
      try {
        return join(realpathSync(probe), ...missing)
      } catch {
        const parent = dirname(probe)
        // Reached the filesystem root without finding anything resolvable.
        if (parent === probe) return absolute
        missing.unshift(basename(probe))
        probe = parent
      }
    }
  }

  const resolved = canonical(join(root, target))
  // Normalise separators and drop a trailing slash so the prefix test is exact.
  const rootNorm = canonical(root).replace(/\\/g, '/').replace(/\/$/, '')
  const resolvedNorm = resolved.replace(/\\/g, '/').replace(/\/$/, '')
  if (resolvedNorm !== rootNorm && !resolvedNorm.startsWith(rootNorm + '/')) {
    throw new Error('Path traversal detected: target outside vault root')
  }
  return resolved
}
37
+
38
/**
 * Reduces a file name or id to its bare form by removing, in this order:
 *  1. a leading `<8 digits>-<4 digits>-` stamp (one or two dashes after the first group),
 *  2. a leading run of ten or more digits followed by one or two dashes,
 *  3. a trailing `.md` extension.
 *
 * @param raw File name or id to normalise.
 * @returns The input with the prefixes and extension stripped.
 */
export function normalizeId(raw: string): string {
  const withoutDateStamp = raw.replace(/^\d{8}--?\d{4}-/, '')
  const withoutEpochStamp = withoutDateStamp.replace(/^(\d{10,})--?/, '')
  return withoutEpochStamp.replace(/\.md$/, '')
}
44
+
45
/**
 * Loads the decay-score map from `DECAY_PATH`.
 *
 * Best effort: a missing or unparsable file yields an empty map. The same
 * holds when the file parses to something that is not a plain object (`null`,
 * an array, a scalar), so callers can always index and assign into the result.
 */
function loadDecayScores(): Record<string, DecayScore> {
  try {
    const parsed: unknown = JSON.parse(readFileSync(DECAY_PATH, 'utf-8'))
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as Record<string, DecayScore>
    }
  } catch {
    // Absent or corrupt file: fall through to the empty map.
  }
  return {}
}
48
+
49
/**
 * Writes the decay-score map to `DECAY_PATH` as two-space-indented JSON,
 * creating the containing directory first when necessary.
 */
function saveDecayScores(scores: Record<string, DecayScore>) {
  const targetDir = dirname(DECAY_PATH)
  ensureDir(targetDir)
  const payload = JSON.stringify(scores, null, 2)
  writeFileSync(DECAY_PATH, payload, 'utf-8')
}
53
+
54
/**
 * Loads the checksum map (file path → digest and write time) from `CHECKSUMS_PATH`.
 *
 * Best effort: a missing or unparsable file yields an empty map. The same
 * holds when the file parses to something that is not a plain object (`null`,
 * an array, a scalar), so callers can always index and assign into the result.
 */
function loadChecksums(): Record<string, { sha256: string; writtenAt: string }> {
  try {
    const parsed: unknown = JSON.parse(readFileSync(CHECKSUMS_PATH, 'utf-8'))
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as Record<string, { sha256: string; writtenAt: string }>
    }
  } catch {
    // Absent or corrupt file: fall through to the empty map.
  }
  return {}
}
57
+
58
/**
 * Writes the checksum map to `CHECKSUMS_PATH` as two-space-indented JSON,
 * creating the containing directory first when necessary.
 */
function saveChecksums(store: Record<string, { sha256: string; writtenAt: string }>) {
  const targetDir = dirname(CHECKSUMS_PATH)
  ensureDir(targetDir)
  const payload = JSON.stringify(store, null, 2)
  writeFileSync(CHECKSUMS_PATH, payload, 'utf-8')
}
62
+
63
+ // ─── ingest helpers ──────────────────────────────────────────────────────────────
64
+
65
// Job store shared by the ingest helpers in this module, rooted at VAULT_ROOT and constructed when the module loads.
+ const jobStore = new IngestJobStore(VAULT_ROOT)
66
+
67
/**
 * Fetches arXiv metadata for `value` and stores it as `arxiv-<id>.meta.json`
 * under the given raw category, recording a checksum and an ingest job.
 *
 * The destination is validated with `safePath` before a job is created, so a
 * `category` that would leave the raw directory is rejected up front. If
 * fetching or writing fails, the job is marked `failed` with the error message
 * and the error is rethrown.
 *
 * @param value arXiv identifier or URL, as accepted by `parseArxivId`.
 * @param category Sub-directory of the raw directory to store the metadata in.
 * @returns The job as persisted after the metadata was written.
 * @throws Error when `value` is not a valid arXiv id, when the destination is
 *   outside the raw directory, or when fetching or writing fails.
 */
async function ingestArxiv(value: string, category: string) {
  const id = parseArxivId(value)
  if (!id) throw new Error(`Invalid ArXiv ID: ${value}`)

  const metaPath = safePath(RAW_DIR, join(category, `arxiv-${id}.meta.json`))

  const job = await jobStore.createJob({ source: 'arxiv', value: id, category })
  try {
    await jobStore.updateJob(job.jobId, { status: 'fetching' })

    const metadata = await fetchArxivMetadata(id)
    metadata.arxivId = id

    ensureDir(dirname(metaPath))
    writeFileSync(metaPath, JSON.stringify(metadata, null, 2), 'utf-8')

    const hash = await computeChecksum(metaPath)
    const checksums = loadChecksums()
    checksums[metaPath] = { sha256: hash, writtenAt: new Date().toISOString() }
    saveChecksums(checksums)

    const updated = await jobStore.updateJob(job.jobId, { status: 'queued', rawPath: metaPath, metadata })
    // Return the persisted state rather than the creation-time snapshot.
    return updated ?? job
  } catch (e: unknown) {
    // Record the failure so the job does not stay in 'fetching' forever; a
    // failure to record it must not mask the original error.
    await jobStore
      .updateJob(job.jobId, { status: 'failed', error: e instanceof Error ? e.message : String(e) })
      .catch(() => null)
    throw e
  }
}
89
+
90
/**
 * Starts a background download of `value` into the given raw category and
 * returns immediately with the job to poll.
 *
 * The destination path is validated with `safePath` before a job is created,
 * so a `category` that would leave the raw directory is rejected
 * synchronously. The download itself runs detached: on success the job goes
 * back to `queued` with `rawPath` set, on failure it becomes `failed` with the
 * error message.
 *
 * @param value URL passed to `fetchHtml`.
 * @param category Sub-directory of the raw directory to store the page in.
 * @returns The job in its `fetching` state.
 * @throws Error when the destination is outside the raw directory.
 */
async function ingestUrl(value: string, category: string) {
  const safeName = value.replace(/[^a-z0-9]/gi, '_').slice(0, 64)
  const rawPath = safePath(RAW_DIR, join(category, `${Date.now()}--${safeName}.html`))

  const job = await jobStore.createJob({ source: 'url', value, category })
  const fetching = await jobStore.updateJob(job.jobId, { status: 'fetching' })

  // Fire-and-forget: the caller polls the job store for the outcome.
  void (async () => {
    try {
      const text = await fetchHtml(value)
      ensureDir(dirname(rawPath))
      writeFileSync(rawPath, text, 'utf-8')

      const hash = await computeChecksum(rawPath)
      const checksums = loadChecksums()
      checksums[rawPath] = { sha256: hash, writtenAt: new Date().toISOString() }
      saveChecksums(checksums)

      await jobStore.updateJob(job.jobId, { status: 'queued', rawPath })
    } catch (e: unknown) {
      await jobStore.updateJob(job.jobId, { status: 'failed', error: e instanceof Error ? e.message : String(e) })
    }
  })().catch((e: unknown) => {
    // Even recording the failure failed; report it instead of leaving an unhandled rejection.
    console.error(`ingestUrl: could not record failure for job ${job.jobId}:`, e)
  })

  return fetching ?? job
}
115
+
116
/**
 * Copies a local file into the given raw category as `<timestamp>--<basename>`,
 * recording a checksum and an ingest job.
 *
 * The source must be an existing regular file and the destination must stay
 * inside the raw directory; both are checked before a job is created. If the
 * copy fails afterwards, the job is marked `failed` with the error message and
 * the error is rethrown.
 *
 * @param value Path of the file to ingest.
 * @param category Sub-directory of the raw directory to copy the file into.
 * @returns The job as persisted after the copy.
 * @throws Error when the source is missing or not a regular file, when the
 *   destination is outside the raw directory, or when the copy fails.
 */
async function ingestFile(value: string, category: string) {
  if (!existsSync(value)) throw new Error(`File not found: ${value}`)
  // A directory passes existsSync but cannot be read as a file.
  if (!statSync(value).isFile()) throw new Error(`Not a regular file: ${value}`)

  const destPath = safePath(RAW_DIR, join(category, `${Date.now()}--${basename(value)}`))

  const job = await jobStore.createJob({ source: 'file', value, category })
  try {
    ensureDir(dirname(destPath))
    writeFileSync(destPath, readFileSync(value))

    const hash = await computeChecksum(destPath)
    const checksums = loadChecksums()
    checksums[destPath] = { sha256: hash, writtenAt: new Date().toISOString() }
    saveChecksums(checksums)

    const updated = await jobStore.updateJob(job.jobId, { status: 'queued', rawPath: destPath })
    // Return the persisted state rather than the creation-time snapshot.
    return updated ?? job
  } catch (e: unknown) {
    // A failure to record the failure must not mask the original error.
    await jobStore
      .updateJob(job.jobId, { status: 'failed', error: e instanceof Error ? e.message : String(e) })
      .catch(() => null)
    throw e
  }
}
133
+
134
+ // ─── vault_note_save ──────────────────────────────────────────────────────────
135
+
136
/**
 * Writes a note as `<timestamp>--<sanitised title>.md` under the given
 * knowledge category, then records an initial decay score and a checksum.
 *
 * The clock is read once: the id's timestamp, the decay-score dates, the
 * stored checksum's `writtenAt` and the returned `writtenAt` all describe the
 * same instant.
 *
 * NOTE(review): `input.tags` is accepted by the tool schema but is not
 * persisted here — confirm whether that is intended.
 *
 * @param input Note title, content and category, plus an optional summary level.
 * @returns The generated id, the file path and the write time.
 * @throws Error when the category would place the note outside the knowledge directory.
 */
async function saveNote(input: NoteSaveInput) {
  const now = new Date()
  const writtenAt = now.toISOString()

  // Non-alphanumerics become dashes and the title is capped at 32 characters.
  const safeTitle = input.title.replace(/[^a-z0-9]/gi, '-').slice(0, 32)
  const id = `${now.getTime()}--${safeTitle}`
  const filePath = safePath(KNOWLEDGE_DIR, join(input.category, `${id}.md`))
  ensureDir(dirname(filePath))
  writeFileSync(filePath, `# ${input.title}\n\n${input.content}\n`, 'utf-8')

  const scores = loadDecayScores()
  scores[id] = {
    itemId: id, score: 0.5, lastAccess: writtenAt,
    accessCount: 0, summaryLevel: input.summaryLevel ?? 'none',
    nextReviewAt: writtenAt, difficulty: 0.5
  }
  saveDecayScores(scores)

  const hash = await computeChecksum(filePath)
  const checksums = loadChecksums()
  checksums[filePath] = { sha256: hash, writtenAt }
  saveChecksums(checksums)

  return { id, path: filePath, writtenAt }
}
159
+
160
+ // ─── vault_get ────────────────────────────────────────────────────────────────
161
+
162
/**
 * Reads a vault entry, addressed either by a path relative to the vault root
 * or by a note id that is looked up among the scanned knowledge entries.
 * `path` wins when both are given.
 *
 * The returned `category` is the entry's directory relative to the vault root.
 * The root prefix is matched against both the configured root and its
 * symlink-resolved form, and only at the start of the path, because
 * `safePath` may hand back a resolved path.
 *
 * @param input Object carrying `path` and/or `id`.
 * @returns Id, title (first Markdown heading, else the id), category, content,
 *   modification time and size of the entry.
 * @throws Error when neither `id` nor `path` is given, when the id is unknown,
 *   when the path leaves the vault root, or when the file cannot be read.
 */
function getEntry(input: VaultGetInput) {
  let filePath: string

  if (input.path) {
    filePath = safePath(VAULT_ROOT, input.path)
  } else if (input.id) {
    // Normalise the requested id once instead of once per scanned entry.
    const wanted = normalizeId(input.id)
    const entry = scanKnowledge().find(e => normalizeId(e.id) === wanted)
    if (!entry) throw new Error(`Entry not found: ${input.id}`)
    filePath = entry.path
  } else {
    throw new Error('id or path required')
  }

  const content = readFileSync(filePath, 'utf-8')
  const s = statSync(filePath)

  const toPosix = (p: string) => p.replace(/\\/g, '/').replace(/\/$/, '')
  let rootReal = VAULT_ROOT
  try {
    rootReal = realpathSync(VAULT_ROOT)
  } catch {
    // Root cannot be resolved; fall back to the configured value.
  }
  const posixPath = toPosix(filePath)
  const rootPrefix = [rootReal, VAULT_ROOT]
    .map(r => toPosix(r) + '/')
    .find(prefix => posixPath.startsWith(prefix))
  const relPath = rootPrefix ? posixPath.slice(rootPrefix.length) : posixPath

  const id = normalizeId(basename(filePath))
  return {
    id,
    title: (content.match(/^#\s+(.+)/m)?.[1]) ?? id,
    category: relPath.includes('/') ? relPath.split('/').slice(0, -1).join('/') : '',
    content,
    modified: s.mtime.toISOString(),
    size: s.size
  }
}
188
+
189
+ // ─── vault_delete ─────────────────────────────────────────────────────────────
190
+
191
/**
 * Deletes a vault entry, addressed either by a path relative to the vault root
 * or by a note id, and removes its decay-score and checksum records.
 * `path` wins when both are given.
 *
 * Decay scores are removed under both the file's stem and its normalised id:
 * `saveNote` stores scores under the timestamp-prefixed id, which
 * `normalizeId` strips, so deleting only the normalised key would leave the
 * record behind.
 *
 * @param input Object carrying `path` and/or `id`.
 * @returns `{ deleted: true, path }` with the path of the removed file.
 * @throws Error when neither `id` nor `path` is given, when the id is unknown,
 *   when the path leaves the vault root, or when the file cannot be removed.
 */
function deleteEntry(input: VaultDeleteInput) {
  let filePath: string

  if (input.path) {
    filePath = safePath(VAULT_ROOT, input.path)
  } else if (input.id) {
    // Normalise the requested id once instead of once per scanned entry.
    const wanted = normalizeId(input.id)
    const entry = scanKnowledge().find(e => normalizeId(e.id) === wanted)
    if (!entry) throw new Error(`Entry not found: ${input.id}`)
    filePath = entry.path
  } else {
    throw new Error('id or path required')
  }

  unlinkSync(filePath)

  const stem = basename(filePath).replace(/\.md$/, '')
  const scores = loadDecayScores()
  delete scores[stem]
  delete scores[normalizeId(stem)]
  saveDecayScores(scores)

  const checksums = loadChecksums()
  delete checksums[filePath]
  saveChecksums(checksums)

  return { deleted: true, path: filePath }
}
217
+
218
+ // ─── scanKnowledge ───────────────────────────────────────────────────────────
219
+
220
/**
 * Lists the `.md` notes found one level deep in the knowledge directory,
 * i.e. `<knowledge>/<category>/<file>.md`. Category folders whose name starts
 * with `_` are skipped.
 *
 * Failures are isolated: an unreadable category folder skips only that
 * category and an unreadable file skips only that file, so one bad entry
 * (for example a dangling symlink) does not hide the rest.
 *
 * @returns One entry per note; `id` and `title` are both the normalised file name.
 */
function scanKnowledge(): VaultEntry[] {
  const entries: VaultEntry[] = []
  if (!existsSync(KNOWLEDGE_DIR)) return entries

  let categories: string[]
  try {
    categories = readdirSync(KNOWLEDGE_DIR)
  } catch {
    // Knowledge directory unreadable: report no entries.
    return entries
  }

  for (const cat of categories) {
    if (cat.startsWith('_')) continue
    const catPath = join(KNOWLEDGE_DIR, cat)

    let files: string[]
    try {
      if (!statSync(catPath).isDirectory()) continue
      files = readdirSync(catPath).filter((f: string) => f.endsWith('.md'))
    } catch {
      // Category vanished or is unreadable: skip it, keep scanning the others.
      continue
    }

    for (const file of files) {
      const fp = join(catPath, file)
      try {
        const s = statSync(fp)
        entries.push({
          id: normalizeId(file),
          title: normalizeId(file),
          category: cat,
          path: fp,
          modified: s.mtime.toISOString(),
          size: s.size
        })
      } catch {
        // File vanished or is unreadable: skip just this one.
      }
    }
  }
  return entries
}
248
+
249
+ // ─── Tool Definitions ──────────────────────────────────────────────────────────
250
+
251
/** Wraps a successful tool payload as a single JSON-encoded text content item. */
function textResult(payload: unknown) {
  return { content: [{ type: 'text', text: JSON.stringify(payload) }] }
}

/** Wraps a thrown value as an error result carrying its message. */
function errorResult(e: unknown) {
  return { content: [{ type: 'text', text: e instanceof Error ? e.message : String(e) }], isError: true }
}

/**
 * Tool definitions for the write side of the vault: raw ingest, note save,
 * entry read and entry delete.
 *
 * Each tool has a `name`, a `description`, a JSON-schema `inputSchema` and an
 * async `call`. `call` never throws: failures are returned as a text result
 * with `isError: true`.
 */
export const vaultWriteTools = [
  {
    name: 'vault_raw_ingest',
    description: 'Fire-and-forget ingest of URL/file/ArXiv to raw vault layer. Returns jobId for async progress polling.',
    inputSchema: {
      type: 'object',
      properties: {
        source: { type: 'string', enum: ['url', 'file', 'arxiv'] },
        value: { type: 'string', description: 'URL / absolute file path / ArXiv ID or URL' },
        category: { type: 'string', description: 'raw/ subdirectory, default "inbox"' },
        priority: { type: 'string', enum: ['high', 'low'], default: 'low' },
        arxivMetadata: { type: 'boolean', description: 'ArXiv: fetch metadata before storing, default true' }
      },
      required: ['source', 'value']
    },
    call: async (args: RawIngestInput) => {
      try {
        const category = args.category ?? 'inbox'
        let job
        switch (args.source) {
          case 'arxiv':
            job = await ingestArxiv(args.value, category)
            break
          case 'url':
            job = await ingestUrl(args.value, category)
            break
          case 'file':
            job = await ingestFile(args.value, category)
            break
          default:
            // An unrecognised source must not be silently treated as a file path.
            throw new Error(`Unsupported source: ${String(args.source)}`)
        }
        return textResult(job)
      } catch (e: unknown) {
        return errorResult(e)
      }
    }
  },

  {
    name: 'vault_note_save',
    description: 'Write a structured note to the knowledge layer.',
    inputSchema: {
      type: 'object',
      properties: {
        title: { type: 'string' },
        content: { type: 'string' },
        category: { type: 'string' },
        tags: { type: 'array', items: { type: 'string' } },
        summaryLevel: { type: 'string', enum: ['deep', 'shallow', 'none'] }
      },
      required: ['title', 'content', 'category']
    },
    call: async (args: NoteSaveInput) => {
      try {
        return textResult(await saveNote(args))
      } catch (e: unknown) {
        return errorResult(e)
      }
    }
  },

  {
    name: 'vault_get',
    description: 'Read full content of a vault entry by id or path.',
    inputSchema: {
      type: 'object',
      properties: {
        id: { type: 'string' },
        path: { type: 'string' }
      }
    },
    call: async (args: VaultGetInput) => {
      try {
        return textResult(getEntry(args))
      } catch (e: unknown) {
        return errorResult(e)
      }
    }
  },

  {
    name: 'vault_delete',
    description: 'Delete a vault entry (raw or knowledge).',
    inputSchema: {
      type: 'object',
      properties: {
        id: { type: 'string' },
        path: { type: 'string' }
      }
    },
    call: async (args: VaultDeleteInput) => {
      try {
        return textResult(deleteEntry(args))
      } catch (e: unknown) {
        return errorResult(e)
      }
    }
  }
]