@syndash/research-vault-mcp 1.1.2 → 1.1.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,16 +1,33 @@
1
- import { readFileSync, writeFileSync, existsSync, statSync, mkdirSync, unlinkSync, realpathSync, readdirSync } from 'fs'
1
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync, realpathSync } from 'fs'
2
2
  import { join, dirname, basename, resolve as pathResolve } from 'path'
3
3
  import { homedir } from 'os'
4
4
  import { IngestJobStore, computeChecksum } from './vault_jobs.js'
5
5
  import { parseArxivId, fetchArxivMetadata } from './ingest/arxiv.js'
6
6
  import { fetchHtml } from './ingest/html.js'
7
- import type { VaultEntry, RawIngestInput, NoteSaveInput, VaultGetInput, VaultDeleteInput, DecayScore } from './types.js'
7
+ import { scanKnowledge } from './vault.js'
8
+ import type { RawIngestInput, NoteSaveInput, VaultDeleteInput, DecayScore } from './types.js'
8
9
 
9
- const VAULT_ROOT = process.env.VAULT_ROOT ?? `${homedir()}/Documents/Evensong/research-vault`
10
- const KNOWLEDGE_DIR = join(VAULT_ROOT, 'knowledge')
11
- const RAW_DIR = join(VAULT_ROOT, 'raw')
12
- const DECAY_PATH = join(VAULT_ROOT, '.meta', 'decay-scores.json')
13
- const CHECKSUMS_PATH = join(VAULT_ROOT, '.meta', 'checksums.json')
10
+ const DEFAULT_VAULT_ROOT = `${homedir()}/Documents/Evensong/research-vault`
11
+
12
+ function getVaultRoot(): string {
13
+ return process.env.VAULT_ROOT ?? DEFAULT_VAULT_ROOT
14
+ }
15
+
16
+ function getKnowledgeDir(): string {
17
+ return join(getVaultRoot(), 'knowledge')
18
+ }
19
+
20
+ function getRawDir(): string {
21
+ return join(getVaultRoot(), 'raw')
22
+ }
23
+
24
+ function getDecayPath(): string {
25
+ return join(getVaultRoot(), '.meta', 'decay-scores.json')
26
+ }
27
+
28
+ function getChecksumsPath(): string {
29
+ return join(getVaultRoot(), '.meta', 'checksums.json')
30
+ }
14
31
 
15
32
  function ensureDir(p: string) {
16
33
  if (!existsSync(p)) mkdirSync(p, { recursive: true })
@@ -18,16 +35,23 @@ function ensureDir(p: string) {
18
35
 
19
36
  function safePath(root: string, target: string): string {
20
37
  const joined = join(root, target)
38
+ let resolvedRoot: string
39
+ try {
40
+ resolvedRoot = realpathSync(root)
41
+ } catch {
42
+ resolvedRoot = pathResolve(root)
43
+ }
44
+
21
45
  let resolved: string
22
46
  try {
23
47
  resolved = realpathSync(joined)
24
48
  } catch {
25
49
  // Path doesn't exist yet (new file). Use resolve to normalize .. components
26
50
  // and verify the final path stays within root.
27
- resolved = pathResolve(joined)
51
+ resolved = pathResolve(resolvedRoot, target)
28
52
  }
29
53
  // Normalize both to remove trailing slashes for prefix comparison
30
- const rootNorm = root.replace(/\\/g, '/').replace(/\/$/, '')
54
+ const rootNorm = resolvedRoot.replace(/\\/g, '/').replace(/\/$/, '')
31
55
  const resolvedNorm = resolved.replace(/\\/g, '/').replace(/\/$/, '')
32
56
  if (!resolvedNorm.startsWith(rootNorm + '/') && resolvedNorm !== rootNorm) {
33
57
  throw new Error('Path traversal detected: target outside vault root')
@@ -42,39 +66,52 @@ export function normalizeId(raw: string): string {
42
66
  .replace(/\.md$/, '')
43
67
  }
44
68
 
45
- function loadDecayScores(): Record<string, DecayScore> {
46
- try { return JSON.parse(readFileSync(DECAY_PATH, 'utf-8')) } catch { return {} }
69
+ function loadDecayScores(): DecayScore[] {
70
+ try {
71
+ const data = JSON.parse(readFileSync(getDecayPath(), 'utf-8'))
72
+ if (Array.isArray(data)) return data
73
+ if (data && typeof data === 'object') return Object.values(data) as DecayScore[]
74
+ return []
75
+ } catch {
76
+ return []
77
+ }
47
78
  }
48
79
 
49
- function saveDecayScores(scores: Record<string, DecayScore>) {
50
- ensureDir(dirname(DECAY_PATH))
51
- writeFileSync(DECAY_PATH, JSON.stringify(scores, null, 2), 'utf-8')
80
+ function saveDecayScores(scores: DecayScore[]) {
81
+ const decayPath = getDecayPath()
82
+ ensureDir(dirname(decayPath))
83
+ writeFileSync(decayPath, JSON.stringify(scores, null, 2), 'utf-8')
52
84
  }
53
85
 
54
86
  function loadChecksums(): Record<string, { sha256: string; writtenAt: string }> {
55
- try { return JSON.parse(readFileSync(CHECKSUMS_PATH, 'utf-8')) } catch { return {} }
87
+ try { return JSON.parse(readFileSync(getChecksumsPath(), 'utf-8')) } catch { return {} }
56
88
  }
57
89
 
58
90
  function saveChecksums(store: Record<string, { sha256: string; writtenAt: string }>) {
59
- ensureDir(dirname(CHECKSUMS_PATH))
60
- writeFileSync(CHECKSUMS_PATH, JSON.stringify(store, null, 2), 'utf-8')
91
+ const checksumsPath = getChecksumsPath()
92
+ ensureDir(dirname(checksumsPath))
93
+ writeFileSync(checksumsPath, JSON.stringify(store, null, 2), 'utf-8')
61
94
  }
62
95
 
63
96
  // ─── ingest helpers ──────────────────────────────────────────────────────────────
64
97
 
65
- const jobStore = new IngestJobStore(VAULT_ROOT)
98
+ function getJobStore(): IngestJobStore {
99
+ return new IngestJobStore(getVaultRoot())
100
+ }
66
101
 
67
102
  async function ingestArxiv(value: string, category: string) {
68
103
  const id = parseArxivId(value)
69
104
  if (!id) throw new Error(`Invalid ArXiv ID: ${value}`)
70
105
 
106
+ const jobStore = getJobStore()
107
+ const metaPath = safePath(getRawDir(), join(category, `arxiv-${id}.meta.json`))
108
+
71
109
  const job = await jobStore.createJob({ source: 'arxiv', value: id, category })
72
110
  await jobStore.updateJob(job.jobId, { status: 'fetching' })
73
111
 
74
112
  const metadata = await fetchArxivMetadata(id)
75
113
  metadata.arxivId = id
76
114
 
77
- const metaPath = join(RAW_DIR, category, `arxiv-${id}.meta.json`)
78
115
  ensureDir(dirname(metaPath))
79
116
  writeFileSync(metaPath, JSON.stringify(metadata, null, 2), 'utf-8')
80
117
 
@@ -88,6 +125,10 @@ async function ingestArxiv(value: string, category: string) {
88
125
  }
89
126
 
90
127
  async function ingestUrl(value: string, category: string) {
128
+ const rawDir = getRawDir()
129
+ safePath(rawDir, category)
130
+
131
+ const jobStore = getJobStore()
91
132
  const job = await jobStore.createJob({ source: 'url', value, category })
92
133
  await jobStore.updateJob(job.jobId, { status: 'fetching' })
93
134
 
@@ -95,7 +136,7 @@ async function ingestUrl(value: string, category: string) {
95
136
  try {
96
137
  const text = await fetchHtml(value)
97
138
  const safeName = value.replace(/[^a-z0-9]/gi, '_').slice(0, 64)
98
- const rawPath = join(RAW_DIR, category, `${Date.now()}--${safeName}.html`)
139
+ const rawPath = safePath(rawDir, join(category, `${Date.now()}--${safeName}.md`))
99
140
  ensureDir(dirname(rawPath))
100
141
  writeFileSync(rawPath, text, 'utf-8')
101
142
 
@@ -115,10 +156,14 @@ async function ingestUrl(value: string, category: string) {
115
156
 
116
157
  async function ingestFile(value: string, category: string) {
117
158
  if (!existsSync(value)) throw new Error(`File not found: ${value}`)
159
+
160
+ const rawDir = getRawDir()
161
+ safePath(rawDir, category)
162
+
163
+ const jobStore = getJobStore()
118
164
  const job = await jobStore.createJob({ source: 'file', value, category })
119
- const destDir = join(RAW_DIR, category)
120
- ensureDir(destDir)
121
- const destPath = join(destDir, `${Date.now()}--${basename(value)}`)
165
+ const destPath = safePath(rawDir, join(category, `${Date.now()}--${basename(value)}`))
166
+ ensureDir(dirname(destPath))
122
167
  const content = readFileSync(value)
123
168
  writeFileSync(destPath, content)
124
169
 
@@ -136,18 +181,19 @@ async function ingestFile(value: string, category: string) {
136
181
  async function saveNote(input: NoteSaveInput) {
137
182
  const safeTitle = input.title.replace(/[^a-z0-9]/gi, '-').slice(0, 32)
138
183
  const id = `${Date.now()}--${safeTitle}`
139
- const filePath = safePath(KNOWLEDGE_DIR, join(input.category, `${id}.md`))
184
+ const filePath = safePath(getKnowledgeDir(), join(input.category, `${id}.md`))
140
185
  ensureDir(dirname(filePath))
141
186
  const content = `# ${input.title}\n\n${input.content}\n`
142
187
  writeFileSync(filePath, content, 'utf-8')
143
188
 
144
189
  const scores = loadDecayScores()
145
- scores[id] = {
190
+ const filtered = scores.filter(s => normalizeId(s.itemId) !== normalizeId(id))
191
+ filtered.push({
146
192
  itemId: id, score: 0.5, lastAccess: new Date().toISOString(),
147
193
  accessCount: 0, summaryLevel: input.summaryLevel ?? 'none',
148
194
  nextReviewAt: new Date().toISOString(), difficulty: 0.5
149
- }
150
- saveDecayScores(scores)
195
+ })
196
+ saveDecayScores(filtered)
151
197
 
152
198
  const hash = await computeChecksum(filePath)
153
199
  const checksums = loadChecksums()
@@ -157,42 +203,13 @@ async function saveNote(input: NoteSaveInput) {
157
203
  return { id, path: filePath, writtenAt: new Date().toISOString() }
158
204
  }
159
205
 
160
- // ─── vault_get ────────────────────────────────────────────────────────────────
161
-
162
- function getEntry(input: VaultGetInput) {
163
- let filePath: string
164
-
165
- if (input.path) {
166
- filePath = safePath(VAULT_ROOT, input.path)
167
- } else if (input.id) {
168
- const entry = scanKnowledge().find(e => normalizeId(e.id) === normalizeId(input.id!))
169
- if (!entry) throw new Error(`Entry not found: ${input.id}`)
170
- filePath = entry.path
171
- } else {
172
- throw new Error('id or path required')
173
- }
174
-
175
- const content = readFileSync(filePath, 'utf-8')
176
- const s = statSync(filePath)
177
- const relPath = filePath.replace(VAULT_ROOT + '/', '')
178
-
179
- return {
180
- id: normalizeId(basename(filePath)),
181
- title: (content.match(/^#\s+(.+)/m)?.[1]) ?? normalizeId(basename(filePath)),
182
- category: relPath.includes('/') ? relPath.split('/').slice(0, -1).join('/') : '',
183
- content,
184
- modified: s.mtime.toISOString(),
185
- size: s.size
186
- }
187
- }
188
-
189
206
  // ─── vault_delete ─────────────────────────────────────────────────────────────
190
207
 
191
208
  function deleteEntry(input: VaultDeleteInput) {
192
209
  let filePath: string
193
210
 
194
211
  if (input.path) {
195
- filePath = safePath(VAULT_ROOT, input.path)
212
+ filePath = safePath(getVaultRoot(), input.path)
196
213
  } else if (input.id) {
197
214
  const entry = scanKnowledge().find(e => normalizeId(e.id) === normalizeId(input.id!))
198
215
  if (!entry) throw new Error(`Entry not found: ${input.id}`)
@@ -205,8 +222,8 @@ function deleteEntry(input: VaultDeleteInput) {
205
222
 
206
223
  const id = normalizeId(basename(filePath))
207
224
  const scores = loadDecayScores()
208
- delete scores[id]
209
- saveDecayScores(scores)
225
+ const filtered = scores.filter(s => normalizeId(s.itemId) !== normalizeId(id))
226
+ saveDecayScores(filtered)
210
227
 
211
228
  const checksums = loadChecksums()
212
229
  delete checksums[filePath]
@@ -215,37 +232,6 @@ function deleteEntry(input: VaultDeleteInput) {
215
232
  return { deleted: true, path: filePath }
216
233
  }
217
234
 
218
- // ─── scanKnowledge ───────────────────────────────────────────────────────────
219
-
220
- function scanKnowledge(): VaultEntry[] {
221
- const entries: VaultEntry[] = []
222
- if (!existsSync(KNOWLEDGE_DIR)) return entries
223
- try {
224
- const categories = readdirSync(KNOWLEDGE_DIR)
225
- for (const cat of categories) {
226
- if (cat.startsWith('_')) continue
227
- const catPath = join(KNOWLEDGE_DIR, cat)
228
- if (!existsSync(catPath) || !statSync(catPath).isDirectory()) continue
229
- try {
230
- const files = readdirSync(catPath).filter((f: string) => f.endsWith('.md'))
231
- for (const file of files) {
232
- const fp = join(catPath, file)
233
- const s = statSync(fp)
234
- entries.push({
235
- id: normalizeId(file),
236
- title: normalizeId(file),
237
- category: cat,
238
- path: fp,
239
- modified: s.mtime.toISOString(),
240
- size: s.size
241
- })
242
- }
243
- } catch {}
244
- }
245
- } catch {}
246
- return entries
247
- }
248
-
249
235
  // ─── Tool Definitions ──────────────────────────────────────────────────────────
250
236
 
251
237
  export const vaultWriteTools = [
@@ -257,7 +243,7 @@ export const vaultWriteTools = [
257
243
  properties: {
258
244
  source: { type: 'string', enum: ['url', 'file', 'arxiv'] },
259
245
  value: { type: 'string', description: 'URL / absolute file path / ArXiv ID or URL' },
260
- category: { type: 'string', description: 'raw/ subdirectory, default "inbox"' },
246
+ category: { type: 'string', description: 'raw/ subdirectory, default "_inbox"' },
261
247
  priority: { type: 'string', enum: ['high', 'low'], default: 'low' },
262
248
  arxivMetadata: { type: 'boolean', description: 'ArXiv: fetch metadata before storing, default true' }
263
249
  },
@@ -265,7 +251,7 @@ export const vaultWriteTools = [
265
251
  },
266
252
  call: async (args: RawIngestInput) => {
267
253
  try {
268
- const category = args.category ?? 'inbox'
254
+ const category = args.category ?? '_inbox'
269
255
  let job
270
256
  if (args.source === 'arxiv') {
271
257
  job = await ingestArxiv(args.value, category)
@@ -305,26 +291,6 @@ export const vaultWriteTools = [
305
291
  }
306
292
  },
307
293
 
308
- {
309
- name: 'vault_get',
310
- description: 'Read full content of a vault entry by id or path.',
311
- inputSchema: {
312
- type: 'object',
313
- properties: {
314
- id: { type: 'string' },
315
- path: { type: 'string' }
316
- }
317
- },
318
- call: async (args: VaultGetInput) => {
319
- try {
320
- const result = getEntry(args)
321
- return { content: [{ type: 'text', text: JSON.stringify(result) }] }
322
- } catch (e: unknown) {
323
- return { content: [{ type: 'text', text: e instanceof Error ? e.message : String(e) }], isError: true }
324
- }
325
- }
326
- },
327
-
328
294
  {
329
295
  name: 'vault_delete',
330
296
  description: 'Delete a vault entry (raw or knowledge).',