npm - @syndash/research-vault-mcp - Versions diffs - 1.1.0 - Mend

@syndash/research-vault-mcp 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +75 -0
package/bin/research-vault-mcp.mjs +49 -0
package/package.json +46 -0
package/src/amplify.ts +245 -0
package/src/ingest/arxiv.ts +64 -0
package/src/ingest/html.ts +46 -0
package/src/ingest/pdf.ts +30 -0
package/src/server.ts +301 -0
package/src/types.ts +77 -0
package/src/vault.ts +310 -0
package/src/vault_jobs.ts +88 -0
package/src/vault_write.ts +347 -0

package/src/server.ts ADDED Viewed

@@ -0,0 +1,301 @@
+// Research Vault MCP Server — MCP SSE transport (default), or stdio transport when MCP_TRANSPORT=stdio
+// MCP Protocol: JSON-RPC 2.0 over SSE (server→client) + HTTP POST (client→server)
+//
+// Flow:
+//   1. Client connects GET /sse
+//   2. Server sends: event: endpoint\ndata: /messages?sessionId=<uuid>
+//   3. Client POSTs JSON-RPC to /messages?sessionId=<uuid>
+//   4. Server sends JSON-RPC response via SSE: event: message\ndata: {...}
+import { vaultTools } from './vault'
+import { vaultWriteTools } from './vault_write.js'
+import { amplifyTools, configureAmplify } from './amplify'
+// Interface the HTTP server binds to. Defaults to all interfaces ('0.0.0.0');
+// set MCP_HOST=127.0.0.1 to restrict the server to loopback.
+const HOST = process.env.MCP_HOST ?? '0.0.0.0'
+// Transport selector: 'stdio' runs line-delimited JSON-RPC on stdin/stdout,
+// any other value leaves the HTTP + SSE endpoints as the only transport.
+const TRANSPORT = process.env.MCP_TRANSPORT ?? 'sse'
+// TCP port for the HTTP server. MCP_PORT must be a base-10 integer in 0..65535;
+// anything else is reported on stderr and replaced by the default 8765 instead of
+// handing NaN (or a silently truncated value) to the server.
+const PORT = ((): number => {
+  const fallback = 8765
+  const raw = process.env.MCP_PORT
+  if (raw === undefined) return fallback
+  const parsed = /^\s*\d+\s*$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN
+  if (Number.isInteger(parsed) && parsed >= 0 && parsed <= 65535) return parsed
+  console.error(`[MCP] Ignoring invalid MCP_PORT=${JSON.stringify(raw)}; using ${fallback}`)
+  return fallback
+})()
+// ─── MCP Protocol Types ──────────────────────────────────────────────────────
+/**
+ * Incoming JSON-RPC 2.0 message. `id` is optional: a message without one is
+ * treated as a notification, for which no response is expected.
+ */
+interface MCPRequest {
+  jsonrpc: '2.0'
+  id?: string | number
+  method: string
+  params?: any
+}
+/**
+ * Outgoing JSON-RPC 2.0 message. Built by `makeResponse`, which sets both
+ * `result` and `error` keys (the unused one is `undefined`).
+ */
+interface MCPResponse {
+  jsonrpc: '2.0'
+  id?: string | number
+  result?: any
+  error?: { code: number; message: string; data?: any }
+}
+/**
+ * One callable tool. `name`, `description` and `inputSchema` are returned
+ * verbatim by `tools/list`; `call` is invoked by `tools/call` with the
+ * request's `arguments` object (or `{}` when none was sent).
+ */
+interface Tool {
+  name: string
+  description: string
+  inputSchema: any
+  call: (params: any) => Promise<{ content: Array<{type: string; text: string}>; isError?: boolean }>
+}
+// ─── State ───────────────────────────────────────────────────────────────────
+// Every tool the server exposes, in registration order: vault tools, then
+// vault write tools, then Amplify tools.
+const allTools: Tool[] = ([] as Tool[]).concat(vaultTools, vaultWriteTools, amplifyTools)
+// Name → tool lookup used by tools/call; a later tool with the same name replaces an earlier one.
+const toolMap = new Map<string, Tool>()
+for (const tool of allTools) {
+  toolMap.set(tool.name, tool)
+}
+// One live SSE connection: `send` writes a pre-formatted SSE frame to the
+// stream, `heartbeat` is the keep-alive timer that must be cleared on teardown.
+interface Session {
+  send: (data: string) => void
+  heartbeat: ReturnType<typeof setInterval>
+}
+// Live sessions keyed by the sessionId handed out on GET /sse.
+const sessions: Map<string, Session> = new Map()
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+/**
+ * Wraps a result or an error in a JSON-RPC 2.0 response envelope.
+ * Both `result` and `error` keys are always present on the returned object;
+ * whichever was not supplied is `undefined`.
+ */
+function makeResponse(id: string | number | undefined, result?: any, error?: any): MCPResponse {
+  const envelope: MCPResponse = { jsonrpc: '2.0', id, result, error }
+  return envelope
+}
+/** Returns a fresh random UUID, used as the identifier of a new SSE session. */
+function generateSessionId(): string {
+  const sessionId: string = crypto.randomUUID()
+  return sessionId
+}
+// ─── MCP Handlers ─────────────────────────────────────────────────────────────
+/** Extracts a printable message from any thrown value. */
+function describeError(e: unknown): string {
+  return e instanceof Error ? e.message : String(e)
+}
+/**
+ * Runs one MCP method and builds its JSON-RPC response.
+ * Supports `initialize`, `tools/list`, `tools/call` and `ping`; anything else
+ * yields error -32601 (method not found).
+ */
+async function dispatchMethod(method: string, id: string | number | undefined, params: unknown): Promise<MCPResponse> {
+  // ── initialize
+  if (method === 'initialize') {
+    return makeResponse(id, {
+      protocolVersion: '2024-11-05',
+      capabilities: {
+        tools: { listChanged: false },
+      },
+      serverInfo: {
+        name: 'research-vault-mcp',
+        // Kept in step with the published package version (1.1.0).
+        version: '1.1.0'
+      }
+    })
+  }
+  // ── tools/list
+  if (method === 'tools/list') {
+    return makeResponse(id, {
+      tools: allTools.map(t => ({
+        name: t.name,
+        description: t.description,
+        inputSchema: t.inputSchema
+      }))
+    })
+  }
+  // ── tools/call
+  if (method === 'tools/call') {
+    // `params` is optional on the wire; destructuring it unguarded would throw a
+    // TypeError that callers then misreport as a parse error.
+    const { name, arguments: args } = (params ?? {}) as { name?: unknown; arguments?: unknown }
+    if (typeof name !== 'string' || name === '') {
+      return makeResponse(id, undefined, { code: -32602, message: 'Invalid params: tools/call requires a string "name"' })
+    }
+    console.error('[DEBUG] tools/call:', name, JSON.stringify(args))
+    const tool = toolMap.get(name)
+    if (!tool) {
+      return makeResponse(id, undefined, { code: -32602, message: `Unknown tool: ${name}` })
+    }
+    try {
+      const result = await tool.call(args || {})
+      return makeResponse(id, { content: result.content, isError: result.isError })
+    } catch (e: unknown) {
+      return makeResponse(id, undefined, { code: -32603, message: `Tool error: ${describeError(e)}` })
+    }
+  }
+  // ── ping
+  if (method === 'ping') {
+    return makeResponse(id, {})
+  }
+  return makeResponse(id, undefined, { code: -32601, message: `Method not found: ${method}` })
+}
+/**
+ * Dispatches one JSON-RPC message to the matching MCP handler.
+ *
+ * @param req Parsed JSON-RPC message.
+ * @returns The response to deliver, or `null` when nothing must be sent:
+ *   every `notifications/*` method, and every message that carries no `id`
+ *   (JSON-RPC 2.0 forbids replying to notifications, even with an error).
+ *   A message without an `id` is still executed; only its reply is dropped.
+ */
+async function handleRequest(req: MCPRequest): Promise<MCPResponse | null> {
+  const { method, id, params } = req
+  if (typeof method === 'string' && method.startsWith('notifications/')) {
+    return null
+  }
+  const response = await dispatchMethod(method, id, params)
+  return id === undefined ? null : response
+}
+// ─── STDIO Transport ──────────────────────────────────────────────────────────
+/**
+ * Serves MCP over stdio: reads one JSON-RPC message per line from stdin and
+ * writes one JSON response per line to stdout. Resolves when stdin closes.
+ *
+ * Error mapping:
+ *   - line is not valid JSON           → -32700 (parse error), no id
+ *   - JSON is not a request object     → -32600 (invalid request), no id
+ *   - the handler throws               → -32603 (internal error) carrying the
+ *     request id; nothing is sent when the failing message had no id
+ */
+async function handleStdioTransport() {
+  const rl = await import('readline')
+  const rli = rl.createInterface({ input: process.stdin as any, crlfDelay: Infinity })
+  const writer = Bun.stdout.writer()
+  const send = (obj: MCPResponse) => {
+    writer.write(JSON.stringify(obj) + '\n')
+    writer.flush()
+  }
+  const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e))
+  for await (const line of rli) {
+    if (!line.trim()) continue
+    let parsed: unknown
+    try {
+      parsed = JSON.parse(line)
+    } catch (e: unknown) {
+      send({ jsonrpc: '2.0', error: { code: -32700, message: `Parse error: ${messageOf(e)}` } })
+      continue
+    }
+    // Valid JSON that is not an object with a string `method` (e.g. `null`, `42`, `[]`).
+    if (parsed === null || typeof parsed !== 'object' || typeof (parsed as { method?: unknown }).method !== 'string') {
+      send({ jsonrpc: '2.0', error: { code: -32600, message: 'Invalid Request: expected an object with a string "method"' } })
+      continue
+    }
+    const req = parsed as MCPRequest
+    try {
+      const result = await handleRequest(req)
+      if (result) send(result)
+    } catch (e: unknown) {
+      // A handler failure is not a parse failure: report it against the request id.
+      if (req.id !== undefined) {
+        send({ jsonrpc: '2.0', id: req.id, error: { code: -32603, message: `Internal error: ${messageOf(e)}` } })
+      }
+    }
+  }
+}
+// ─── HTTP Server ──────────────────────────────────────────────────────────────
+// HTTP server for the MCP SSE transport plus /health and /configure.
+//
+// Routes:
+//   GET  /sse                      open an SSE stream; first event announces the POST endpoint
+//   POST /messages?sessionId=<id>  accept one JSON-RPC message; the reply is pushed on the SSE stream
+//   GET  /health                   liveness and counters
+//   POST /configure                set the Amplify API key ({ "apiKey": "..." })
+//
+// NOTE(review): this statement runs unconditionally at module load, so the port is
+// bound even when TRANSPORT === 'stdio' (the transport check only happens further down).
+// NOTE(review): no route is authenticated, and HOST may be a non-loopback address —
+// anyone who can reach the port can call tools and replace the Amplify key via /configure.
+// NOTE(review): heartbeats are sent every 15 s; confirm this is shorter than the
+// server's idle timeout in the Bun version you deploy, otherwise idle streams get closed.
+const server = Bun.serve({
+  port: PORT,
+  hostname: HOST,
+  async fetch(req: Request): Promise<Response> {
+    const url = new URL(req.url)
+    const messageOf = (e: unknown): string => (e instanceof Error ? e.message : String(e))
+    // ── GET /sse — MCP SSE Transport: establish SSE stream + send endpoint
+    if (url.pathname === '/sse' && req.method === 'GET') {
+      const sessionId = generateSessionId()
+      // Assigned inside start(); shared by the abort listener, the heartbeat and cancel()
+      // so that every way a stream can end releases the timer and the session entry.
+      let teardown: () => void = () => {}
+      const stream = new ReadableStream({
+        start(controller) {
+          const encoder = new TextEncoder()
+          const send = (data: string) => {
+            // Best-effort: enqueue throws once the stream is closed; the teardown paths handle cleanup.
+            try { controller.enqueue(encoder.encode(data)) } catch {}
+          }
+          // Step 1: Send the endpoint event (MCP SSE spec requirement)
+          send(`event: endpoint\ndata: /messages?sessionId=${sessionId}\n\n`)
+          // Heartbeat every 15s
+          const heartbeat = setInterval(() => {
+            try {
+              controller.enqueue(encoder.encode(`: heartbeat\n\n`))
+            } catch {
+              teardown()
+            }
+          }, 15000)
+          teardown = () => {
+            clearInterval(heartbeat)
+            // Idempotent: only the first caller finds the session and logs the disconnect.
+            if (sessions.delete(sessionId)) {
+              console.error(`[SSE] Session ${sessionId} disconnected`)
+            }
+          }
+          // Register session
+          sessions.set(sessionId, { send, heartbeat })
+          console.error(`[SSE] Session ${sessionId} connected`)
+          req.signal.addEventListener('abort', () => teardown())
+        },
+        cancel() {
+          // The consumer dropped the stream without the request signal aborting.
+          teardown()
+        }
+      })
+      return new Response(stream, {
+        status: 200,
+        headers: {
+          'Content-Type': 'text/event-stream',
+          'Cache-Control': 'no-cache',
+          'Connection': 'keep-alive',
+          'X-Accel-Buffering': 'no'
+        }
+      })
+    }
+    // ── POST /messages?sessionId=xxx — MCP SSE Transport: receive JSON-RPC, respond via SSE
+    if (url.pathname === '/messages' && req.method === 'POST') {
+      const sessionId = url.searchParams.get('sessionId')
+      const session = sessionId ? sessions.get(sessionId) : undefined
+      if (!session) {
+        return Response.json(
+          { error: 'Invalid or missing sessionId' },
+          { status: 400 }
+        )
+      }
+      let parsed: unknown
+      try {
+        parsed = await req.json()
+      } catch (e: unknown) {
+        return Response.json(
+          { jsonrpc: '2.0', error: { code: -32700, message: `Parse error: ${messageOf(e)}` } },
+          { status: 400 }
+        )
+      }
+      if (parsed === null || typeof parsed !== 'object' || typeof (parsed as { method?: unknown }).method !== 'string') {
+        return Response.json(
+          { jsonrpc: '2.0', error: { code: -32600, message: 'Invalid Request: expected an object with a string "method"' } },
+          { status: 400 }
+        )
+      }
+      const body = parsed as MCPRequest
+      try {
+        const result = await handleRequest(body)
+        // Send response via SSE stream (MCP SSE spec)
+        if (result) {
+          session.send(`event: message\ndata: ${JSON.stringify(result)}\n\n`)
+        }
+      } catch (e: unknown) {
+        // The message parsed fine, so a handler failure is an internal error delivered on
+        // the SSE stream against the request id — not an HTTP 400 "Parse error".
+        if (body.id !== undefined) {
+          const failure = { jsonrpc: '2.0', id: body.id, error: { code: -32603, message: `Internal error: ${messageOf(e)}` } }
+          session.send(`event: message\ndata: ${JSON.stringify(failure)}\n\n`)
+        }
+      }
+      // Return 202 Accepted (MCP SSE spec: POST returns 202, response goes via SSE)
+      return new Response(null, { status: 202 })
+    }
+    // ── GET /health
+    if (url.pathname === '/health' && req.method === 'GET') {
+      return Response.json({
+        status: 'ok',
+        tools: allTools.length,
+        vault_tools: vaultTools.length,
+        vault_write_tools: vaultWriteTools.length,
+        amplify_tools: amplifyTools.length,
+        sse_sessions: sessions.size,
+        uptime: process.uptime()
+      })
+    }
+    // ── POST /configure — set Amplify API key
+    if (url.pathname === '/configure' && req.method === 'POST') {
+      try {
+        const payload = await req.json() as { apiKey?: unknown } | null
+        const apiKey = payload?.apiKey
+        if (typeof apiKey !== 'string' || apiKey === '') throw new Error('apiKey required')
+        configureAmplify(apiKey)
+        return Response.json({ status: 'configured' })
+      } catch (e: unknown) {
+        return Response.json({ error: messageOf(e) }, { status: 400 })
+      }
+    }
+    // ── 404
+    return Response.json({ error: 'Not found' }, { status: 404 })
+  }
+})
+// ─── Startup ─────────────────────────────────────────────────────────────────
+// Transport selection. In stdio mode the status line goes to stderr (stdout carries
+// the JSON-RPC stream), the process serves stdin until it closes and then exits 0.
+// Otherwise a banner with the HTTP endpoints is printed to stdout.
+// NOTE(review): the HTTP server above has already been created by the time this runs,
+// in both modes.
+// NOTE(review): the banner's tool breakdown lists vault and amplify counts only;
+// vaultWriteTools are included in the total but not itemised.
+if (TRANSPORT === 'stdio') {
+  console.error('[MCP] Running in stdio mode (stdin/stdout JSON-RPC)')
+  await handleStdioTransport()
+  process.exit(0)
+} else {
+  console.log(`
+╔══════════════════════════════════════════════════════╗
+║   Research Vault MCP Server — MCP SSE Transport     ║
+╠══════════════════════════════════════════════════════╣
+║  SSE:       http://${HOST}:${PORT}/sse                ║
+║  Messages:  http://${HOST}:${PORT}/messages          ║
+║  Health:    http://${HOST}:${PORT}/health            ║
+╠══════════════════════════════════════════════════════╣
+║  Tools:     ${String(allTools.length).padEnd(3)} (${vaultTools.length} vault, ${amplifyTools.length} amplify)     ║
+╚══════════════════════════════════════════════════════╝
+`)
+}
+// ─── Graceful Shutdown ───────────────────────────────────────────────────────
+// Graceful shutdown on Ctrl-C (SIGINT) and on SIGTERM: stop every heartbeat timer,
+// drop all sessions, stop the HTTP server and exit with status 0.
+// The notice goes to stderr because stdout is the JSON-RPC channel in stdio mode.
+for (const signal of ['SIGINT', 'SIGTERM'] as const) {
+  process.on(signal, () => {
+    console.error('\nShutting down...')
+    for (const session of sessions.values()) {
+      clearInterval(session.heartbeat)
+    }
+    sessions.clear()
+    server.stop()
+    process.exit(0)
+  })
+}

package/src/types.ts ADDED Viewed

@@ -0,0 +1,77 @@
+// packages/research-vault-mcp/src/types.ts
+/**
+ * Metadata record for one note in the vault. Same shape as the `VaultEntry`
+ * that vault.ts declares locally and fills in `scanKnowledge()`.
+ */
+export interface VaultEntry {
+  id: string
+  title: string
+  category: string
+  path: string
+  // Timestamp string; vault.ts fills its equivalent field with an ISO-8601 mtime ('' when unreadable).
+  modified: string
+  // vault.ts fills its equivalent field from the file's stat size.
+  size: number
+}
+/**
+ * Retention ("decay") record for one item. Same shape as the `DecayScore`
+ * that vault.ts declares locally and loads from `.meta/decay-scores.json`.
+ */
+export interface DecayScore {
+  itemId: string
+  score: number
+  lastAccess: string
+  accessCount: number
+  // vault.ts reports 'none' items under the label "dormant".
+  summaryLevel: 'deep' | 'shallow' | 'none'
+  nextReviewAt: string
+  // NOTE(review): scale and meaning are not visible in this file — confirm with the scoring code.
+  difficulty: number
+}
+// ─── Ingest Job Types ───────────────────────────────────────────
+/**
+ * Lifecycle state of an ingest job.
+ * NOTE(review): presumably progresses queued → fetching → parsing → done, with
+ * 'failed' as the error outcome — confirm against the job runner.
+ */
+export type IngestStatus = 'queued' | 'fetching' | 'parsing' | 'done' | 'failed'
+/** One ingest job: what was requested, its current status and its bookkeeping timestamps. */
+export interface IngestJob {
+  jobId: string
+  // Kind of input; `value` holds the matching locator (presumably a URL, file path or arXiv id — confirm).
+  source: 'url' | 'file' | 'arxiv'
+  value: string
+  category: string
+  status: IngestStatus
+  // null until a raw file path is known.
+  rawPath: string | null
+  // null when no arXiv metadata is attached.
+  metadata: ArxivMetadata | null
+  error?: string
+  createdAt: string
+  updatedAt: string
+}
+/** arXiv metadata for a paper; every field may individually be null. */
+export interface ArxivMetadata {
+  title: string | null
+  authors: string[] | null
+  abstract: string | null
+  arxivId: string | null
+  categories: string[] | null
+}
+// ─── Tool Input/Output Types ───────────────────────────────────
+/** Arguments for ingesting a raw source (URL, file or arXiv reference). */
+export interface RawIngestInput {
+  source: 'url' | 'file' | 'arxiv'
+  value: string
+  category?: string   // defaults to "inbox"
+  priority?: 'high' | 'low'
+  arxivMetadata?: boolean  // ArXiv only: prefetch metadata before storing, default true
+}
+/** Arguments for saving a note: title, body, target category and optional tags / summary level. */
+export interface NoteSaveInput {
+  title: string
+  content: string
+  category: string
+  tags?: string[]
+  summaryLevel?: 'deep' | 'shallow' | 'none'
+}
+/**
+ * Arguments for fetching a vault item. Both fields are optional in the type.
+ * NOTE(review): presumably at least one of `id` / `path` must be supplied — confirm in the tool.
+ */
+export interface VaultGetInput {
+  id?: string
+  path?: string
+}
+/**
+ * Arguments for deleting a vault item. Both fields are optional in the type.
+ * NOTE(review): presumably at least one of `id` / `path` must be supplied — confirm in the tool.
+ */
+export interface VaultDeleteInput {
+  id?: string
+  path?: string
+}
+// ─── Checksum Types ────────────────────────────────────────────
+/**
+ * Map from a string key to the SHA-256 digest and write timestamp recorded for it.
+ * NOTE(review): the key is presumably a file path — confirm against the writer.
+ */
+export type ChecksumStore = Record<string, { sha256: string; writtenAt: string }>

package/src/vault.ts ADDED Viewed

@@ -0,0 +1,310 @@
+// Research Vault MCP Tools
+// Resolves vault root via env override, else defaults to the actual data location.
+// After Phase 07 T3, CCR/research-vault is a submodule of ds-research-vault.
+import { readFileSync, readdirSync, existsSync, statSync } from 'fs'
+import { join, basename } from 'path'
+import { homedir } from 'os'
+// Vault location on disk: the VAULT_ROOT environment variable wins, otherwise a
+// fixed folder below the current user's home directory is used.
+const VAULT_ROOT = process.env.VAULT_ROOT ?? `${homedir()}/Documents/Evensong/research-vault`
+// Analysed notes live under knowledge/, with the taxonomy document alongside them.
+const KNOWLEDGE_DIR = join(VAULT_ROOT, 'knowledge')
+const TAXONOMY_PATH = join(KNOWLEDGE_DIR, '_taxonomy.md')
+// Unprocessed sources waiting for analysis.
+const RAW_DIR = join(VAULT_ROOT, 'raw')
+// Retention scores, stored as JSON in the hidden .meta folder.
+const DECAY_PATH = join(VAULT_ROOT, '.meta', 'decay-scores.json')
+// ─── Types ───────────────────────────────────────────────────────────────────
+/**
+ * One Markdown note found under the knowledge directory by `scanKnowledge()`.
+ * NOTE(review): duplicates the exported `VaultEntry` in types.ts — consider importing it instead.
+ */
+interface VaultEntry {
+  // File name passed through normalizeId() (date/timestamp prefix and `.md` removed).
+  id: string
+  // First `# ` heading within the first 30 lines, else the normalized file name.
+  title: string
+  // Top-level folder name, or `folder/subfolder` for notes one level deeper.
+  category: string
+  // Path built by joining the knowledge directory with the folder and file names.
+  path: string
+  // File mtime as an ISO-8601 string; '' when the file could not be read.
+  modified: string
+  // File size from stat; 0 when the file could not be read.
+  size: number
+}
+/**
+ * One record of the decay-scores JSON file as consumed by the tools below.
+ * NOTE(review): duplicates the exported `DecayScore` in types.ts — consider importing it instead.
+ */
+interface DecayScore {
+  // Matched against note ids after normalizeId(); `--` is also tried as `-`.
+  itemId: string
+  // Higher sorts first in vault_status ("top5").
+  score: number
+  // vault_status shows only the first 10 characters.
+  lastAccess: string
+  accessCount: number
+  // vault_status counts 'none' as "dormant".
+  summaryLevel: 'deep' | 'shallow' | 'none'
+  nextReviewAt: string
+  // Not read anywhere in this file.
+  difficulty: number
+}
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+/**
+ * Reduces a file name or score key to its bare slug by removing, in order:
+ *   1. a leading `YYYYMMDD-HHMM-` (or `YYYYMMDD--HHMM-`) date prefix,
+ *   2. a leading run of 10+ digits followed by `-` or `--`,
+ *   3. a trailing `.md` extension.
+ * Anything else is returned untouched.
+ */
+function normalizeId(raw: string): string {
+  const strippers: RegExp[] = [/^\d{8}--?\d{4}-/, /^(\d{10,})--?/, /\.md$/]
+  let slug = raw
+  for (const pattern of strippers) {
+    slug = slug.replace(pattern, '')
+  }
+  return slug
+}
+/**
+ * Loads the decay-score records from DECAY_PATH.
+ *
+ * @returns The records that are objects with a string `itemId`. Returns `[]`
+ *   when the file is missing, unreadable, not valid JSON, or its top-level
+ *   value is not an array — callers index, sort and map the result, so a
+ *   malformed file must not reach them.
+ */
+function loadDecayScores(): DecayScore[] {
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(readFileSync(DECAY_PATH, 'utf-8'))
+  } catch {
+    return []
+  }
+  if (!Array.isArray(parsed)) return []
+  return parsed.filter((row): row is DecayScore =>
+    row !== null && typeof row === 'object' && typeof (row as { itemId?: unknown }).itemId === 'string'
+  )
+}
+/** Reads the taxonomy Markdown at TAXONOMY_PATH; yields '' when it cannot be read. */
+function loadTaxonomy(): string {
+  let markdown = ''
+  try {
+    markdown = readFileSync(TAXONOMY_PATH, 'utf-8')
+  } catch {
+    // Missing or unreadable taxonomy is not an error: fall through with ''.
+  }
+  return markdown
+}
+/**
+ * Reads display metadata for one note.
+ *
+ * @param filePath Path of the Markdown file.
+ * @returns `title` — text of the first `# ` heading within the first 30 lines,
+ *   trimmed, or the normalized file name when there is none; `modified` — the
+ *   mtime as an ISO-8601 string; `size` — the stat size. When the file cannot
+ *   be read or stat'ed the result is the normalized file name with
+ *   `modified: ''` and `size: 0`.
+ */
+function loadFileMeta(filePath: string): { title: string; modified: string; size: number } {
+  try {
+    // Drop a UTF-8 BOM so a heading on the very first line is still recognised.
+    const content = readFileSync(filePath, 'utf-8').replace(/^\uFEFF/, '')
+    let title = ''
+    // Split on LF or CRLF: with a bare '\n' split, CRLF files leave '\r' at the end of the title.
+    for (const line of content.split(/\r?\n/, 30)) {
+      const heading = line.match(/^#\s+(.+)/)?.[1]?.trim()
+      if (heading) { title = heading; break }
+    }
+    const s = statSync(filePath)
+    return {
+      title: title || normalizeId(basename(filePath)),
+      modified: s.mtime.toISOString(),
+      size: s.size
+    }
+  } catch {
+    return { title: normalizeId(basename(filePath)), modified: '', size: 0 }
+  }
+}
+/**
+ * Walks the knowledge directory and lists every Markdown note, at most two
+ * folder levels deep:
+ *   knowledge/<cat>/<note>.md         → category `<cat>`
+ *   knowledge/<cat>/<sub>/<note>.md   → category `<cat>/<sub>`
+ * Top-level names starting with `_` are skipped. Paths that cannot be stat'ed
+ * (broken symlink, removed mid-scan) and directories that cannot be listed are
+ * skipped instead of aborting the whole scan.
+ *
+ * @returns One entry per note; `[]` when the knowledge directory is missing.
+ */
+function scanKnowledge(): VaultEntry[] {
+  const entries: VaultEntry[] = []
+  // stat that reports failure as undefined rather than throwing.
+  const safeStat = (p: string) => {
+    try { return statSync(p) } catch { return undefined }
+  }
+  // Directory listing that reports failure as an empty list rather than throwing.
+  const listDir = (p: string): string[] => {
+    try { return readdirSync(p) } catch { return [] }
+  }
+  // Single place where a note file becomes a VaultEntry.
+  const addNote = (filePath: string, fileName: string, category: string): void => {
+    const meta = loadFileMeta(filePath)
+    entries.push({
+      id: normalizeId(fileName),
+      title: meta.title,
+      category,
+      path: filePath,
+      modified: meta.modified,
+      size: meta.size
+    })
+  }
+  for (const cat of listDir(KNOWLEDGE_DIR)) {
+    if (cat.startsWith('_')) continue
+    const catPath = join(KNOWLEDGE_DIR, cat)
+    if (!safeStat(catPath)?.isDirectory()) continue
+    for (const sub of listDir(catPath)) {
+      const subPath = join(catPath, sub)
+      const subStat = safeStat(subPath)
+      if (!subStat) continue
+      if (subStat.isDirectory()) {
+        for (const file of listDir(subPath)) {
+          if (file.endsWith('.md')) addNote(join(subPath, file), file, `${cat}/${sub}`)
+        }
+      } else if (sub.endsWith('.md')) {
+        addNote(subPath, sub, cat)
+      }
+    }
+  }
+  return entries
+}
+/**
+ * Lists raw documents (.md, .pdf, .txt) waiting in the raw directory.
+ * Only two kinds of sub-folder are inspected: `_inbox`, whose files are
+ * returned as bare names, and `YYYY-MM` month folders, whose files are
+ * returned as `YYYY-MM/name`. Any filesystem error stops the scan and returns
+ * whatever was collected up to that point.
+ */
+function scanRaw(): string[] {
+  const queued: string[] = []
+  if (!existsSync(RAW_DIR)) return queued
+  const isRawDocument = (name: string): boolean => /\.(md|pdf|txt)$/.test(name)
+  try {
+    for (const name of readdirSync(RAW_DIR)) {
+      const isInbox = name === '_inbox'
+      if (!isInbox && !/^\d{4}-\d{2}$/.test(name)) continue
+      const dir = join(RAW_DIR, name)
+      if (!existsSync(dir)) continue
+      for (const file of readdirSync(dir)) {
+        if (!isRawDocument(file)) continue
+        queued.push(isInbox ? file : `${name}/${file}`)
+      }
+    }
+  } catch {}
+  return queued
+}
+// ─── MCP Tools ───────────────────────────────────────────────────────────────
+/**
+ * Coerces a caller-supplied count to a non-negative integer. Missing or
+ * non-numeric values yield `fallback`; negatives clamp to 0 (a negative value
+ * passed straight to `slice` would silently mean "all but the last N").
+ */
+function toCount(value: unknown, fallback: number): number {
+  if (value === undefined || value === null) return fallback
+  const n = Number(value)
+  return Number.isFinite(n) ? Math.max(0, Math.floor(n)) : fallback
+}
+/**
+ * Slug of a raw-queue item as returned by scanRaw(): drops the `YYYY-MM/`
+ * folder part and the .md/.pdf/.txt extension before normalizing, so the
+ * result is comparable with note ids.
+ */
+function rawSlug(rawPath: string): string {
+  return normalizeId(basename(rawPath).replace(/\.(md|pdf|txt)$/, ''))
+}
+/**
+ * Read-only MCP tools over the vault. Each `call` returns a single text
+ * content item holding pretty-printed JSON.
+ */
+const vaultTools = [
+  {
+    name: 'vault_search',
+    description: 'Search the Research Vault knowledge base. Returns analyzed papers with retention scores.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query (matches title, category)' },
+        category: { type: 'string', description: 'Filter by category (e.g., "ai-agents/benchmarking")' },
+        limit: { type: 'number', description: 'Max results (default 10)' }
+      }
+    },
+    // Filters notes by exact-or-parent category and by case-insensitive substring of
+    // title / id / category, then returns the first `limit` with their decay data.
+    // `total` is the number of matches before the limit is applied.
+    call: async ({ query, category, limit }: { query?: string; category?: string; limit?: number } = {}) => {
+      const scoreMap = new Map(loadDecayScores().map(s => [normalizeId(s.itemId), s] as const))
+      let matches = scanKnowledge()
+      if (category) {
+        matches = matches.filter(item =>
+          item.category === category || item.category.startsWith(category + '/')
+        )
+      }
+      if (query) {
+        const q = String(query).toLowerCase()
+        matches = matches.filter(item =>
+          item.title.toLowerCase().includes(q) ||
+          item.id.toLowerCase().includes(q) ||
+          item.category.toLowerCase().includes(q)
+        )
+      }
+      const results = matches.slice(0, toCount(limit, 10)).map(item => {
+        // Score keys may use single dashes where the file name uses double dashes.
+        const score = scoreMap.get(item.id) ?? scoreMap.get(item.id.replace(/--/g, '-'))
+        return {
+          id: item.id,
+          title: item.title,
+          category: item.category,
+          score: score?.score ?? null,
+          summaryLevel: score?.summaryLevel ?? null,
+          nextReview: score?.nextReviewAt ?? null,
+          accessCount: score?.accessCount ?? 0,
+          modified: item.modified
+        }
+      })
+      return {
+        content: [{
+          type: 'text',
+          text: JSON.stringify({ query, category, results, total: matches.length }, null, 2)
+        }]
+      }
+    }
+  },
+  {
+    name: 'vault_status',
+    description: 'Get Research Vault health — item counts by decay level, top/bottom retention.',
+    inputSchema: { type: 'object', properties: {} },
+    // Summarises note count, score counts per summary level, pending raw files and the
+    // five highest / five lowest scored items (the two lists overlap below ten scores).
+    call: async () => {
+      const scores = loadDecayScores()
+      const entries = scanKnowledge()
+      // Index notes once by normalized id; the first note wins on duplicate ids.
+      const entryById = new Map<string, VaultEntry>()
+      for (const e of entries) {
+        const key = normalizeId(e.id)
+        if (!entryById.has(key)) entryById.set(key, e)
+      }
+      const titleFor = (itemId: string): string => {
+        const entry =
+          entryById.get(normalizeId(itemId)) ??
+          entryById.get(normalizeId(itemId.replace(/--/g, '-')))
+        return entry?.title || itemId
+      }
+      const countLevel = (level: DecayScore['summaryLevel']): number =>
+        scores.filter(s => s.summaryLevel === level).length
+      const sorted = [...scores].sort((a, b) => b.score - a.score)
+      const top5 = sorted.slice(0, 5).map(s => (
+        { itemId: s.itemId, score: s.score, accesses: s.accessCount, title: titleFor(s.itemId) }
+      ))
+      const bottom5 = sorted.slice(-5).reverse().map(s => ({
+        itemId: s.itemId,
+        score: s.score,
+        // Date part only; a record without lastAccess must not crash the report.
+        lastAccess: typeof s.lastAccess === 'string' ? s.lastAccess.slice(0, 10) : null,
+        title: titleFor(s.itemId)
+      }))
+      const pending = scanRaw()
+      return {
+        content: [{
+          type: 'text',
+          text: JSON.stringify({
+            total: entries.length,
+            analyzed: scores.length,
+            deep: countLevel('deep'),
+            shallow: countLevel('shallow'),
+            dormant: countLevel('none'),
+            pending_raw: pending.length,
+            top5,
+            bottom5
+          }, null, 2)
+        }]
+      }
+    }
+  },
+  {
+    name: 'vault_batch_analyze',
+    description: 'Check batch analyze status and pending papers in the raw queue.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        count: { type: 'number', description: 'Preview N papers (default 5)' }
+      }
+    },
+    // Reports raw files whose slug has no matching note id, with a preview of the first `count`.
+    call: async ({ count }: { count?: number } = {}) => {
+      const pending = scanRaw()
+      const entries = scanKnowledge()
+      const analyzedIds = new Set(entries.map(e => normalizeId(e.id)))
+      // Compare slugs, not raw paths: `2024-01/paper.pdf` can never equal a note id.
+      const unanalyzed = pending.filter(p => !analyzedIds.has(rawSlug(p)))
+      if (unanalyzed.length === 0) {
+        return { content: [{ type: 'text', text: JSON.stringify({ message: 'Queue empty — all papers analyzed', analyzed: entries.length }) }] }
+      }
+      return {
+        content: [{
+          type: 'text',
+          text: JSON.stringify({
+            message: `${unanalyzed.length} papers pending analysis`,
+            pending: unanalyzed.length,
+            preview: unanalyzed.slice(0, toCount(count, 5)),
+            // NOTE(review): this path is hard-coded and differs from the default VAULT_ROOT — confirm.
+            hint: 'cd ~/Desktop/research-vault && bun run scripts/batch-analyze.ts --count N'
+          }, null, 2)
+        }]
+      }
+    }
+  },
+  {
+    name: 'vault_taxonomy',
+    description: 'Get the Research Vault taxonomy — all categories and counts.',
+    inputSchema: { type: 'object', properties: {} },
+    // Returns the taxonomy document text together with a note count per category.
+    call: async () => {
+      const taxonomy = loadTaxonomy()
+      const catCounts: Record<string, number> = {}
+      for (const e of scanKnowledge()) catCounts[e.category] = (catCounts[e.category] ?? 0) + 1
+      return {
+        content: [{
+          type: 'text',
+          text: JSON.stringify({ taxonomy, categories: catCounts }, null, 2)
+        }]
+      }
+    }
+  }
+]
+export { vaultTools, scanKnowledge, scanRaw, loadDecayScores, normalizeId }