@syndash/research-vault-mcp 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,75 @@
1
+ # @syndash/research-vault-mcp
2
+
3
+ MCP (Model Context Protocol) server for [Nolan's research vault](https://github.com/Fearvox/dash-research-vault) — semantic search + memory persistence over 200+ markdown documents via local Gemma (Atomic Chat) or cloud LLM fallback.
4
+
5
+ **Part of**: DASH SHATTER / SynDASH ecosystem.
6
+ **Home**: [github.com/Fearvox/Evensong](https://github.com/Fearvox/Evensong) — `packages/research-vault-mcp/`
7
+ **Status**: Wave 3+ — not yet published to npm. Plan: `docs/superpowers/plans/2026-04-19-wave2d-submodule-mcp-package-prep.md`.
8
+
9
+ ## Install & Run (future, post-publish)
10
+
11
+ ```bash
12
+ # Via bun (recommended — native TS execution)
13
+ bunx @syndash/research-vault-mcp
14
+
15
+ # Via Node
16
+ npx @syndash/research-vault-mcp
17
+ ```
18
+
19
+ ## Configure Claude Code / Claude Desktop
20
+
21
+ Add to `~/.claude/settings.json` or Claude Desktop config:
22
+
23
+ ```json
24
+ {
25
+ "mcpServers": {
26
+ "research-vault": {
27
+ "command": "bunx",
28
+ "args": ["@syndash/research-vault-mcp"]
29
+ }
30
+ }
31
+ }
32
+ ```
33
+
34
+ For direct local dev from this monorepo:
35
+
36
+ ```json
37
+ {
38
+ "mcpServers": {
39
+ "research-vault-dev": {
40
+ "command": "bun",
41
+ "args": ["run", "packages/research-vault-mcp/src/server.ts"]
42
+ }
43
+ }
44
+ }
45
+ ```
46
+
47
+ ## Tools Exposed (MCP contract)
48
+
49
+ See `src/vault.ts` and `src/amplify.ts` for current tool definitions:
50
+
51
+ - `vault_search` — hybrid search over analyzed knowledge base
52
+ - `vault_status` — decay scores + retention health
53
+ - `vault_taxonomy` — category tree + item counts
54
+ - `vault_batch_analyze` — raw queue status + preview
55
+ - `amplify_*` — remote RAG query layer (currently requires Amplify API key — see `docs.evermind.ai`; Wave 3+ will add local Gemma fallback path via `@syndash/research-vault-mcp`'s built-in retrieval chain)
56
+
57
+ ## Architecture
58
+
59
+ Per parent spec [2026-04-19 vault foundation & preamble design](https://github.com/Fearvox/Evensong/blob/main/docs/superpowers/specs/2026-04-19-vault-foundation-and-preamble-design.md) §3.4, retrieval uses a **unified multi-signal ranker** (not 3 separate subsystems):
60
+
61
+ ```
62
+ score(d, q, t) = 0.35·BM25(q,d) + 0.35·cosine(embed(q), embed(d))
63
+ + 0.15·exp(-(t - lastAccess)/stability)
64
+ + 0.10·log1p(accessCount)/log1p(MAX_ACCESS)
65
+ + 0.05·summary_level_weight(d)
66
+ ```
67
+
68
+ **Primary LLM**: Atomic Chat local Gemma-4-E4B-Uncensored-Q4_K_M (`http://127.0.0.1:1337/v1`).
69
+ **Fallback chain**: xai-fast → minimax-m27 → openrouter/qwen3.6-plus → openrouter/llama-3.1-8b-free.
70
+
71
+ **Prior art**: EverMemOS (arxiv 2601.02163, EverMind/Shanda, 2026-01) — LLM-orchestrated hybrid retrieval. This package adopts their Stage-1 hybrid candidate generation but replaces the Stage-2 verifier loop with a direct listwise LLM judge (simpler and more deterministic).
72
+
73
+ ## License
74
+
75
+ `UNLICENSED` for now (pending org-level license decision). See parent repo LICENSE.
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CLI entry point for @syndash/research-vault-mcp.
4
+ * Invoked via `npx @syndash/research-vault-mcp` or `bunx @syndash/research-vault-mcp`.
5
+ * Delegates to src/server.ts (compiled or via bun direct).
6
+ *
7
+ * Part of DASH SHATTER (Fearvox/Evensong repo, SynDASH org).
8
+ * See packages/research-vault-mcp/README.md for MCP client config.
9
+ */
10
+
11
+ import { fileURLToPath } from 'url'
12
+ import { dirname, join } from 'path'
13
+ import { existsSync } from 'fs'
14
+
15
+ const __filename = fileURLToPath(import.meta.url)
16
+ const __dirname = dirname(__filename)
17
+ const pkgRoot = join(__dirname, '..')
18
+
19
+ // Prefer compiled JS if available (post-build); fall back to bun direct execution of TS source.
20
+ const compiledServer = join(pkgRoot, 'dist', 'server.js')
21
+ const sourceServer = join(pkgRoot, 'src', 'server.ts')
22
+
23
/**
 * CLI entry: read the `--transport` flag, expose it to the server through
 * `process.env.MCP_TRANSPORT`, then load the server module.
 *
 * Accepted flag forms: `--transport <value>` and `--transport=<value>`.
 * When the flag is repeated, the last occurrence wins. Default: `sse`.
 *
 * The compiled `dist/server.js` is preferred; `src/server.ts` is the fallback.
 * NOTE(review): loading the `.ts` fallback presumably requires a runtime that
 * can execute TypeScript directly (e.g. Bun) — confirm for plain Node.
 */
async function main() {
  // `url` is a Node built-in; imported locally so this function is self-contained.
  const { pathToFileURL } = await import('url')

  const flagPrefix = '--transport='
  const args = process.argv.slice(2)
  let transport = 'sse'

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]
    if (arg === '--transport' && args[i + 1]) {
      transport = args[i + 1]
    } else if (arg.startsWith(flagPrefix)) {
      // Take everything after the first '=' so values containing '=' survive.
      transport = arg.slice(flagPrefix.length)
    }
  }
  process.env.MCP_TRANSPORT = transport

  const entry = [compiledServer, sourceServer].find(candidate => existsSync(candidate))
  if (!entry) {
    console.error('research-vault-mcp: neither dist/server.js nor src/server.ts found')
    process.exit(1)
  }

  // import() takes a module specifier, not a filesystem path: an absolute
  // Windows path such as `C:\...` must be converted to a file:// URL first.
  await import(pathToFileURL(entry).href)
}
45
+
46
+ main().catch(err => {
47
+ console.error('research-vault-mcp fatal:', err)
48
+ process.exit(1)
49
+ })
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@syndash/research-vault-mcp",
3
+ "version": "1.1.0",
4
+ "description": "MCP server for Nolan's research vault — semantic search + memory persistence over 200+ markdown docs via local Gemma (Atomic Chat) or cloud LLM fallback. Part of DASH SHATTER / SynDASH.",
5
+ "type": "module",
6
+ "bin": {
7
+ "research-vault-mcp": "./bin/research-vault-mcp.mjs"
8
+ },
9
+ "scripts": {
10
+ "dev": "bun run src/server.ts",
11
+ "build": "bun build src/server.ts --outdir=dist --target=bun",
12
+ "test": "bun test"
13
+ },
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/Fearvox/Evensong.git",
17
+ "directory": "packages/research-vault-mcp"
18
+ },
19
+ "homepage": "https://github.com/Fearvox/Evensong/tree/main/packages/research-vault-mcp",
20
+ "bugs": {
21
+ "url": "https://github.com/Fearvox/Evensong/issues"
22
+ },
23
+ "license": "UNLICENSED",
24
+ "publishConfig": {
25
+ "access": "public"
26
+ },
27
+ "files": [
28
+ "src/**/*.ts",
29
+ "bin/**/*.mjs",
30
+ "README.md",
31
+ "package.json"
32
+ ],
33
+ "keywords": [
34
+ "mcp",
35
+ "model-context-protocol",
36
+ "research-vault",
37
+ "claude-code",
38
+ "evermind",
39
+ "dash-shatter",
40
+ "syndash"
41
+ ],
42
+ "dependencies": {
43
+ "@anthropic-ai/sdk": "^0.80.0",
44
+ "markitdown": "latest"
45
+ }
46
+ }
package/src/amplify.ts ADDED
@@ -0,0 +1,245 @@
1
// Amplify API MCP Tools
// Vanderbilt AI Amplify platform — chat, models, file management

const AMPLIFY_BASE = 'https://prod-api.vanderbilt.ai'

/** Credentials needed to call the Amplify API. */
export interface AmplifyConfig {
  apiKey: string
}

// Module-level credentials; remains null until configureAmplify() is called.
let config: AmplifyConfig | null = null

/** Store the API key used by every subsequent Amplify request. */
export function configureAmplify(apiKey: string) {
  config = { apiKey }
}

/**
 * Build the auth + JSON headers for a request.
 * @throws Error when configureAmplify() has not been called with a non-empty key.
 */
function getHeaders() {
  if (!config?.apiKey) throw new Error('Amplify API key not configured. Call configureAmplify() first.')
  return {
    'Authorization': `Bearer ${config.apiKey}`,
    'Content-Type': 'application/json'
  }
}

/** Model descriptor shape (exported for consumers; not referenced by the tools below). */
export interface ModelInfo {
  id: string
  name: string
  provider: string
  inputContextWindow: number
  outputTokenLimit: number
  supportsImages: boolean
  supportsSystemPrompts: boolean
  systemPrompt?: string
}

/** One entry of the `messages` array sent to the chat endpoint. */
export interface ChatMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

/** Chat option bag (exported for consumers; not referenced by the tools below). */
export interface ChatOptions {
  temperature?: number
  maxTokens?: number
  dataSources?: string[]
  modelId?: string
  ragOnly?: boolean
  skipRag?: boolean
}

/** Wrap plain text in the MCP tool-result envelope. */
function textResult(text: string) {
  return { content: [{ type: 'text', text }] }
}

/** Convert any thrown value into an MCP error result. */
function errorResult(e: unknown) {
  const message = e instanceof Error ? e.message : String(e)
  return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true }
}

/**
 * Call an Amplify endpoint that answers with JSON and return the payload
 * pretty-printed as a tool result. Failures (missing key, network error,
 * non-2xx status, invalid JSON) are reported as an error result, not thrown.
 */
async function requestJsonAsText(path: string, init: RequestInit = {}) {
  try {
    const res = await fetch(`${AMPLIFY_BASE}${path}`, { ...init, headers: getHeaders() })
    if (!res.ok) throw new Error(`HTTP ${res.status}`)
    const data: unknown = await res.json()
    return textResult(JSON.stringify(data, null, 2))
  } catch (e: unknown) {
    return errorResult(e)
  }
}

/**
 * Read a server-sent-event body and invoke `onData` with the `data` member of
 * every JSON `data:` line, in arrival order.
 *
 * Text is buffered across network chunks so an event split over two reads is
 * parsed once, whole. Lines that are not valid JSON are skipped on purpose
 * (best-effort parsing).
 */
async function readSseData(body: ReadableStream<Uint8Array>, onData: (data: unknown) => void): Promise<void> {
  const reader = body.getReader()
  const decoder = new TextDecoder()
  let pending = ''

  const handleLine = (rawLine: string): void => {
    const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine
    if (!line.startsWith('data:')) return
    let parsed: unknown
    try {
      // JSON.parse ignores the optional space that follows "data:".
      parsed = JSON.parse(line.slice(5))
    } catch {
      return
    }
    if (typeof parsed !== 'object' || parsed === null) return
    const data = (parsed as { data?: unknown }).data
    if (data) onData(data)
  }

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      pending += decoder.decode(value, { stream: true })
      const lines = pending.split('\n')
      // The last element is an incomplete line (or ''); keep it for the next chunk.
      pending = lines.pop() ?? ''
      lines.forEach(handleLine)
    }
    pending += decoder.decode()
    if (pending) handleLine(pending)
  } finally {
    // No-op after a normal end of stream; frees the connection on early failure.
    await reader.cancel().catch(() => undefined)
  }
}

/** Return the truthy `content` member of an SSE payload as text, if there is one. */
function contentOf(data: unknown): string | undefined {
  if (typeof data !== 'object' || data === null) return undefined
  const content = (data as { content?: unknown }).content
  return content ? String(content) : undefined
}

/**
 * MCP tool definitions for the Amplify API. Each `call` resolves to a tool
 * result and never rejects: failures come back with `isError: true`.
 */
export const amplifyTools = [
  {
    name: 'amplify_list_models',
    description: 'List available models on Vanderbilt Amplify. Returns model IDs, context windows, providers, and pricing tiers.',
    inputSchema: { type: 'object', properties: {} },
    call: async () => requestJsonAsText('/available_models')
  },

  {
    name: 'amplify_chat',
    description: 'Send a streaming chat message to Amplify. Returns Claude/GPT/Mistral responses via SSE.',
    inputSchema: {
      type: 'object',
      required: ['message'],
      properties: {
        message: { type: 'string', description: 'User message' },
        modelId: { type: 'string', description: 'Model ID (from amplify_list_models)' },
        systemPrompt: { type: 'string', description: 'Optional system prompt override' },
        temperature: { type: 'number', description: 'Temperature (0-2, default 0.7)' },
        maxTokens: { type: 'number', description: 'Max output tokens (default 4000)' },
        stream: { type: 'boolean', description: 'If true, yield chunks via onProgress callback instead of waiting for complete response (default false)' }
      }
    },
    call: async ({ message, modelId, systemPrompt, temperature = 0.7, maxTokens = 4000, stream = false }: {
      message: string, modelId?: string, systemPrompt?: string, temperature?: number, maxTokens?: number, stream?: boolean
    }, onProgress?: (data: { type: string; text?: string }) => void) => {
      try {
        const messages: ChatMessage[] = []
        if (systemPrompt) messages.push({ role: 'system', content: systemPrompt })
        messages.push({ role: 'user', content: message })

        const body = {
          data: {
            model: modelId || 'gpt-4o',
            temperature,
            max_tokens: maxTokens,
            messages
          }
        }

        // Exactly one request per call, in both modes.
        const res = await fetch(`${AMPLIFY_BASE}/chat`, {
          method: 'POST',
          headers: getHeaders(),
          body: JSON.stringify(body)
        })
        if (!res.ok) {
          const err = await res.text()
          throw new Error(`HTTP ${res.status}: ${err}`)
        }
        if (!res.body) throw new Error('No response body')

        // Stream mode: forward each content chunk as it arrives.
        if (stream && onProgress) {
          await readSseData(res.body, data => {
            const text = contentOf(data)
            if (text) onProgress({ type: 'chunk', text })
          })
          return textResult('(streamed)')
        }

        // Buffered mode: concatenate content; payloads without `content`
        // are appended verbatim (strings) or as JSON (anything else).
        let fullText = ''
        await readSseData(res.body, data => {
          fullText += contentOf(data) ?? (typeof data === 'string' ? data : JSON.stringify(data))
        })
        return textResult(fullText || '(no response)')
      } catch (e: unknown) {
        return errorResult(e)
      }
    }
  },

  {
    name: 'amplify_files_query',
    description: 'Query uploaded files on Amplify using semantic search. Returns relevant file chunks.',
    inputSchema: {
      type: 'object',
      required: ['query'],
      properties: {
        query: { type: 'string', description: 'Search query' },
        limit: { type: 'number', description: 'Max results (default 5)' }
      }
    },
    call: async ({ query, limit = 5 }: { query: string, limit?: number }) =>
      requestJsonAsText('/files/query', { method: 'POST', body: JSON.stringify({ query, limit }) })
  },

  {
    name: 'amplify_files_list',
    description: 'List tags/categories of uploaded files on Amplify.',
    inputSchema: { type: 'object', properties: {} },
    call: async () => requestJsonAsText('/files/tags/list')
  },

  {
    name: 'amplify_assistants_list',
    description: 'List your Amplify assistants.',
    inputSchema: { type: 'object', properties: {} },
    call: async () => requestJsonAsText('/assistant/list')
  }
]
@@ -0,0 +1,64 @@
1
+ import type { ArxivMetadata } from '../types.js'
2
+
3
+ const ARXIV_API = 'https://export.arxiv.org/api/query'
4
+
5
/**
 * Extract a new-style arXiv identifier (`YYMM.NNNNN`, optional `vN` suffix)
 * from a bare ID or a link.
 *
 * Accepted inputs (surrounding whitespace is ignored):
 *   https://arxiv.org/abs/2501.00001
 *   http://arxiv.org/abs/2501.00001v2
 *   https://arxiv.org/pdf/2501.00001v2.pdf
 *   abs/2501.00001
 *   2501.00001v2
 *
 * @param value Raw user input: an ID, an `abs/` shorthand, or an arXiv URL.
 * @returns The identifier including any version suffix, or `null` when the
 *          input contains no recognizable ID.
 */
export function parseArxivId(value: string): string | null {
  const input = value.trim()

  // Bare (optionally versioned) ID: 2501.00001v2
  if (/^\d{4}\.\d{4,}(?:v\d+)?$/.test(input)) {
    return input
  }

  // abs/pdf URL, or the abs/ shorthand. A trailing ".pdf" is left out
  // because the capture group stops at the end of the version suffix.
  const match = input.match(/(?:arxiv\.org\/(?:abs|pdf)\/|abs\/?)(\d{4}\.\d{4,}(?:v\d+)?)/i)
  return match?.[1] ?? null
}
22
+
23
/**
 * Fetch the arXiv API record for `id` and parse it into metadata.
 *
 * @param id arXiv identifier, e.g. `2501.00001v2`.
 * @returns Parsed metadata. `arxivId` is left `null` by the parser; callers
 *          are expected to fill it in.
 * @throws Error when the API answers with a non-2xx status.
 */
export async function fetchArxivMetadata(id: string): Promise<ArxivMetadata> {
  // Encode the ID so characters such as '/', '&' or '#' cannot alter the query string.
  const url = `${ARXIV_API}?id_list=${encodeURIComponent(id)}`
  const res = await fetch(url)
  if (!res.ok) throw new Error(`ArXiv API error: ${res.status}`)
  const xml = await res.text()
  return parseArxivXml(xml)
}
30
+
31
/** Decode the five predefined XML entities in one pass (no double decoding). */
function decodeXmlEntities(text: string): string {
  const entities: Record<string, string> = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" }
  return text.replace(/&(amp|lt|gt|quot|apos);/g, (whole, name: string) => entities[name] ?? whole)
}

/**
 * Extract title, abstract, authors and categories from an arXiv Atom response.
 *
 * Fields are read from the first `<entry>` element only. The enclosing
 * `<feed>` carries its own `<title>`, which must not be mistaken for the
 * paper title. A feed with no entry yields all-null fields; input with
 * neither `<feed>` nor `<entry>` is parsed as a bare fragment.
 *
 * @param xml Raw response text.
 * @returns Metadata with `null` for anything not found. `arxivId` is always
 *          `null` here and is set by the caller.
 */
function parseArxivXml(xml: string): ArxivMetadata {
  const entry = xml.match(/<entry[\s>][\s\S]*?<\/entry>/i)
  let scope = xml
  if (entry) {
    scope = entry[0]
  } else if (/<feed[\s>]/i.test(xml)) {
    scope = ''
  }

  // Decode entities, then collapse the line wrapping arXiv inserts into text nodes.
  const clean = (raw: string): string => decodeXmlEntities(raw).replace(/\s+/g, ' ').trim()

  const titleMatch = scope.match(/<title[^>]*>([\s\S]*?)<\/title>/i)
  const summaryMatch = scope.match(/<summary[^>]*>([\s\S]*?)<\/summary>/i)

  const authors = [...scope.matchAll(/<author>[\s\S]*?<name>([\s\S]*?)<\/name>[\s\S]*?<\/author>/gi)]
    .map(m => clean(m[1] ?? ''))

  const categories = [...scope.matchAll(/<category[^>]*term="([^"]+)"/gi)]
    .map(m => decodeXmlEntities(m[1] ?? ''))

  return {
    title: titleMatch ? clean(titleMatch[1] ?? '') : null,
    authors: authors.length ? authors : null,
    abstract: summaryMatch ? clean(summaryMatch[1] ?? '') : null,
    arxivId: null, // set by caller
    categories: categories.length ? categories : null
  }
}
@@ -0,0 +1,46 @@
1
+ // packages/research-vault-mcp/src/ingest/html.ts
2
+
3
/**
 * Reduce an HTML document to plain text.
 * Drops script/style/nav/footer/header/aside elements and comments, turns
 * `<br>` and block-level closing tags into newlines, strips the remaining
 * tags, decodes a small set of entities, and collapses runs of blank lines.
 */
function htmlToText(html: string): string {
  const entities: Record<string, string> = {
    nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', '#39': "'"
  }

  return html
    .replace(/<script[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?<\/style>/gi, '')
    .replace(/<nav[\s\S]*?<\/nav>/gi, '')
    .replace(/<footer[\s\S]*?<\/footer>/gi, '')
    .replace(/<header[\s\S]*?<\/header>/gi, '')
    .replace(/<aside[\s\S]*?<\/aside>/gi, '')
    .replace(/<!--[\s\S]*?-->/g, '')
    // <br> is a void element: it appears as <br> or <br/>, never as a closing tag.
    .replace(/<br\s*\/?>/gi, '\n')
    // Block elements → newlines
    .replace(/<\/(p|div|br|h[1-6]|li|tr)>/gi, '\n')
    // Remove all remaining tags
    .replace(/<[^>]+>/g, '')
    // Decode entities in a single pass so "&amp;lt;" becomes "&lt;", not "<".
    .replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (whole, name: string) => entities[name] ?? whole)
    // Collapse runs of blank lines
    .replace(/\n{3,}/g, '\n\n')
    .trim()
}

/**
 * Fetch a URL and convert its HTML to plain markdown-like text.
 * Uses the runtime's global `fetch` — no external dependencies.
 *
 * @param url Page to download.
 * @returns The page text with markup removed.
 * @throws Error (`HTTP <status> fetching <url>`) on a non-2xx response.
 */
export async function fetchHtml(url: string): Promise<string> {
  const res = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 research-vault-mcp/1.1.0',
      'Accept': 'text/html'
    }
  })
  if (!res.ok) throw new Error(`HTTP ${res.status} fetching ${url}`)
  return htmlToText(await res.text())
}
@@ -0,0 +1,30 @@
1
+ // packages/research-vault-mcp/src/ingest/pdf.ts
2
+
3
/**
 * Run one converter command and return its stdout.
 * Resolves to `null` when the command cannot be spawned, exits non-zero
 * (including being killed by the 60 s timeout), or prints only whitespace.
 */
async function runPdfConverter(argv: string[]): Promise<string | null> {
  try {
    const proc = Bun.spawn(argv, { timeout: 60_000 })
    // Drain stdout while waiting for exit: a child that fills the pipe
    // buffer would otherwise block and never terminate.
    // `proc.exited` resolves to the numeric exit code, not to an array.
    const [output, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      proc.exited
    ])
    return exitCode === 0 && output.trim() ? output : null
  } catch {
    // Best-effort: a missing executable simply means "tool unavailable".
    return null
  }
}

/**
 * Convert PDF to markdown using markitdown (preferred) or pandoc.
 * Uses Bun.spawn for process execution — no child_process module needed.
 *
 * NOTE(review): pandoc does not list PDF among its input formats, so the
 * fallback probably never succeeds — confirm, or swap in another tool.
 *
 * @param pdfPath Path of the PDF file, passed to the converter as-is.
 * @returns The markdown text, or `null` if neither tool produced output.
 */
export async function convertPdfToMarkdown(pdfPath: string): Promise<string | null> {
  const converters: string[][] = [
    ['markitdown', pdfPath],
    ['pandoc', '--to', 'markdown', pdfPath]
  ]
  for (const argv of converters) {
    const markdown = await runPdfConverter(argv)
    if (markdown !== null) return markdown
  }
  return null
}