npm - @syndash/research-vault-mcp - Versions diffs - 1.1.2 → 1.1.3 - Mend

@syndash/research-vault-mcp 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/CHANGELOG.md +35 -0
package/README.md +34 -7
package/dist/server.js +1114 -323
package/package.json +6 -5
package/src/amplify.ts +32 -41
package/src/evidence_metadata.ts +191 -0
package/src/guidance.ts +57 -0
package/src/ingest/html.ts +129 -19
package/src/profile.ts +15 -0
package/src/public_safety.ts +110 -0
package/src/response.ts +73 -0
package/src/server.ts +304 -108
package/src/tool_policy.ts +58 -0
package/src/types.ts +4 -3
package/src/vault.ts +300 -75
package/src/vault_get.ts +109 -0
package/src/vault_write.ts +78 -112

package/package.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "name": "@syndash/research-vault-mcp",
-  "version": "1.1.2",
+  "version": "1.1.3",
   "description": "Evensong Research Vault MCP module — local-first semantic search, note persistence, and knowledge-base tools for MCP-compatible agents.",
   "type": "module",
   "bin": {
-    "research-vault-mcp": "./bin/research-vault-mcp.mjs"
+    "research-vault-mcp": "bin/research-vault-mcp.mjs"
   },
   "scripts": {
     "dev": "bun run src/server.ts",
@@ -14,12 +14,12 @@
   },
   "repository": {
     "type": "git",
-    "url": "https://github.com/Fearvox/Evensong.git",
+    "url": "git+https://github.com/Fearvox/dash-research-vault.git",
     "directory": "packages/research-vault-mcp"
   },
-  "homepage": "https://github.com/Fearvox/Evensong/tree/main/packages/research-vault-mcp",
+  "homepage": "https://github.com/Fearvox/dash-research-vault/tree/main/packages/research-vault-mcp",
   "bugs": {
-    "url": "https://github.com/Fearvox/Evensong/issues"
+    "url": "https://github.com/Fearvox/dash-research-vault/issues"
   },
   "license": "Apache-2.0",
   "publishConfig": {
@@ -30,6 +30,7 @@
     "dist/**/*.js",
     "bin/**/*.mjs",
     "README.md",
+    "CHANGELOG.md",
     "package.json"
   ],
   "keywords": [

package/src/amplify.ts CHANGED Viewed

@@ -112,57 +112,48 @@ export const amplifyTools = [
           throw new Error(`HTTP ${res.status}: ${err}`)
         }
-        // Read SSE stream
         const reader = res.body?.getReader()
         if (!reader) throw new Error('No response body')
-        let fullText = ''
         const decoder = new TextDecoder()
+        let buffer = ''
+        let fullText = ''
-        while (true) {
-          const { done, value } = await reader.read()
-          if (done) break
-          const chunk = decoder.decode(value, { stream: true })
-          // Parse SSE lines: data: {...}
-          for (const line of chunk.split('\n')) {
-            if (line.startsWith('data: ')) {
-              try {
-                const parsed = JSON.parse(line.slice(6))
-                if (parsed.data?.content) fullText += parsed.data.content
-                else if (parsed.data) fullText += typeof parsed.data === 'string' ? parsed.data : JSON.stringify(parsed.data)
-              } catch {}
+        const processEventBlock = (block: string) => {
+          for (const line of block.split('\n')) {
+            if (!line.startsWith('data: ')) continue
+            let parsed: any
+            try { parsed = JSON.parse(line.slice(6)) } catch { continue }
+            let textChunk = ''
+            if (parsed?.data?.content) {
+              textChunk = parsed.data.content
+            } else if (parsed?.data) {
+              textChunk = typeof parsed.data === 'string' ? parsed.data : JSON.stringify(parsed.data)
+            }
+            if (textChunk) {
+              fullText += textChunk
+              if (stream && onProgress) {
+                onProgress({ type: 'chunk', text: textChunk })
+              }
             }
           }
         }
-        // ── Stream mode: yield chunks via onProgress ─────────────────────────
-        if (stream && onProgress) {
-          const res2 = await fetch(`${AMPLIFY_BASE}/chat`, {
-            method: 'POST',
-            headers: getHeaders(),
-            body: JSON.stringify(body)
-          })
-          if (!res2.ok) throw new Error(`HTTP ${res2.status}`)
-          const reader2 = res2.body?.getReader()
-          if (!reader2) throw new Error('No response body')
-          const decoder2 = new TextDecoder()
-          let buffer2 = ''
-          while (true) {
-            const { done, value } = await reader2.read()
-            if (done) break
-            buffer2 += decoder2.decode(value, { stream: true })
-            for (const line of buffer2.split('\n')) {
-              if (line.startsWith('data: ')) {
-                try {
-                  const parsed = JSON.parse(line.slice(6))
-                  if (parsed.data?.content) {
-                    onProgress({ type: 'chunk', text: parsed.data.content })
-                  }
-                } catch {}
-              }
-            }
+        while (true) {
+          const { done, value } = await reader.read()
+          if (done) break
+          buffer += decoder.decode(value, { stream: true })
+          let sep: number
+          while ((sep = buffer.indexOf('\n\n')) !== -1) {
+            const eventBlock = buffer.slice(0, sep)
+            buffer = buffer.slice(sep + 2)
+            processEventBlock(eventBlock)
           }
-          return { content: [{ type: 'text', text: '(streamed)' }] }
+        }
+        buffer += decoder.decode()
+        if (buffer.length > 0) {
+          processEventBlock(buffer)
+          buffer = ''
         }
         return {

package/src/evidence_metadata.ts ADDED Viewed

@@ -0,0 +1,191 @@
+import type { DecayScore, VaultEntry } from './types.ts'
+/** Outcome of a freshness check: `PASS` or `FLAG`. */
+export type FreshnessVerdict = 'PASS' | 'FLAG'
+/** A freshness verdict paired with a human-readable explanation. */
+export interface FreshnessShape {
+  verdict: FreshnessVerdict
+  reason: string
+}
+// Age threshold in days: staleVerdict flags analyses older than this, and coverageMetadata uses it as its recent-throughput window.
+const STALE_AFTER_DAYS = 7
+// Number of milliseconds in one day.
+const DAY_MS = 24 * 60 * 60 * 1000
+/** Restricts `value` to the closed interval [`min`, `max`]. */
+function clamp(value: number, min: number, max: number): number {
+  const raised = Math.max(min, value)
+  return Math.min(max, raised)
+}
+/** Lowercases `value`, treating `undefined` and `null` as the empty string. */
+function lower(value: string | undefined | null): string {
+  if (value === undefined || value === null) return ''
+  return value.toLowerCase()
+}
+/**
+ * Splits a query into lowercase, whitespace-separated terms.
+ * A missing or blank query yields an empty list.
+ */
+function queryTerms(query?: string): string[] {
+  const terms: string[] = []
+  for (const piece of lower(query).split(/\s+/)) {
+    const term = piece.trim()
+    if (term) terms.push(term)
+  }
+  return terms
+}
+/**
+ * Reports whether `value` contains at least one term of `query`, ignoring case.
+ * Returns false when the query has no terms.
+ */
+function includesQuery(value: string | undefined, query?: string): boolean {
+  const terms = queryTerms(query)
+  if (terms.length === 0) return false
+  const haystack = lower(value)
+  for (const term of terms) {
+    if (haystack.includes(term)) return true
+  }
+  return false
+}
+/**
+ * Lists which of an entry's fields (title, content, id, category) contain at
+ * least one query term. Field names are reported in that fixed order; a missing
+ * or blank query yields an empty list.
+ */
+export function matchedFields(entry: VaultEntry & { content?: string }, query?: string): string[] {
+  if (!query?.trim()) return []
+  const hits: string[] = []
+  const check = (field: string, value: string | undefined): void => {
+    if (includesQuery(value, query)) hits.push(field)
+  }
+  check('title', entry.title)
+  check('content', entry.content)
+  check('id', entry.id)
+  check('category', entry.category)
+  return hits
+}
+/**
+ * Builds a one-sentence explanation of why an entry appears in a result list.
+ *
+ * @param entry  The entry being explained (not inspected; only `fields` drives the text).
+ * @param query  The search query, if any.
+ * @param fields Names of the fields that matched, e.g. from matchedFields().
+ * @returns A fixed message for "no query" and "no field matched", otherwise a
+ *          sentence naming the matched fields ("content" is shown as "note content").
+ */
+export function whyMatched(entry: VaultEntry & { content?: string }, query: string | undefined, fields: string[]): string {
+  const hasQuery = Boolean(query?.trim())
+  if (!hasQuery) {
+    return 'No query provided; result is included by category or default listing.'
+  }
+  if (fields.length === 0) {
+    return 'Result is included after filters; no direct field match was detected.'
+  }
+  const described: string[] = []
+  for (const field of fields) {
+    described.push(field === 'content' ? 'note content' : field)
+  }
+  return `Matched query "${query}" in ${described.join(', ')}.`
+}
+/**
+ * Produces a short, single-line excerpt of `content`, centred on the first
+ * query hit when there is one.
+ *
+ * Whitespace runs are collapsed to single spaces first. Content that already
+ * fits is returned whole. Without a hit the excerpt is the leading `maxChars`
+ * characters. With a hit, a window of `maxChars` characters is placed around
+ * it and "..." replaces the first/last three characters of the window on each
+ * side where content was actually cut, so the result never exceeds `maxChars`
+ * and a window that reaches the end of the content keeps its final characters.
+ *
+ * @param content  Raw note text.
+ * @param query    Optional search query; its earliest-matching term anchors the window.
+ * @param maxChars Maximum length of the returned snippet (floored; non-positive or NaN yields '').
+ * @returns The excerpt, possibly wrapped in "..." markers.
+ */
+export function snippetFromContent(content: string, query?: string, maxChars = 240): string {
+  const limit = Math.max(0, Math.floor(maxChars))
+  // `!(limit > 0)` also rejects NaN, which would otherwise poison the slice arithmetic below.
+  if (!(limit > 0)) return ''
+  const normalized = content.replace(/\s+/g, ' ').trim()
+  if (normalized.length <= limit) return normalized
+  const lowerContent = lower(normalized)
+  let hitIndex = -1
+  for (const term of queryTerms(query)) {
+    const index = lowerContent.indexOf(term)
+    if (index >= 0 && (hitIndex < 0 || index < hitIndex)) hitIndex = index
+  }
+  if (hitIndex < 0) return normalized.slice(0, limit).trimEnd()
+  const ellipsis = '...'
+  const halfWindow = Math.floor(limit / 2)
+  const start = Math.max(0, Math.min(hitIndex - halfWindow, normalized.length - limit))
+  // normalized.length > limit here, so the window always fits inside the content.
+  const end = start + limit
+  // A window too small to hold both markers is returned bare rather than overflowing maxChars.
+  if (limit <= ellipsis.length * 2) return normalized.slice(start, end).trim()
+  const cutHead = start > 0
+  const cutTail = end < normalized.length
+  // Each marker takes the place of the window's own edge characters, so the
+  // side that is NOT cut keeps every character up to the content boundary.
+  const from = cutHead ? start + ellipsis.length : start
+  const to = cutTail ? end - ellipsis.length : end
+  return `${cutHead ? ellipsis : ''}${normalized.slice(from, to).trim()}${cutTail ? ellipsis : ''}`
+}
+/**
+ * Classifies an analysis timestamp as fresh (`PASS`) or not (`FLAG`).
+ *
+ * A missing or unparseable timestamp is flagged. Otherwise the age is measured
+ * in whole days (rounded down) and flagged only when it exceeds
+ * STALE_AFTER_DAYS.
+ */
+export function staleVerdict(lastAnalyzedAt?: string | null): FreshnessShape {
+  const flag = (reason: string): FreshnessShape => ({ verdict: 'FLAG', reason })
+  if (!lastAnalyzedAt) return flag('No analysis timestamp was provided.')
+  const analyzedMs = Date.parse(lastAnalyzedAt)
+  if (Number.isNaN(analyzedMs)) return flag('Analysis timestamp could not be parsed.')
+  const ageDays = Math.floor((Date.now() - analyzedMs) / DAY_MS)
+  return ageDays > STALE_AFTER_DAYS
+    ? flag(`Analysis is ${ageDays} days old.`)
+    : { verdict: 'PASS', reason: 'Analysis is fresh enough for the read surface.' }
+}
+/**
+ * Summarises the freshness of a single entry: its last analysis time, its
+ * source modification time (null when empty), and the staleVerdict() result
+ * for the analysis time.
+ */
+export function itemFreshness(entry: VaultEntry & { score?: DecayScore & { lastAnalyzedAt?: string } }) {
+  const analyzedAt = entry.score?.lastAnalyzedAt ?? null
+  const { verdict, reason } = staleVerdict(analyzedAt)
+  return {
+    last_analyzed_at: analyzedAt,
+    source_mtime: entry.modified || null,
+    freshness_verdict: verdict,
+    freshness_reason: reason,
+  }
+}
+/**
+ * Finds the oldest parseable `source_mtime` among pending queue items.
+ *
+ * Items without a timestamp, or with one that Date.parse cannot read, are
+ * ignored. The minimum is tracked in a single pass instead of spreading the
+ * whole list into Math.min, which throws a RangeError on very large queues.
+ *
+ * @returns `oldest_pending_age` in whole days (never negative) and
+ *          `oldest_pending_at` as an ISO string; both null when no item has a
+ *          usable timestamp.
+ */
+export function queueFreshness(queueItems: Array<{ source_mtime?: string | null }>) {
+  let oldest = 0
+  let found = false
+  for (const item of queueItems) {
+    if (!item.source_mtime) continue
+    const timestamp = Date.parse(item.source_mtime)
+    if (Number.isNaN(timestamp)) continue
+    if (!found || timestamp < oldest) {
+      oldest = timestamp
+      found = true
+    }
+  }
+  if (!found) {
+    return {
+      oldest_pending_age: null as number | null,
+      oldest_pending_at: null as string | null,
+    }
+  }
+  return {
+    oldest_pending_age: Math.max(0, Math.floor((Date.now() - oldest) / DAY_MS)),
+    oldest_pending_at: new Date(oldest).toISOString(),
+  }
+}
+/**
+ * Builds the coverage summary for a status report.
+ *
+ * - `analyzed_coverage`: analyzed / total, clamped to [0, 1] and rounded to
+ *   four decimals; 0 when `total` is not a positive number.
+ * - `last_analyzed_at`: the score timestamp that is latest in time. Timestamps
+ *   are compared after parsing, not as strings, so mixed UTC offsets or
+ *   formats cannot pick the wrong entry; unparseable values are ignored.
+ * - `recent_throughput`: number of scores analysed within the last
+ *   STALE_AFTER_DAYS days.
+ * - `oldest_pending_age`: taken from queueFreshness().
+ *
+ * A single clock reading is used so `as_of` and the throughput window agree.
+ */
+export function coverageMetadata(statusData: {
+  total: number
+  analyzed: number
+  scores?: Array<DecayScore & { lastAnalyzedAt?: string }>
+  queueItems?: Array<{ source_mtime?: string | null }>
+}) {
+  const now = Date.now()
+  const analyzedCoverage = statusData.total > 0
+    ? Number(clamp(statusData.analyzed / statusData.total, 0, 1).toFixed(4))
+    : 0
+  let latestAt: string | null = null
+  let latestMs = Number.NEGATIVE_INFINITY
+  let recentThroughput = 0
+  for (const score of statusData.scores ?? []) {
+    if (!score.lastAnalyzedAt) continue
+    const timestamp = Date.parse(score.lastAnalyzedAt)
+    if (Number.isNaN(timestamp)) continue
+    if (timestamp > latestMs) {
+      latestMs = timestamp
+      latestAt = score.lastAnalyzedAt
+    }
+    if (now - timestamp <= STALE_AFTER_DAYS * DAY_MS) recentThroughput += 1
+  }
+  return {
+    as_of: new Date(now).toISOString(),
+    last_analyzed_at: latestAt,
+    analyzed_coverage: analyzedCoverage,
+    oldest_pending_age: queueFreshness(statusData.queueItems ?? []).oldest_pending_age,
+    recent_throughput: recentThroughput,
+  }
+}
+/**
+ * Describes the published release as reported by the runtime environment.
+ *
+ * The three RESEARCH_VAULT_* variables supply the latest npm version, the npm
+ * modification time, and the public repository URL. Freshness is judged with
+ * staleVerdict() on the modification time, but only when all three values are
+ * present and non-empty; otherwise the result is flagged as not provided.
+ * `days_since_npm_update` is null when the modification time is missing or
+ * cannot be parsed.
+ */
+export function releaseMetadata(
+  env: Pick<NodeJS.ProcessEnv, 'RESEARCH_VAULT_NPM_LATEST_VERSION' | 'RESEARCH_VAULT_NPM_MODIFIED_AT' | 'RESEARCH_VAULT_PUBLIC_REPO_URL'>,
+  packageJson: { name?: string; version?: string },
+) {
+  const latestVersion = env.RESEARCH_VAULT_NPM_LATEST_VERSION ?? null
+  const modifiedAt = env.RESEARCH_VAULT_NPM_MODIFIED_AT ?? null
+  const repoUrl = env.RESEARCH_VAULT_PUBLIC_REPO_URL ?? null
+  const complete = Boolean(latestVersion && modifiedAt && repoUrl)
+  const freshness: FreshnessShape = complete
+    ? staleVerdict(modifiedAt)
+    : { verdict: 'FLAG', reason: 'Release freshness was not provided by the runtime environment.' }
+  let daysSinceUpdate: number | null = null
+  if (modifiedAt) {
+    const modifiedMs = Date.parse(modifiedAt)
+    if (!Number.isNaN(modifiedMs)) {
+      daysSinceUpdate = Math.max(0, Math.floor((Date.now() - modifiedMs) / DAY_MS))
+    }
+  }
+  return {
+    package_name: packageJson.name ?? null,
+    local_version: packageJson.version ?? null,
+    npm_latest_version: latestVersion,
+    npm_modified_at: modifiedAt,
+    days_since_npm_update: daysSinceUpdate,
+    public_repo: repoUrl,
+    freshness_verdict: freshness.verdict,
+    freshness_reason: freshness.reason,
+  }
+}

package/src/guidance.ts ADDED Viewed

@@ -0,0 +1,57 @@
+import type { McpProfile } from './profile.ts'
+/** Verdict attached to agent guidance: `PASS`, `FLAG`, or `BLOCK`. */
+export type GuidanceVerdict = 'PASS' | 'FLAG' | 'BLOCK'
+/** Structured guidance object produced by the helper functions in this module. */
+export interface AgentGuidance {
+  verdict: GuidanceVerdict
+  // Explanation for the verdict.
+  reason: string
+  // Free-text suggestion for what to do next.
+  next_step: string
+  // Optional name of a tool to try next.
+  recommended_tool?: string
+  // flagGuidance sets this to true; passGuidance and blockGuidance set it to false.
+  retryable?: boolean
+}
+/**
+ * Creates `PASS` guidance, marked as not retryable.
+ *
+ * @param reason           Why the verdict was reached.
+ * @param next_step        Suggested follow-up.
+ * @param recommended_tool Optional tool to use next.
+ */
+export function passGuidance(reason: string, next_step: string, recommended_tool?: string): AgentGuidance {
+  const guidance: AgentGuidance = { verdict: 'PASS', reason, next_step, recommended_tool, retryable: false }
+  return guidance
+}
+/**
+ * Creates `FLAG` guidance, marked as retryable.
+ *
+ * @param reason           Why the verdict was reached.
+ * @param next_step        Suggested follow-up.
+ * @param recommended_tool Optional tool to use next.
+ */
+export function flagGuidance(reason: string, next_step: string, recommended_tool?: string): AgentGuidance {
+  const guidance: AgentGuidance = { verdict: 'FLAG', reason, next_step, recommended_tool, retryable: true }
+  return guidance
+}
+/**
+ * Creates `BLOCK` guidance, marked as not retryable.
+ *
+ * @param reason           Why the verdict was reached.
+ * @param next_step        Suggested follow-up.
+ * @param recommended_tool Optional tool to use next.
+ */
+export function blockGuidance(reason: string, next_step: string, recommended_tool?: string): AgentGuidance {
+  const guidance: AgentGuidance = { verdict: 'BLOCK', reason, next_step, recommended_tool, retryable: false }
+  return guidance
+}
+/**
+ * Builds the `BLOCK` guidance returned when `toolName` is not available in the
+ * current profile; it names the profile and points the agent to `vault_search`.
+ */
+export function readonlyBlockedGuidance(toolName: string, profile: McpProfile): AgentGuidance {
+  const reason = `${toolName} is unavailable while Research Vault MCP is running in ${profile} profile.`
+  const nextStep = 'Use vault_search for readonly evidence, or switch to MCP_PROFILE=full for operator-approved non-destructive mutation in a private operator session.'
+  return blockGuidance(reason, nextStep, 'vault_search')
+}
+/**
+ * Builds the `BLOCK` guidance returned when an admin-only `toolName` is called
+ * outside the admin profile; it names the profile and points the agent to
+ * `vault_search`.
+ */
+export function adminBlockedGuidance(toolName: string, profile: McpProfile): AgentGuidance {
+  const reason = `${toolName} is admin-only and unavailable while Research Vault MCP is running in ${profile} profile.`
+  const nextStep = 'Use vault_search for readonly evidence, or start a private admin operator session with MCP_PROFILE=admin; readonly/full profiles are insufficient for destructive/admin tools.'
+  return blockGuidance(reason, nextStep, 'vault_search')
+}

package/src/ingest/html.ts CHANGED Viewed

@@ -1,10 +1,27 @@
 // packages/research-vault-mcp/src/ingest/html.ts
+import type { LookupAddress } from 'dns'
+// Signature of the hostname resolver used by this module: resolves to the address records found for `hostname`.
+type DnsLookupFn = (hostname: string) => Promise<LookupAddress[]>
+/**
+ * Default resolver: loads `dns/promises` on demand and returns every address
+ * record for `hostname` (lookup with `all: true`).
+ */
+async function defaultLookup(hostname: string): Promise<LookupAddress[]> {
+  const dns = await import('dns/promises')
+  return dns.lookup(hostname, { all: true })
+}
+// Resolver currently in use; starts as the DNS-backed default and can be swapped via _setDnsLookup().
+let dnsLookup: DnsLookupFn = defaultLookup
+/**
+ * Replaces the resolver used for hostname checks. Passing `null` restores the
+ * default DNS-backed lookup.
+ * NOTE(review): the underscore prefix suggests a test-only hook — confirm with callers.
+ */
+export function _setDnsLookup(fn: DnsLookupFn | null): void {
+  if (fn == null) {
+    dnsLookup = defaultLookup
+    return
+  }
+  dnsLookup = fn
+}
 /**
  * Validate URL to prevent SSRF attacks.
- * Blocks: private IP ranges, localhost, cloud metadata endpoints, invalid schemes.
+ * Blocks: private IPv4/IPv6 ranges, loopback, link-local, cloud metadata
+ * endpoints, invalid schemes, and forbidden hostname literals. DNS-backed
+ * hostname checks happen in validateHostDns(), which safeFetch calls per hop.
  */
-function validateUrl(url: string): void {
+export function validateUrl(url: string): void {
   let parsed: URL
   try {
     parsed = new URL(url)
@@ -17,38 +34,131 @@ function validateUrl(url: string): void {
     throw new Error(`URL scheme not allowed: ${scheme}. Only http/https permitted.`)
   }
-  const hostname = parsed.hostname.toLowerCase()
+  const hostname = parsed.hostname.replace(/^\[(.*)\]$/, '$1').toLowerCase()
+  validateHostnameLiteralPolicy(hostname)
+  if (hostname.includes(':')) {
+    validateIpv6(hostname, hostname)
+    return
+  }
+  const ipMatch = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/)
+  if (ipMatch) {
+    validateIpv4(hostname, hostname)
+    return
+  }
+}
-  // Block cloud metadata endpoints
-  if (hostname === '169.254.169.254' || hostname === 'metadata.google.internal') {
-    throw new Error(`Cloud metadata endpoint blocked: ${hostname}`)
+/**
+ * Rejects hostnames that are forbidden by name, whatever they resolve to.
+ * Expects an already lower-cased hostname.
+ *
+ * @throws Error when the hostname is `localhost` or `metadata.google.internal`.
+ */
+function validateHostnameLiteralPolicy(hostname: string): void {
+  const forbidden = hostname === 'metadata.google.internal' || hostname === 'localhost'
+  if (forbidden) {
+    throw new Error(`Hostname not permitted: ${hostname}`)
   }
+}
-  // Block localhost variants
-  if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '::1' || hostname === '[::1]') {
-    throw new Error(`Localhost not permitted: ${hostname}`)
+/**
+ * Rejects IPv4 addresses that must never be fetched: malformed literals,
+ * 0.0.0.0/8, private ranges (10/8, 172.16/12, 192.168/16), shared address
+ * space 100.64/10 (RFC 6598; for example 100.100.100.200 serves Alibaba Cloud
+ * instance metadata), loopback 127/8, link-local 169.254/16 including the
+ * 169.254.169.254 metadata endpoint, benchmarking 198.18/15, and the
+ * multicast/reserved space from 224.0.0.0 upward.
+ *
+ * @param ip               Dotted-quad address to check.
+ * @param originalHostname Hostname the address came from; used in error messages.
+ * @throws Error naming the reason and `originalHostname` when the address is not allowed.
+ */
+function validateIpv4(ip: string, originalHostname: string): void {
+  const octets = ip.split('.')
+  // Require four plain decimal octets; parseInt alone would accept trailing junk such as "1abc".
+  if (octets.length !== 4 || !octets.every(octet => /^\d{1,3}$/.test(octet) && Number(octet) <= 255)) {
+    throw new Error(`Invalid IPv4 address: ${originalHostname}`)
   }
+  const [a = 0, b = 0, c = 0, d = 0] = octets.map(Number)
-  // Block private IP ranges
-  const ip = hostname
-  if (/^(10\.\d+|172\.(1[6-9]|2\d|3[01])\.\d+\.\d+|192\.168\.\d+)$/.test(ip)) {
-    throw new Error(`Private IP not permitted: ${ip}`)
+  if (a === 0) throw new Error(`Reserved IP blocked: ${originalHostname}`)
+  if (a === 10) throw new Error(`Private IP blocked: ${originalHostname}`)
+  if (a === 100 && b >= 64 && b <= 127) throw new Error(`Shared address space IP blocked: ${originalHostname}`)
+  if (a === 127) throw new Error(`Loopback IP blocked: ${originalHostname}`)
+  // The metadata endpoint is checked before the wider link-local range so it keeps its own message.
+  if (a === 169 && b === 254 && c === 169 && d === 254) {
+    throw new Error(`Cloud metadata endpoint blocked: ${originalHostname}`)
   }
+  if (a === 169 && b === 254) throw new Error(`Link-local IP blocked: ${originalHostname}`)
+  if (a === 172 && b >= 16 && b <= 31) throw new Error(`Private IP blocked: ${originalHostname}`)
+  if (a === 192 && b === 168) throw new Error(`Private IP blocked: ${originalHostname}`)
+  if (a === 198 && (b === 18 || b === 19)) throw new Error(`Reserved IP blocked: ${originalHostname}`)
+  if (a >= 224) throw new Error(`Multicast/reserved IP blocked: ${originalHostname}`)
+}
-  // Block link-local
-  if (hostname.startsWith('169.254.')) {
-    throw new Error(`Link-local IP blocked: ${hostname}`)
+/**
+ * Expands a textual IPv6 address (lower-case, no zone id, no brackets) into
+ * its eight 16-bit groups. Accepts `::` compression and a trailing dotted-quad.
+ * Returns null when the text is not a well-formed address.
+ */
+function parseIpv6Groups(address: string): number[] | null {
+  let text = address
+  // Rewrite a trailing dotted-quad (e.g. ::ffff:127.0.0.1) as two hex groups.
+  const dotted = text.match(/^(.*:)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/)
+  if (dotted) {
+    const [o0 = 0, o1 = 0, o2 = 0, o3 = 0] = dotted.slice(2, 6).map(Number)
+    if (o0 > 255 || o1 > 255 || o2 > 255 || o3 > 255) return null
+    text = `${dotted[1]}${((o0 << 8) | o1).toString(16)}:${((o2 << 8) | o3).toString(16)}`
+  }
+  const halves = text.split('::')
+  if (halves.length > 2) return null
+  const parseSide = (side: string): number[] | null => {
+    if (side === '') return []
+    const parsed: number[] = []
+    for (const part of side.split(':')) {
+      if (!/^[0-9a-f]{1,4}$/.test(part)) return null
+      parsed.push(parseInt(part, 16))
+    }
+    return parsed
+  }
+  const head = parseSide(halves[0] ?? '')
+  const tail = halves.length === 2 ? parseSide(halves[1] ?? '') : []
+  if (!head || !tail) return null
+  if (halves.length === 1) return head.length === 8 ? head : null
+  // `::` must stand for at least one zero group.
+  const missing = 8 - head.length - tail.length
+  if (missing < 1) return null
+  return [...head, ...new Array<number>(missing).fill(0), ...tail]
+}
+/**
+ * Rejects IPv6 addresses that must never be fetched: malformed literals,
+ * unspecified (::) and loopback (::1), unique-local fc00::/7, link-local
+ * fe80::/10, and any address that embeds a blocked IPv4 address
+ * (IPv4-mapped ::ffff:0:0/96, IPv4-compatible ::/96, NAT64 64:ff9b::/96).
+ *
+ * The address is parsed into numeric groups rather than matched as text,
+ * because URL parsing serialises `[::ffff:127.0.0.1]` as `::ffff:7f00:1`; a
+ * check that only recognises the dotted spelling lets that form through.
+ *
+ * @param ip               IPv6 address text; a `%zone` suffix is ignored.
+ * @param originalHostname Hostname the address came from; used in error messages.
+ * @throws Error naming the reason and `originalHostname` when the address is not allowed.
+ */
+function validateIpv6(ip: string, originalHostname: string): void {
+  const stripped = ip.toLowerCase().split('%')[0] ?? ''
+  const groups = parseIpv6Groups(stripped)
+  if (!groups) throw new Error(`Invalid IPv6 address: ${originalHostname}`)
+  const group = (index: number): number => groups[index] ?? 0
+  const leadingZeros = (count: number): boolean => groups.slice(0, count).every(g => g === 0)
+  if (leadingZeros(7) && group(7) <= 1) {
+    throw new Error(`IPv6 loopback/unspecified blocked: ${originalHostname}`)
+  }
+  if ((group(0) & 0xfe00) === 0xfc00) {
+    throw new Error(`IPv6 unique-local blocked: ${originalHostname}`)
+  }
+  if ((group(0) & 0xffc0) === 0xfe80) {
+    throw new Error(`IPv6 link-local blocked: ${originalHostname}`)
+  }
+  const mappedOrCompatible = leadingZeros(5) && (group(5) === 0xffff || group(5) === 0)
+  const nat64 = group(0) === 0x64 && group(1) === 0xff9b && groups.slice(2, 6).every(g => g === 0)
+  if (mappedOrCompatible || nat64) {
+    // The last 32 bits carry an IPv4 address; apply the IPv4 rules to it.
+    const embedded = `${group(6) >> 8}.${group(6) & 0xff}.${group(7) >> 8}.${group(7) & 0xff}`
+    validateIpv4(embedded, originalHostname)
+  }
 }
+/**
+ * Resolve hostname via DNS and validate every returned IP against private,
+ * loopback, link-local, and cloud-metadata ranges. This closes the static
+ * hostname-to-private-IP SSRF gap.
+ *
+ * IP literals are not resolved here; their range checks live in validateUrl(),
+ * which safeFetch runs before this function.
+ *
+ * Residual risk: this narrows but does not fully close the DNS rebinding TOCTOU
+ * window between this lookup and fetch()'s own internal lookup. Full mitigation
+ * requires IP pinning, which is HTTPS-incompatible here without TLS SNI control.
+ *
+ * @param hostname Hostname to check; surrounding brackets are tolerated.
+ * @throws Error when the hostname is forbidden by name, the lookup fails or
+ *         returns nothing, a record has an address family other than 4 or 6,
+ *         or any resolved address is in a blocked range.
+ */
+export async function validateHostDns(hostname: string): Promise<void> {
+  const stripped = hostname.replace(/^\[(.*)\]$/, '$1').toLowerCase()
+  validateHostnameLiteralPolicy(stripped)
+  if (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(stripped)) return
+  if (stripped.includes(':')) return
+  let resolved: LookupAddress[]
+  try {
+    resolved = await dnsLookup(stripped)
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e)
+    throw new Error(`DNS lookup failed for ${hostname}: ${msg}`)
+  }
+  if (!resolved || resolved.length === 0) {
+    throw new Error(`DNS lookup returned no records for ${hostname}`)
+  }
+  for (const { address, family } of resolved) {
+    if (family === 4) {
+      validateIpv4(address, hostname)
+    } else if (family === 6) {
+      validateIpv6(address, hostname)
+    } else {
+      // Fail closed: a record that is neither IPv4 nor IPv6 cannot be range-checked.
+      throw new Error(`DNS lookup returned an unsupported address family (${family}) for ${hostname}`)
+    }
+  }
+}
+// Maximum number of redirects safeFetch follows before giving up.
+const MAX_REDIRECTS = 5
+/**
+ * fetch wrapper that follows redirects manually, re-validating each hop with
+ * validateUrl() and validateHostDns(). Redirects to private IP literals or
+ * hostnames resolving to private IPs are blocked before fetch follows them.
+ *
+ * A 3xx response without a Location header is returned as-is. The body of a
+ * redirect response that is followed is cancelled first so it does not keep
+ * its connection occupied while the next hop is fetched.
+ *
+ * @param url  Initial URL.
+ * @param init Request options; `redirect` is always overridden to `'manual'`.
+ * @returns The first non-redirect response.
+ * @throws Error when any hop fails validation or more than MAX_REDIRECTS
+ *         redirects are encountered.
+ */
+async function safeFetch(url: string, init: RequestInit = {}): Promise<Response> {
+  let currentUrl = url
+  for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
+    validateUrl(currentUrl)
+    const parsed = new URL(currentUrl)
+    const hostname = parsed.hostname.replace(/^\[(.*)\]$/, '$1')
+    await validateHostDns(hostname)
+    const res = await fetch(currentUrl, { ...init, redirect: 'manual' })
+    if (res.status < 300 || res.status >= 400) {
+      return res
+    }
+    const location = res.headers.get('location')
+    if (!location) {
+      return res
+    }
+    // Release the unread redirect body; failure to cancel must not abort the redirect chain.
+    try {
+      await res.body?.cancel()
+    } catch {
+      // best effort
+    }
+    // Location may be relative; resolve it against the URL that produced it.
+    currentUrl = new URL(location, currentUrl).toString()
+  }
+  throw new Error(`Too many redirects (>${MAX_REDIRECTS}) starting from ${url}`)
+}
 /**
  * Fetch a URL and convert HTML to plain markdown-like text.
  * Strips scripts, styles, nav, footer, header, aside elements.
  * Uses Bun's native fetch — no external dependencies.
  */
 export async function fetchHtml(url: string): Promise<string> {
-  validateUrl(url)
-  const res = await fetch(url, {
+  const res = await safeFetch(url, {
     headers: {
       'User-Agent': 'Mozilla/5.0 research-vault-mcp/1.1.0',
       'Accept': 'text/html'
@@ -85,4 +195,4 @@ export async function fetchHtml(url: string): Promise<string> {
   text = text.replace(/\n{3,}/g, '\n\n').trim()
   return text
-}
+}

package/src/profile.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/** Operating profile of the MCP server: `readonly`, `full`, or `admin`. */
+export type McpProfile = 'readonly' | 'full' | 'admin'
+/**
+ * Reads the active profile from the environment.
+ *
+ * `MCP_PROFILE` is consulted first, then `RESEARCH_VAULT_MCP_PROFILE`; empty
+ * values are skipped. The value is matched case-insensitively, and anything
+ * missing or unrecognised yields `readonly`.
+ *
+ * @param env Environment map to read; defaults to `process.env`.
+ */
+export function getActiveProfile(env: Record<string, string | undefined> = process.env): McpProfile {
+  const configured = env.MCP_PROFILE || env.RESEARCH_VAULT_MCP_PROFILE || 'readonly'
+  switch (String(configured).toLowerCase()) {
+    case 'full':
+      return 'full'
+    case 'admin':
+      return 'admin'
+    default:
+      return 'readonly'
+  }
+}
+/** True for the `full` and `admin` profiles, false otherwise. */
+export function profileAllowsMutation(profile: McpProfile): boolean {
+  switch (profile) {
+    case 'full':
+    case 'admin':
+      return true
+    default:
+      return false
+  }
+}
+/** True only for the `admin` profile. */
+export function profileAllowsAdmin(profile: McpProfile): boolean {
+  if (profile !== 'admin') return false
+  return true
+}