mevoric 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -1
  2. package/server.mjs +223 -23
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mevoric",
3
- "version": "2.1.0",
3
+ "version": "2.3.0",
4
4
  "description": "Unified memory + agent bridge for Claude Code. Semantic recall, cross-tab messaging, session checkpoints — one MCP server.",
5
5
  "type": "module",
6
6
  "main": "server.mjs",
@@ -34,6 +34,7 @@
34
34
  "LICENSE"
35
35
  ],
36
36
  "dependencies": {
37
+ "@anthropic-ai/claude-agent-sdk": "^0.2.54",
37
38
  "@modelcontextprotocol/sdk": "^1.0.0",
38
39
  "node-notifier": "^10.0.1"
39
40
  }
package/server.mjs CHANGED
@@ -24,7 +24,7 @@ import {
24
24
  } from '@modelcontextprotocol/sdk/types.js';
25
25
  import {
26
26
  existsSync, mkdirSync, writeFileSync, readFileSync,
27
- readdirSync, unlinkSync, renameSync
27
+ readdirSync, unlinkSync, renameSync, appendFileSync
28
28
  } from 'fs';
29
29
  import { resolve, dirname } from 'path';
30
30
  import { randomBytes, randomUUID } from 'crypto';
@@ -67,6 +67,10 @@ const MEMORY_SERVER_URL = process.env.MEVORIC_SERVER_URL
67
67
  // Session-level conversation ID for memory tools
68
68
  const sessionConversationId = randomUUID();
69
69
 
70
+ // Cache retrieved memories so judge_memories can evaluate them locally
71
+ // Map<conversationId, [{mem0_id, memory, score}]>
72
+ const retrievalCache = new Map();
73
+
70
74
  // Write conversation ID to temp file so external tools can reference it
71
75
  const CONVID_FILE = resolve(tmpdir(), 'mevoric-convid');
72
76
  try { writeFileSync(CONVID_FILE, sessionConversationId); } catch {}
@@ -683,13 +687,19 @@ async function handleRetrieveMemories(args) {
683
687
  .filter(m => (m.score || 0) >= SCORE_THRESHOLD)
684
688
  .slice(0, MAX_RESULTS)
685
689
  .map((m, i) => ({
690
+ mem0_id: m.mem0_id,
686
691
  memory: m.memory,
687
692
  score: Math.round((m.score || 0) * 1000) / 1000,
688
693
  rank: i + 1
689
694
  }));
690
695
 
696
+ // Cache for judge_memories (includes mem0_id for verdict posting)
697
+ if (filtered.length > 0) {
698
+ retrievalCache.set(sessionConversationId, filtered);
699
+ }
700
+
691
701
  return {
692
- memories: filtered,
702
+ memories: filtered.map(m => ({ memory: m.memory, score: m.score, rank: m.rank })),
693
703
  conversation_id: sessionConversationId,
694
704
  ...(filtered.length === 0 && raw.length > 0
695
705
  ? { note: `${raw.length} memories found but none above relevance threshold (${SCORE_THRESHOLD})` }
@@ -727,6 +737,134 @@ async function handleStoreConversation(args) {
727
737
  }
728
738
  }
729
739
 
740
+ const JUDGE_PROMPT = `You are evaluating whether a retrieved memory helped answer a user's question.
741
+
742
+ USER QUERY:
743
+ {query}
744
+
745
+ ASSISTANT RESPONSE:
746
+ {response}
747
+
748
+ RETRIEVED MEMORY:
749
+ {memory}
750
+
751
+ EVALUATION — walk through these steps:
752
+
753
+ Step 1: Find evidence. Quote any part of the response that uses information from this memory.
754
+ Did you find evidence? Answer YES or NO.
755
+
756
+ Step 2:
757
+ If YES (memory was used): Is the information in the memory correct based on the response?
758
+ - If correct → verdict: "strengthen"
759
+ - If incorrect → verdict: "correct", and provide the corrected text
760
+
761
+ If NO (memory was NOT used): Why wasn't it used?
762
+ - If the memory is irrelevant to the query → verdict: "drop"
763
+ - If the memory is related but wasn't needed → verdict: "weaken"
764
+
765
+ Return JSON only:
766
+ {
767
+ "evidence": "quote from response, or 'none'",
768
+ "reasoning": "your step-by-step reasoning",
769
+ "verdict": "strengthen" | "weaken" | "correct" | "drop",
770
+ "confidence": 0.0 to 1.0,
771
+ "corrected_content": "only if verdict is correct, otherwise null"
772
+ }`;
773
+
774
+ const CONFIDENCE_THRESHOLD = 0.85;
775
+
776
+ function getCleanEnv() {
777
+ const env = { ...process.env };
778
+ delete env.ANTHROPIC_API_KEY;
779
+ delete env.CLAUDECODE;
780
+ return env;
781
+ }
782
+
783
+ async function judgeOneMemory(queryText, responseText, memoryContent) {
784
+ const prompt = JUDGE_PROMPT
785
+ .replace('{query}', queryText)
786
+ .replace('{response}', responseText)
787
+ .replace('{memory}', memoryContent);
788
+
789
+ let claudeQuery;
790
+ try {
791
+ const sdk = await import('@anthropic-ai/claude-agent-sdk');
792
+ claudeQuery = sdk.query;
793
+ } catch {
794
+ throw new Error('Claude Agent SDK not available');
795
+ }
796
+
797
+ let fullText = '';
798
+ for await (const ev of claudeQuery({
799
+ prompt,
800
+ options: {
801
+ maxTurns: 1,
802
+ allowedTools: [],
803
+ model: 'haiku',
804
+ permissionMode: 'bypassPermissions',
805
+ allowDangerouslySkipPermissions: true,
806
+ persistSession: false,
807
+ env: getCleanEnv(),
808
+ }
809
+ })) {
810
+ if (ev?.type === 'assistant' && ev.message?.content) {
811
+ for (const block of ev.message.content) {
812
+ if (block.type === 'text' && block.text) fullText += block.text;
813
+ }
814
+ }
815
+ if (ev?.type === 'result' && ev.text) fullText = ev.text;
816
+ }
817
+
818
+ let cleaned = fullText.trim();
819
+ if (cleaned.startsWith('```')) cleaned = cleaned.split('\n', 2)[1] ? cleaned.slice(cleaned.indexOf('\n') + 1) : cleaned.slice(3);
820
+ if (cleaned.endsWith('```')) cleaned = cleaned.slice(0, -3);
821
+ cleaned = cleaned.trim();
822
+
823
+ return JSON.parse(cleaned);
824
+ }
825
+
826
+ async function runJudgeInBackground(memories, queryText, responseText, convId, userId) {
827
+ let judged = 0;
828
+ let failed = 0;
829
+
830
+ for (const mem of memories) {
831
+ try {
832
+ const judgment = await judgeOneMemory(queryText, responseText, mem.memory);
833
+ const verdict = judgment.verdict || 'weaken';
834
+ const confidence = parseFloat(judgment.confidence) || 0;
835
+ const note = judgment.reasoning || '';
836
+ const corrected = judgment.corrected_content || null;
837
+
838
+ // Confidence guard: strengthen always passes, everything else needs >= 85%
839
+ const actionTaken = (verdict === 'strengthen' || confidence >= CONFIDENCE_THRESHOLD)
840
+ ? 'logged' : `blocked_low_confidence (${Math.round(confidence * 100)}%)`;
841
+
842
+ // POST verdict to Newcode for storage
843
+ try {
844
+ await memoryFetch('/api/verdict', {
845
+ mem0_id: mem.mem0_id,
846
+ conversation_id: convId,
847
+ user_id: userId,
848
+ verdict,
849
+ judge_note: note,
850
+ corrected_content: corrected,
851
+ action_taken: actionTaken,
852
+ }, 10000);
853
+ } catch {
854
+ // Storage failed but judgment succeeded — log locally
855
+ console.error(`[Mevoric] Failed to store verdict for ${mem.mem0_id}`);
856
+ }
857
+
858
+ judged++;
859
+ } catch (err) {
860
+ failed++;
861
+ console.error(`[Mevoric] Judge failed for memory: ${err.message}`);
862
+ }
863
+ }
864
+
865
+ console.error(`[Mevoric] Judge complete: ${judged} judged, ${failed} failed out of ${memories.length}`);
866
+ }
867
+
730
868
  async function handleJudgeMemories(args) {
731
869
  const convId = args.conversation_id || sessionConversationId;
732
870
  const queryText = args.query_text;
@@ -736,18 +874,22 @@ async function handleJudgeMemories(args) {
736
874
  }
737
875
  const userId = args.user_id || 'lloyd';
738
876
 
739
- try {
740
- const data = await memoryFetch('/feedback', {
741
- conversation_id: convId,
742
- user_id: userId,
743
- query_text: queryText,
744
- response_text: responseText
745
- }, 30000);
746
-
747
- return { status: data.status || 'judging', conversation_id: convId };
748
- } catch (err) {
749
- return { error: err.message, conversation_id: convId };
877
+ // Get cached memories from this conversation's retrieve call
878
+ const memories = retrievalCache.get(convId);
879
+ if (!memories || memories.length === 0) {
880
+ return { status: 'skipped', reason: 'No memories retrieved in this conversation to judge', conversation_id: convId };
750
881
  }
882
+
883
+ // Run judging in background — don't block the tool response
884
+ runJudgeInBackground(memories, queryText, responseText, convId, userId)
885
+ .catch(err => console.error(`[Mevoric] Background judge error: ${err.message}`));
886
+
887
+ return {
888
+ status: 'judging',
889
+ count: memories.length,
890
+ conversation_id: convId,
891
+ note: 'Evaluating locally via Claude SDK. Verdicts will be posted to Newcode.'
892
+ };
751
893
  }
752
894
 
753
895
  // ============================================================
@@ -1095,8 +1237,34 @@ async function runCapturePrompt() {
1095
1237
  const clean = stripSystemTags(prompt);
1096
1238
  if (clean.length < 5) process.exit(0);
1097
1239
 
1240
+ // Append to JSONL file (one line per prompt, accumulates across the session)
1098
1241
  const tmp = tmpdir();
1099
- writeFileSync(resolve(tmp, `mevoric-prompt-${sessionId}`), clean, 'utf8');
1242
+ const entry = JSON.stringify({ ts: Date.now(), prompt: clean });
1243
+ appendFileSync(resolve(tmp, `mevoric-prompt-${sessionId}`), entry + '\n', 'utf8');
1244
+
1245
+ // Fire-and-forget POST to /ingest so this prompt is saved even if session crashes
1246
+ try {
1247
+ let convId = '';
1248
+ try { convId = readFileSync(resolve(tmp, 'mevoric-convid'), 'utf8').trim(); } catch {}
1249
+ if (!convId) convId = sessionId;
1250
+
1251
+ const project = process.cwd().split(/[\\/]/).pop();
1252
+ const controller = new AbortController();
1253
+ const timer = setTimeout(() => controller.abort(), 3000);
1254
+ await fetch(`${MEMORY_SERVER_URL}/ingest`, {
1255
+ method: 'POST',
1256
+ headers: { 'Content-Type': 'application/json' },
1257
+ body: JSON.stringify({
1258
+ messages: [{ role: 'user', content: clean.slice(0, 10000) }],
1259
+ user_id: 'lloyd',
1260
+ conversation_id: convId,
1261
+ project
1262
+ }),
1263
+ signal: controller.signal
1264
+ });
1265
+ clearTimeout(timer);
1266
+ } catch {} // Best-effort — prompt is still in JSONL file for Stop hook fallback
1267
+
1100
1268
  process.exit(0);
1101
1269
  }
1102
1270
 
@@ -1119,13 +1287,26 @@ async function runIngest() {
1119
1287
  const assistantMsg = data.last_assistant_message || '';
1120
1288
  if (!sessionId || !assistantMsg) process.exit(0);
1121
1289
 
1122
- // Read user prompt saved by --capture-prompt
1290
+ // Read ALL user prompts saved by --capture-prompt (JSONL format, one per line)
1123
1291
  const tmp = tmpdir();
1124
1292
  const promptPath = resolve(tmp, `mevoric-prompt-${sessionId}`);
1125
- let userMsg = '';
1293
+ let allPrompts = [];
1126
1294
  try {
1127
- userMsg = readFileSync(promptPath, 'utf8');
1295
+ const raw = readFileSync(promptPath, 'utf8');
1296
+ allPrompts = raw.split('\n').filter(Boolean).map(line => {
1297
+ try { return JSON.parse(line); } catch { return null; }
1298
+ }).filter(Boolean);
1128
1299
  } catch {}
1300
+ // Fallback for old plain-text format (pre-JSONL)
1301
+ if (allPrompts.length === 0) {
1302
+ try {
1303
+ const plain = readFileSync(promptPath, 'utf8');
1304
+ if (plain && plain.length >= 5) allPrompts = [{ ts: Date.now(), prompt: plain }];
1305
+ } catch {}
1306
+ }
1307
+ const userMsg = allPrompts.length > 0 ? allPrompts[allPrompts.length - 1].prompt : '';
1308
+ // Clean up temp file
1309
+ try { unlinkSync(promptPath); } catch {}
1129
1310
 
1130
1311
  const cleanAssistant = stripSystemTags(assistantMsg);
1131
1312
  if (!cleanAssistant || cleanAssistant.length < 50) process.exit(0);
@@ -1144,6 +1325,17 @@ async function runIngest() {
1144
1325
  else if (prev.content) existing = { exchanges: [{ role: 'context', content: prev.content }] };
1145
1326
  } catch {}
1146
1327
 
1328
+ // Store all user prompts from this session, not just the last one
1329
+ if (allPrompts.length > 1) {
1330
+ for (let i = 0; i < allPrompts.length - 1; i++) {
1331
+ existing.exchanges.push({
1332
+ timestamp: new Date(allPrompts[i].ts).toISOString(),
1333
+ user: allPrompts[i].prompt.slice(0, 2000),
1334
+ assistant: ''
1335
+ });
1336
+ }
1337
+ }
1338
+ // Final exchange has the actual assistant response
1147
1339
  existing.exchanges.push({
1148
1340
  timestamp: new Date().toISOString(),
1149
1341
  user: userMsg.slice(0, 2000),
@@ -1194,8 +1386,8 @@ async function runIngest() {
1194
1386
  renameSync(cpTmp, cpPath);
1195
1387
  } catch {}
1196
1388
 
1197
- // --- 3. POST to memory server /ingest (ported from Python auto-ingest.py) ---
1198
- if (userMsg && cleanAssistant) {
1389
+ // --- 3. POST to memory server /ingest — full conversation (all prompts + final response) ---
1390
+ if ((allPrompts.length > 0 || userMsg) && cleanAssistant) {
1199
1391
  // Read conversation ID from temp file (written by MCP server process)
1200
1392
  let convId = '';
1201
1393
  try {
@@ -1204,6 +1396,17 @@ async function runIngest() {
1204
1396
  if (!convId) convId = sessionId; // fallback
1205
1397
 
1206
1398
  try {
1399
+ // Build messages array: all user prompts + final assistant response
1400
+ const messages = [];
1401
+ if (allPrompts.length > 0) {
1402
+ for (const entry of allPrompts) {
1403
+ messages.push({ role: 'user', content: entry.prompt.slice(0, 10000) });
1404
+ }
1405
+ } else if (userMsg) {
1406
+ messages.push({ role: 'user', content: userMsg.slice(0, 10000) });
1407
+ }
1408
+ messages.push({ role: 'assistant', content: cleanAssistant.slice(0, 10000) });
1409
+
1207
1410
  const project = process.cwd().split(/[\\/]/).pop();
1208
1411
  const controller = new AbortController();
1209
1412
  const timer = setTimeout(() => controller.abort(), 15000);
@@ -1211,10 +1414,7 @@ async function runIngest() {
1211
1414
  method: 'POST',
1212
1415
  headers: { 'Content-Type': 'application/json' },
1213
1416
  body: JSON.stringify({
1214
- messages: [
1215
- { role: 'user', content: userMsg.slice(0, 10000) },
1216
- { role: 'assistant', content: cleanAssistant.slice(0, 10000) }
1217
- ],
1417
+ messages,
1218
1418
  user_id: 'lloyd',
1219
1419
  conversation_id: convId,
1220
1420
  project