mevoric 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +2 -1
  2. package/server.mjs +154 -12
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mevoric",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "Unified memory + agent bridge for Claude Code. Semantic recall, cross-tab messaging, session checkpoints — one MCP server.",
5
5
  "type": "module",
6
6
  "main": "server.mjs",
@@ -34,6 +34,7 @@
34
34
  "LICENSE"
35
35
  ],
36
36
  "dependencies": {
37
+ "@anthropic-ai/claude-agent-sdk": "^0.2.54",
37
38
  "@modelcontextprotocol/sdk": "^1.0.0",
38
39
  "node-notifier": "^10.0.1"
39
40
  }
package/server.mjs CHANGED
@@ -67,6 +67,10 @@ const MEMORY_SERVER_URL = process.env.MEVORIC_SERVER_URL
67
67
  // Session-level conversation ID for memory tools
68
68
  const sessionConversationId = randomUUID();
69
69
 
70
+ // Cache retrieved memories so judge_memories can evaluate them locally
71
+ // Map<conversationId, [{mem0_id, memory, score}]>
72
+ const retrievalCache = new Map();
73
+
70
74
  // Write conversation ID to temp file so external tools can reference it
71
75
  const CONVID_FILE = resolve(tmpdir(), 'mevoric-convid');
72
76
  try { writeFileSync(CONVID_FILE, sessionConversationId); } catch {}
@@ -683,13 +687,19 @@ async function handleRetrieveMemories(args) {
683
687
  .filter(m => (m.score || 0) >= SCORE_THRESHOLD)
684
688
  .slice(0, MAX_RESULTS)
685
689
  .map((m, i) => ({
690
+ mem0_id: m.mem0_id,
686
691
  memory: m.memory,
687
692
  score: Math.round((m.score || 0) * 1000) / 1000,
688
693
  rank: i + 1
689
694
  }));
690
695
 
696
+ // Cache for judge_memories (includes mem0_id for verdict posting)
697
+ if (filtered.length > 0) {
698
+ retrievalCache.set(sessionConversationId, filtered);
699
+ }
700
+
691
701
  return {
692
- memories: filtered,
702
+ memories: filtered.map(m => ({ memory: m.memory, score: m.score, rank: m.rank })),
693
703
  conversation_id: sessionConversationId,
694
704
  ...(filtered.length === 0 && raw.length > 0
695
705
  ? { note: `${raw.length} memories found but none above relevance threshold (${SCORE_THRESHOLD})` }
@@ -727,6 +737,134 @@ async function handleStoreConversation(args) {
727
737
  }
728
738
  }
729
739
 
740
// Prompt template used to judge a single retrieved memory. The {query},
// {response} and {memory} placeholders are filled in by judgeOneMemory before
// the prompt is sent. The template asks the model to reply with JSON only,
// containing: evidence, reasoning, verdict ("strengthen" | "weaken" |
// "correct" | "drop"), confidence (0.0 to 1.0) and corrected_content.
+ const JUDGE_PROMPT = `You are evaluating whether a retrieved memory helped answer a user's question.
741
+
742
+ USER QUERY:
743
+ {query}
744
+
745
+ ASSISTANT RESPONSE:
746
+ {response}
747
+
748
+ RETRIEVED MEMORY:
749
+ {memory}
750
+
751
+ EVALUATION — walk through these steps:
752
+
753
+ Step 1: Find evidence. Quote any part of the response that uses information from this memory.
754
+ Did you find evidence? Answer YES or NO.
755
+
756
+ Step 2:
757
+ If YES (memory was used): Is the information in the memory correct based on the response?
758
+ - If correct → verdict: "strengthen"
759
+ - If incorrect → verdict: "correct", and provide the corrected text
760
+
761
+ If NO (memory was NOT used): Why wasn't it used?
762
+ - If the memory is irrelevant to the query → verdict: "drop"
763
+ - If the memory is related but wasn't needed → verdict: "weaken"
764
+
765
+ Return JSON only:
766
+ {
767
+ "evidence": "quote from response, or 'none'",
768
+ "reasoning": "your step-by-step reasoning",
769
+ "verdict": "strengthen" | "weaken" | "correct" | "drop",
770
+ "confidence": 0.0 to 1.0,
771
+ "corrected_content": "only if verdict is correct, otherwise null"
772
+ }`;
773
+
774
// Minimum judge confidence for any verdict other than "strengthen" to be
// recorded with action "logged"; below it, runJudgeInBackground records the
// action as "blocked_low_confidence (<pct>%)" instead.
+ const CONFIDENCE_THRESHOLD = 0.85;
775
+
776
/**
 * Build the environment object passed to the Claude Agent SDK call.
 *
 * Produces a shallow copy of `process.env` with the `ANTHROPIC_API_KEY` and
 * `CLAUDECODE` entries left out. `process.env` itself is never modified.
 * NOTE(review): presumably these are stripped so the spawned SDK session does
 * not inherit the parent's API key / Claude Code markers — confirm.
 *
 * @returns {Record<string, string | undefined>} Filtered copy of the environment.
 */
function getCleanEnv() {
  const strippedNames = new Set(['ANTHROPIC_API_KEY', 'CLAUDECODE']);
  const keptEntries = Object.entries(process.env).filter(
    ([name]) => !strippedNames.has(name),
  );
  return Object.fromEntries(keptEntries);
}
782
+
783
/**
 * Fill the `{query}`, `{response}` and `{memory}` placeholders of a judge
 * prompt template in a single pass.
 *
 * A replacer function is used so that `$&`, `$'`, `$$` etc. inside the
 * inserted text are kept literally, and inserted text is never rescanned, so
 * a `{response}` token inside the user's query cannot capture the response.
 *
 * @param {string} template - Prompt template containing the placeholders.
 * @param {{query?: unknown, response?: unknown, memory?: unknown}} values - Placeholder values; nullish becomes ''.
 * @returns {string} The filled prompt.
 */
function fillJudgePrompt(template, values) {
  return template.replace(
    /\{(query|response|memory)\}/g,
    (_placeholder, key) => String(values[key] ?? ''),
  );
}

/**
 * Extract the judgment object from the model's raw text reply.
 *
 * Strips a surrounding Markdown code fence if present, then parses the JSON.
 * If that fails, falls back to the span between the first `{` and last `}`
 * so that stray prose around the object does not discard the judgment.
 *
 * @param {string} rawText - Text collected from the model.
 * @returns {object} Parsed judgment object.
 * @throws {Error} If no JSON object can be found or the JSON is not an object.
 * @throws {SyntaxError} If the extracted span is not valid JSON.
 */
function parseJudgeResponse(rawText) {
  let text = String(rawText ?? '').trim();
  if (text.startsWith('```')) {
    // Drop the whole opening fence line (e.g. "```json"), or just the backticks.
    const newlineAt = text.indexOf('\n');
    text = newlineAt === -1 ? text.slice(3) : text.slice(newlineAt + 1);
  }
  if (text.endsWith('```')) text = text.slice(0, -3);
  text = text.trim();

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (err) {
    const start = text.indexOf('{');
    const end = text.lastIndexOf('}');
    if (start === -1 || end <= start) {
      throw new Error('Judge response did not contain JSON', { cause: err });
    }
    parsed = JSON.parse(text.slice(start, end + 1));
  }

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error('Judge response is not a JSON object');
  }
  return parsed;
}

/**
 * Ask the Claude Agent SDK to judge whether one retrieved memory was used in
 * (and is consistent with) an assistant response.
 *
 * @param {string} queryText - The user's query.
 * @param {string} responseText - The assistant's response to that query.
 * @param {string} memoryContent - Text of the retrieved memory being judged.
 * @returns {Promise<object>} Parsed judgment (fields as requested by JUDGE_PROMPT).
 * @throws {Error} 'Claude Agent SDK not available' if the SDK cannot be loaded,
 *   or a parse error if the reply contains no usable JSON object.
 */
async function judgeOneMemory(queryText, responseText, memoryContent) {
  const prompt = fillJudgePrompt(JUDGE_PROMPT, {
    query: queryText,
    response: responseText,
    memory: memoryContent,
  });

  // Loaded lazily so the rest of the server works without the SDK installed.
  let claudeQuery;
  try {
    const sdk = await import('@anthropic-ai/claude-agent-sdk');
    claudeQuery = sdk.query;
  } catch (err) {
    throw new Error('Claude Agent SDK not available', { cause: err });
  }
  if (typeof claudeQuery !== 'function') {
    throw new Error('Claude Agent SDK not available');
  }

  let fullText = '';
  for await (const ev of claudeQuery({
    prompt,
    options: {
      maxTurns: 1,
      allowedTools: [],
      model: 'haiku',
      permissionMode: 'bypassPermissions',
      allowDangerouslySkipPermissions: true,
      persistSession: false,
      env: getCleanEnv(),
    },
  })) {
    if (ev?.type === 'assistant' && Array.isArray(ev.message?.content)) {
      for (const block of ev.message.content) {
        if (block.type === 'text' && block.text) fullText += block.text;
      }
    }
    if (ev?.type === 'result') {
      // The SDK's success result carries the final text in `result`; `text`
      // is still honoured for compatibility with the previous check.
      const finalText = ev.result ?? ev.text;
      if (typeof finalText === 'string' && finalText) fullText = finalText;
    }
  }

  return parseJudgeResponse(fullText);
}
825
+
826
/**
 * Judge each cached memory against a query/response pair, one at a time, and
 * post every verdict to the memory server's `/api/verdict` endpoint.
 *
 * Failures are isolated per memory: a judge error (or an unrecognised
 * verdict) counts as `failed` and the loop continues; a storage error is
 * logged but the memory still counts as `judged`.
 *
 * @param {Array<{mem0_id: string, memory: string}>} memories - Memories to judge.
 * @param {string} queryText - The user's query.
 * @param {string} responseText - The assistant's response.
 * @param {string} convId - Conversation ID sent with each verdict.
 * @param {string} userId - User ID sent with each verdict.
 * @returns {Promise<{judged: number, failed: number}>} Counts for this run.
 */
async function runJudgeInBackground(memories, queryText, responseText, convId, userId) {
  const allowedVerdicts = new Set(['strengthen', 'weaken', 'correct', 'drop']);
  const items = Array.isArray(memories) ? memories : [];
  let judged = 0;
  let failed = 0;

  for (const mem of items) {
    try {
      const judgment = await judgeOneMemory(queryText, responseText, mem.memory);

      // A missing verdict keeps the historical default of "weaken"; anything
      // else must normalise to one of the verdicts the prompt allows.
      const rawVerdict = judgment.verdict;
      const verdict = rawVerdict ? String(rawVerdict).trim().toLowerCase() : 'weaken';
      if (!allowedVerdicts.has(verdict)) {
        throw new Error(`Unknown verdict "${rawVerdict}"`);
      }

      const confidence = parseFloat(judgment.confidence) || 0;
      const note = judgment.reasoning || '';
      // Corrected text is only meaningful for a "correct" verdict.
      const corrected = verdict === 'correct' ? (judgment.corrected_content || null) : null;

      // Confidence guard: strengthen always passes, everything else needs >= 85%
      const actionTaken = (verdict === 'strengthen' || confidence >= CONFIDENCE_THRESHOLD)
        ? 'logged'
        : `blocked_low_confidence (${Math.round(confidence * 100)}%)`;

      // POST verdict to Newcode for storage
      try {
        await memoryFetch('/api/verdict', {
          mem0_id: mem.mem0_id,
          conversation_id: convId,
          user_id: userId,
          verdict,
          judge_note: note,
          corrected_content: corrected,
          action_taken: actionTaken,
        }, 10000);
      } catch (err) {
        // Storage failed but judgment succeeded — log locally, keep going.
        console.error(`[Mevoric] Failed to store verdict for ${mem.mem0_id}: ${err?.message ?? err}`);
      }

      judged++;
    } catch (err) {
      failed++;
      console.error(`[Mevoric] Judge failed for memory: ${err?.message ?? err}`);
    }
  }

  console.error(`[Mevoric] Judge complete: ${judged} judged, ${failed} failed out of ${items.length}`);
  return { judged, failed };
}
867
+
730
868
  async function handleJudgeMemories(args) {
731
869
  const convId = args.conversation_id || sessionConversationId;
732
870
  const queryText = args.query_text;
@@ -736,18 +874,22 @@ async function handleJudgeMemories(args) {
736
874
  }
737
875
  const userId = args.user_id || 'lloyd';
738
876
 
739
- try {
740
- const data = await memoryFetch('/feedback', {
741
- conversation_id: convId,
742
- user_id: userId,
743
- query_text: queryText,
744
- response_text: responseText
745
- }, 30000);
746
-
747
- return { status: data.status || 'judging', conversation_id: convId };
748
- } catch (err) {
749
- return { error: err.message, conversation_id: convId };
877
+ // Get cached memories from this conversation's retrieve call
878
+ const memories = retrievalCache.get(convId);
879
+ if (!memories || memories.length === 0) {
880
+ return { status: 'skipped', reason: 'No memories retrieved in this conversation to judge', conversation_id: convId };
750
881
  }
882
+
883
+ // Run judging in background — don't block the tool response
884
+ runJudgeInBackground(memories, queryText, responseText, convId, userId)
885
+ .catch(err => console.error(`[Mevoric] Background judge error: ${err.message}`));
886
+
887
+ return {
888
+ status: 'judging',
889
+ count: memories.length,
890
+ conversation_id: convId,
891
+ note: 'Evaluating locally via Claude SDK. Verdicts will be posted to Newcode.'
892
+ };
751
893
  }
752
894
 
753
895
  // ============================================================