engram-sdk 0.1.10 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/vault.js CHANGED
@@ -2,7 +2,7 @@ import path from 'path';
2
2
  import { MemoryStore } from './store.js';
3
3
  import { RememberInputSchema, RecallInputSchema } from './types.js';
4
4
  import { extract } from './extract.js';
5
- import { calculateRecencyBoost, DEFAULT_TEMPORAL_CONFIG, findContradictionCandidates, verifyContradiction } from './temporal.js';
5
+ import { calculateRecencyBoost, DEFAULT_TEMPORAL_CONFIG, findContradictionCandidates, verifyContradiction, temporalEdgeWeight } from './temporal.js';
6
6
  // ============================================================
7
7
  // Vault — The public API for Engram
8
8
  // ============================================================
@@ -66,6 +66,11 @@ export class Vault {
66
66
  // Contradiction detection: if this memory updates a previous fact,
67
67
  // mark the old one as superseded. Only runs when LLM is configured.
68
68
  return this.detectContradictions(memory);
69
+ })
70
+ .then(() => {
71
+ // Post-remember inference: extract implicit insights that a human
72
+ // would obviously infer (e.g., "builds hunting platform" → "likes hunting").
73
+ return this.inferInsights(memory);
69
74
  })
70
75
  .catch(err => {
71
76
  console.warn(`Failed to process embedding/contradictions for ${memory.id}:`, err);
@@ -197,6 +202,103 @@ export class Vault {
197
202
  // Reinforcement is best-effort
198
203
  }
199
204
  }
205
/**
 * Post-remember inference: after storing a memory, use LLM to extract
 * 0-2 implicit insights that a human would obviously infer.
 *
 * Example: "Ian is building a hunting land acquisition platform"
 * → infers "Ian is interested in hunting"
 *
 * These are stored as low-confidence (0.3) semantic memories that
 * accumulate via reinforcement over time. If someone is building
 * a hunting platform AND talks about hunting trips AND buys hunting
 * gear, the confidence on "Ian likes hunting" climbs naturally.
 *
 * Only runs when LLM is configured. Async, fire-and-forget.
 * Skips memories that are already implicit or from consolidation.
 *
 * @param {object} memory - The memory just stored; reads .id, .content,
 *   .salience, .topics, .entities, and .source.type.
 * @returns {Promise<void>} Never rejects — failures are logged and
 *   swallowed so the remember() flow is never broken.
 */
async inferInsights(memory) {
    // Inference requires an LLM; silently no-op otherwise.
    if (!this.config.llm)
        return;
    // Don't infer from system/consolidation memories or already-implicit ones.
    // The 'implicit' guard also bounds recursion: memories created below are
    // tagged 'implicit', so a second-level inference pass skips them.
    if (memory.source?.type === 'consolidation')
        return;
    if (memory.topics?.includes('implicit'))
        return;
    if (memory.topics?.includes('meta'))
        return;
    // Skip low-salience memories (not worth the LLM call)
    if (memory.salience < 0.4)
        return;
    // Skip very short memories (not enough signal)
    if (memory.content.length < 40)
        return;
    const llmConfig = this.config.llm;
    // Cheapest capable model per provider. NOTE(review): unlike ask()/audit(),
    // this ignores llmConfig.model — presumably intentional to keep this
    // background call cheap; confirm.
    const model = llmConfig.provider === 'gemini' ? 'gemini-2.5-flash'
        : llmConfig.provider === 'openai' ? 'gpt-4o-mini'
            : 'claude-3-5-haiku-20241022';
    const prompt = `Given this memory about a person, extract 0-2 basic personal insights that any human would obviously infer. Focus on interests, personality traits, preferences, and relationships.

Memory: "${memory.content}"
Entities: ${memory.entities?.join(', ') || 'none'}

Rules:
- Only include inferences that are clearly supported by the memory
- Keep each insight to one short sentence
- Do NOT restate the original memory — only new inferences
- If nothing interesting can be inferred, return empty array
- These should be things like "X is interested in Y", "X values Z", "X and Y are close"

JSON: {"insights": [{"content": "...", "entities": ["..."], "topics": ["..."]}]}
If nothing: {"insights": []}`;
    try {
        const response = await this.callLLM(model, prompt, llmConfig);
        const parsed = JSON.parse(response);
        for (const insight of parsed.insights ?? []) {
            // Drop empty or trivially short insights.
            if (!insight.content || insight.content.length < 10)
                continue;
            // Check if this insight already exists (don't duplicate)
            if (this.embedder && this.store.hasVectorSearch()) {
                try {
                    const embedding = await this.embedder.embed(insight.content);
                    // NOTE(review): threshold 0.15 vs 0.5 used elsewhere —
                    // assumes findSimilar's second arg tightens to near-duplicates
                    // here; confirm whether it is a distance or similarity bound.
                    const similar = this.store.findSimilar(embedding, 0.15, 3);
                    if (similar.length > 0) {
                        // Similar insight exists — reinforce it instead of creating new
                        const existing = this.store.getMemoryDirect(similar[0].memoryId);
                        if (existing && existing.status === 'active') {
                            // Small additive bump, capped at 1.0.
                            const newConf = Math.min(1.0, existing.confidence + 0.05);
                            this.store.updateMemory(existing.id, { confidence: newConf });
                            this.store.createEdge(memory.id, existing.id, 'supports', 0.6);
                            continue;
                        }
                    }
                }
                catch {
                    // Embedding check failed — create anyway
                }
            }
            // Store as implicit memory with low confidence
            // NOTE(review): assumes remember() returns the stored memory object
            // synchronously (inferred.id is read immediately below) — confirm.
            const inferred = this.remember({
                content: insight.content,
                type: 'semantic',
                entities: insight.entities ?? memory.entities ?? [],
                topics: [...(insight.topics ?? []), 'implicit', 'inferred'],
                salience: 0.4,
                confidence: 0.3,
                source: {
                    type: 'inference',
                    evidence: [memory.id],
                },
            });
            // Link the insight to the source memory
            this.store.createEdge(memory.id, inferred.id, 'derived_from', 0.7);
        }
    }
    catch (err) {
        // Inference is best-effort — never break the remember flow
        console.error('Insight inference failed:', err);
    }
}
200
302
  /**
201
303
  * Detect contradictions: when a new memory is stored, check if it
202
304
  * updates or replaces an existing fact about the same entity.
@@ -226,19 +328,35 @@ export class Vault {
226
328
  if (!memory.entities || memory.entities.length === 0)
227
329
  return;
228
330
  try {
229
- // Phase 1: Find candidates via vector similarity + entity overlap
331
+ // Phase 1: Find candidates via BOTH vector similarity AND entity overlap
332
+ // Vector similarity alone misses cases like "X is 79%" vs "X is 72%" where
333
+ // the surrounding text differs but the factual claim conflicts.
334
+ const candidateSet = new Map();
335
+ // 1a. Vector similarity search
230
336
  const embedding = this.store.getEmbedding(memory.id);
231
- if (!embedding)
232
- return;
233
- // Wider search than dedup — we want topically similar, not identical
234
- const similar = this.store.findSimilar(embedding, 0.5, 20);
235
- const candidateIds = similar
236
- .filter(s => s.memoryId !== memory.id)
237
- .map(s => s.memoryId);
238
- if (candidateIds.length === 0)
337
+ if (embedding) {
338
+ const similar = this.store.findSimilar(embedding, 0.5, 20);
339
+ const vectorIds = similar
340
+ .filter(s => s.memoryId !== memory.id)
341
+ .map(s => s.memoryId);
342
+ for (const mem of this.store.getMemoriesDirect(vectorIds)) {
343
+ if (mem.status === 'active')
344
+ candidateSet.set(mem.id, mem);
345
+ }
346
+ }
347
+ // 1b. Entity-based search — find ALL memories sharing entities with this one
348
+ // This catches contradictions that vector search misses
349
+ for (const entity of memory.entities) {
350
+ const entityMemories = this.store.getByEntity(entity, 30);
351
+ for (const mem of entityMemories) {
352
+ if (mem.id !== memory.id && mem.status === 'active') {
353
+ candidateSet.set(mem.id, mem);
354
+ }
355
+ }
356
+ }
357
+ if (candidateSet.size === 0)
239
358
  return;
240
- const candidateMemories = this.store.getMemoriesDirect(candidateIds)
241
- .filter(m => m.status === 'active');
359
+ const candidateMemories = [...candidateSet.values()];
242
360
  // Phase 1b: Heuristic filter — must share entities
243
361
  const threshold = this.config.temporal?.contradictionSimilarityThreshold ?? 0.75;
244
362
  const minOverlap = this.config.temporal?.minEntityOverlap ?? 1;
@@ -251,7 +369,7 @@ export class Vault {
251
369
  if (filtered.length === 0)
252
370
  return;
253
371
  // Phase 2: LLM verification — check top 3 candidates max
254
- const llmCall = (prompt) => this.callLLM(this.config.llm.model ?? 'gemini-2.0-flash', prompt, this.config.llm);
372
+ const llmCall = (prompt) => this.callLLM(this.config.llm.model ?? 'gemini-2.5-flash', prompt, this.config.llm);
255
373
  // Only check older memories — newer ones can't be superseded by this memory
256
374
  const olderCandidates = filtered
257
375
  .filter(c => new Date(c.createdAt) < new Date(memory.createdAt))
@@ -491,6 +609,34 @@ export class Vault {
491
609
  const oneWeekAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString();
492
610
  results = results.filter(r => r.memory.createdAt >= oneWeekAgo);
493
611
  }
612
+ else if (parsed.temporalFocus === 'latest') {
613
+ // Deduplicate by entity+topic overlap: when multiple memories share
614
+ // the same primary entity AND topic, keep only the newest one.
615
+ // This prevents stale facts from polluting results.
616
+ const seen = new Map();
617
+ const deduped = [];
618
+ for (const r of results) {
619
+ if (r.memory.entities.length === 0) {
620
+ // No entities — can't dedup, keep it
621
+ deduped.push(r);
622
+ continue;
623
+ }
624
+ // Build a dedup key from primary entity + topics
625
+ const primaryEntity = r.memory.entities[0].toLowerCase();
626
+ const topicKey = (r.memory.topics ?? []).sort().join(',').toLowerCase();
627
+ const dedupKey = `${primaryEntity}:${topicKey}`;
628
+ const createdAt = new Date(r.memory.createdAt).getTime();
629
+ const existing = seen.get(dedupKey);
630
+ if (!existing || createdAt > existing.createdAt) {
631
+ seen.set(dedupKey, { memory: r.memory, score: r.score, createdAt });
632
+ }
633
+ }
634
+ // Collect deduped results: non-entity memories + latest per entity+topic
635
+ for (const entry of seen.values()) {
636
+ deduped.push({ memory: entry.memory, score: entry.score });
637
+ }
638
+ results = deduped;
639
+ }
494
640
  // 8. Score with salience, stability, and type weighting
495
641
  // Semantic memories with high stability are the "core knowledge" —
496
642
  // they should outrank noisy episodic results when the vector
@@ -557,9 +703,15 @@ export class Vault {
557
703
  const parentId = frontier.has(edge.sourceId) ? edge.sourceId : edge.targetId;
558
704
  const neighborId = edge.sourceId === parentId ? edge.targetId : edge.sourceId;
559
705
  const parentActivation = frontier.get(parentId) ?? 0;
560
- // Activation = parent × edge_strength × decay × edge_type_weight
706
+ // Activation = parent × temporalEdgeWeight(strength, recency) × decay × edge_type_weight
561
707
  const typeWeight = this.edgeTypeWeight(edge.type);
562
- const activation = parentActivation * edge.strength * opts.decay * typeWeight;
708
+ // Apply temporal weighting: edges to newer memories carry more activation energy
709
+ // Use getMemoryDirect to avoid bumping access count during traversal
710
+ const neighborMem = this.store.getMemoryDirect(neighborId);
711
+ const effectiveStrength = neighborMem
712
+ ? temporalEdgeWeight(edge.strength, neighborMem)
713
+ : edge.strength;
714
+ const activation = parentActivation * effectiveStrength * opts.decay * typeWeight;
563
715
  if (activation < opts.minActivation)
564
716
  continue;
565
717
  // Accumulate activation (multiple paths can reinforce)
@@ -684,13 +836,6 @@ export class Vault {
684
836
  entitiesDiscovered = result.entitiesDiscovered;
685
837
  connectionsFormed = result.connectionsFormed;
686
838
  }
687
- // Apply decay
688
- const decayed = this.store.applyDecay(this.config.decay?.halfLifeHours ?? 168);
689
- // Archive deeply decayed memories
690
- const archived = this.store.getDecayedMemories(this.config.decay?.archiveThreshold ?? 0.05);
691
- for (const mem of archived) {
692
- this.store.deleteMemory(mem.id); // TODO: move to cold storage instead of deleting
693
- }
694
839
  const report = {
695
840
  startedAt,
696
841
  completedAt: new Date().toISOString(),
@@ -700,12 +845,12 @@ export class Vault {
700
845
  entitiesDiscovered,
701
846
  connectionsFormed,
702
847
  contradictionsFound,
703
- memoriesDecayed: decayed,
704
- memoriesArchived: archived.length,
848
+ memoriesDecayed: 0,
849
+ memoriesArchived: 0,
705
850
  };
706
851
  // Store the consolidation report as a memory itself
707
852
  this.remember({
708
- content: `Consolidation completed: processed ${episodes.length} episodes, created ${semanticCreated} semantic memories, discovered ${entitiesDiscovered} entities, formed ${connectionsFormed} connections, decayed ${decayed} memories.`,
853
+ content: `Consolidation completed: processed ${episodes.length} episodes, created ${semanticCreated} semantic memories, discovered ${entitiesDiscovered} entities, formed ${connectionsFormed} connections.`,
709
854
  type: 'procedural',
710
855
  topics: ['meta', 'consolidation'],
711
856
  salience: 0.3,
@@ -867,6 +1012,315 @@ export class Vault {
867
1012
  return results.slice(0, limit);
868
1013
  }
869
1014
  // --------------------------------------------------------
1015
+ // ask() — Answer a question using memories as evidence.
1016
+ //
1017
+ // This is the feature that makes Engram useful for agents:
1018
+ // instead of returning 30 raw memories and making the agent
1019
+ // do the synthesis, ask() runs recall internally, then uses
1020
+ // the LLM to produce a coherent answer with confidence signal.
1021
+ //
1022
+ // The agent gets: one answer, the evidence behind it, and a
1023
+ // confidence level. No memory parsing, no synthesis burden.
1024
+ // --------------------------------------------------------
1025
+ async ask(question, opts) {
1026
+ if (!this.config.llm) {
1027
+ throw new Error('ask() requires LLM configuration (set llm in vault config)');
1028
+ }
1029
+ const limit = opts?.limit ?? 20;
1030
+ const spread = opts?.spread ?? true;
1031
+ // Step 1: Recall relevant memories
1032
+ const memories = await this.recall({
1033
+ context: question,
1034
+ limit,
1035
+ spread,
1036
+ temporalFocus: 'latest', // Deduplicate by entity+topic, keep newest
1037
+ });
1038
+ if (memories.length === 0) {
1039
+ return {
1040
+ answer: 'I have no memories related to this question.',
1041
+ confidence: 'low',
1042
+ memories: [],
1043
+ tokenEstimate: 0,
1044
+ evidenceQuality: {
1045
+ memoryCount: 0,
1046
+ avgConfidence: 0,
1047
+ totalAccesses: 0,
1048
+ newestMemoryAgeDays: -1,
1049
+ sourceBreakdown: { directInput: 0, autoIngested: 0 },
1050
+ },
1051
+ };
1052
+ }
1053
+ // Step 2: Build evidence block with metadata
1054
+ const evidenceLines = memories.map((m, i) => {
1055
+ const age = Math.floor((Date.now() - new Date(m.createdAt).getTime()) / (1000 * 60 * 60 * 24));
1056
+ const accessCount = m.accessCount ?? 0;
1057
+ const confidenceLabel = m.confidence >= 0.8 ? 'high' : m.confidence >= 0.5 ? 'medium' : 'low';
1058
+ const status = m.status !== 'active' ? ` [${m.status}]` : '';
1059
+ return `[${i + 1}] (${m.type}, confidence: ${confidenceLabel}, ${age}d ago, accessed ${accessCount}x${status}) ${m.content}`;
1060
+ });
1061
+ const prompt = `You are answering a question using memories from a knowledge vault.
1062
+
1063
+ RULES:
1064
+ - Answer the question directly and concisely based ONLY on the provided memories.
1065
+ - When multiple memories contain different values for the same fact, ALWAYS prefer the most recent one (lower "d ago" number).
1066
+ - If memories conflict, state the most recent fact and note it was updated.
1067
+ - If the memories don't contain enough information, say so honestly.
1068
+ - Do NOT make up information not supported by the memories.
1069
+
1070
+ MEMORIES:
1071
+ ${evidenceLines.join('\n')}
1072
+
1073
+ QUESTION: ${question}
1074
+
1075
+ Respond in JSON:
1076
+ {
1077
+ "answer": "Your concise, synthesized answer",
1078
+ "confidence": "high|medium|low",
1079
+ "reasoning": "Brief note on evidence quality"
1080
+ }
1081
+
1082
+ Confidence guide:
1083
+ - "high": Multiple memories support the answer, recent, frequently accessed
1084
+ - "medium": Answer is supported but by few memories or older data
1085
+ - "low": Sparse evidence, conflicting data, or mostly inference`;
1086
+ // Step 3: Call LLM for synthesis
1087
+ const llmConfig = this.config.llm;
1088
+ const model = llmConfig.model ?? 'gemini-2.5-flash';
1089
+ const response = await this.callLLM(model, prompt, llmConfig);
1090
+ // Step 4: Parse response
1091
+ let answer = 'Unable to synthesize an answer.';
1092
+ let confidence = 'low';
1093
+ try {
1094
+ const parsed = JSON.parse(response);
1095
+ answer = parsed.answer ?? answer;
1096
+ confidence = ['high', 'medium', 'low'].includes(parsed.confidence) ? parsed.confidence : 'low';
1097
+ }
1098
+ catch {
1099
+ // If JSON parsing fails, use raw text as answer
1100
+ answer = response.trim();
1101
+ }
1102
+ // Estimate tokens used (rough: 4 chars per token)
1103
+ const tokenEstimate = Math.ceil((prompt.length + answer.length) / 4);
1104
+ // Step 5: Build confidence metadata from evidence
1105
+ const avgConfidence = memories.reduce((sum, m) => sum + m.confidence, 0) / memories.length;
1106
+ const totalAccesses = memories.reduce((sum, m) => sum + m.accessCount, 0);
1107
+ const newestAge = Math.min(...memories.map(m => Math.floor((Date.now() - new Date(m.createdAt).getTime()) / (1000 * 60 * 60 * 24))));
1108
+ const autoIngestedCount = memories.filter(m => (m.topics ?? []).includes('auto-ingested')).length;
1109
+ return {
1110
+ answer,
1111
+ confidence,
1112
+ memories,
1113
+ tokenEstimate,
1114
+ evidenceQuality: {
1115
+ memoryCount: memories.length,
1116
+ avgConfidence: Math.round(avgConfidence * 100) / 100,
1117
+ totalAccesses,
1118
+ newestMemoryAgeDays: newestAge,
1119
+ sourceBreakdown: {
1120
+ directInput: memories.length - autoIngestedCount,
1121
+ autoIngested: autoIngestedCount,
1122
+ },
1123
+ },
1124
+ };
1125
+ }
1126
+ // --------------------------------------------------------
1127
+ // alerts() — What should the agent know RIGHT NOW?
1128
+ //
1129
+ // Unlike surface() (which needs context input) or briefing()
1130
+ // (which is a full session dump), alerts() returns only the
1131
+ // things that need attention. No context required.
1132
+ //
1133
+ // Three categories:
1134
+ // 1. Pending commitments — things promised but not fulfilled
1135
+ // 2. Stale follow-ups — things that haven't been touched in a while
1136
+ // 3. Contradictions — conflicting facts that need resolution
1137
+ //
1138
+ // Returns empty array when nothing needs attention.
1139
+ // Designed to be called on heartbeat or session start.
1140
+ // --------------------------------------------------------
1141
+ alerts(opts) {
1142
+ const staleDays = opts?.staleDays ?? 3;
1143
+ const limit = opts?.limit ?? 10;
1144
+ const includeContradictions = opts?.includeContradictions ?? true;
1145
+ const now = Date.now();
1146
+ const alerts = [];
1147
+ // 1. Pending commitments
1148
+ const pending = this.store.getByStatus('pending', 50);
1149
+ for (const mem of pending) {
1150
+ const ageDays = Math.floor((now - new Date(mem.createdAt).getTime()) / (1000 * 60 * 60 * 24));
1151
+ const priority = ageDays >= 7 ? 'high' :
1152
+ ageDays >= 3 ? 'medium' : 'low';
1153
+ // High-salience pending items are always worth surfacing
1154
+ // Low-salience ones only if they're getting stale
1155
+ if (mem.salience < 0.4 && ageDays < staleDays)
1156
+ continue;
1157
+ alerts.push({
1158
+ type: 'pending',
1159
+ priority,
1160
+ message: `Pending (${ageDays}d): ${mem.content}`,
1161
+ memoryId: mem.id,
1162
+ entities: mem.entities,
1163
+ ageDays,
1164
+ sortScore: (priority === 'high' ? 3 : priority === 'medium' ? 2 : 1) + mem.salience,
1165
+ });
1166
+ }
1167
+ // 2. Stale follow-ups — high-salience memories that haven't been accessed recently
1168
+ const allMemories = this.store.getByType('semantic', 100);
1169
+ const staleThreshold = now - staleDays * 24 * 60 * 60 * 1000;
1170
+ for (const mem of allMemories) {
1171
+ if (mem.status !== 'active')
1172
+ continue;
1173
+ if (mem.salience < 0.7)
1174
+ continue; // Only flag important stuff
1175
+ const lastAccessed = new Date(mem.lastAccessedAt).getTime();
1176
+ const ageDays = Math.floor((now - lastAccessed) / (1000 * 60 * 60 * 24));
1177
+ // Only flag if it hasn't been accessed in staleDays AND has topics suggesting follow-up
1178
+ if (lastAccessed > staleThreshold)
1179
+ continue;
1180
+ if (ageDays < staleDays)
1181
+ continue;
1182
+ // Look for action-oriented content
1183
+ const actionPatterns = /\b(should|need|must|todo|follow.?up|check|review|update|schedule|plan|deadline|due|remind)\b/i;
1184
+ if (!actionPatterns.test(mem.content))
1185
+ continue;
1186
+ alerts.push({
1187
+ type: 'stale',
1188
+ priority: ageDays >= 7 ? 'medium' : 'low',
1189
+ message: `Stale (${ageDays}d since accessed): ${mem.content}`,
1190
+ memoryId: mem.id,
1191
+ entities: mem.entities,
1192
+ ageDays,
1193
+ sortScore: (ageDays >= 7 ? 2 : 1) + mem.salience * 0.5,
1194
+ });
1195
+ }
1196
+ // 3. Contradictions
1197
+ if (includeContradictions) {
1198
+ const contradictions = this.contradictions(5);
1199
+ for (const c of contradictions) {
1200
+ const ageA = Math.floor((now - new Date(c.memoryA.createdAt).getTime()) / (1000 * 60 * 60 * 24));
1201
+ const ageB = Math.floor((now - new Date(c.memoryB.createdAt).getTime()) / (1000 * 60 * 60 * 24));
1202
+ alerts.push({
1203
+ type: 'contradiction',
1204
+ priority: 'medium',
1205
+ message: `Contradiction: "${c.memoryA.content.slice(0, 80)}" vs "${c.memoryB.content.slice(0, 80)}"`,
1206
+ entities: [...new Set([...c.memoryA.entities, ...c.memoryB.entities])],
1207
+ ageDays: Math.min(ageA, ageB),
1208
+ sortScore: 2.5, // Contradictions are always medium-high priority
1209
+ });
1210
+ }
1211
+ }
1212
+ // Sort by priority score descending, then by age descending
1213
+ alerts.sort((a, b) => b.sortScore - a.sortScore || b.ageDays - a.ageDays);
1214
+ // Return without the internal sortScore
1215
+ return alerts.slice(0, limit).map(({ sortScore, ...rest }) => rest);
1216
+ }
1217
+ // --------------------------------------------------------
1218
+ // audit() — Cross-reference external memory against vault.
1219
+ //
1220
+ // Takes content from an external source (e.g., MEMORY.md)
1221
+ // and checks for discrepancies with what's in the vault.
1222
+ // Returns claims that are outdated, missing, or contradicted.
1223
+ //
1224
+ // This is how Engram earns trust: instead of silently
1225
+ // disagreeing with the agent's other memory sources, it
1226
+ // speaks up.
1227
+ // --------------------------------------------------------
1228
+ async audit(externalContent, opts) {
1229
+ if (!this.config.llm) {
1230
+ throw new Error('audit() requires LLM configuration');
1231
+ }
1232
+ const maxClaims = opts?.maxClaims ?? 20;
1233
+ const relevanceThreshold = opts?.relevanceThreshold ?? 0.5;
1234
+ // Step 1: Extract factual claims from external content
1235
+ const extractPrompt = `Extract factual claims from this text. Each claim should be a single, verifiable statement.
1236
+
1237
+ TEXT:
1238
+ ${externalContent.slice(0, 8000)}
1239
+
1240
+ Respond as JSON:
1241
+ {"claims": ["claim 1", "claim 2", ...]}
1242
+
1243
+ Extract up to ${maxClaims} claims. Focus on specific facts (names, numbers, dates, statuses, relationships) not opinions or vague statements.`;
1244
+ const llmConfig = this.config.llm;
1245
+ const model = llmConfig.model ?? 'gemini-2.5-flash';
1246
+ const extractResponse = await this.callLLM(model, extractPrompt, llmConfig);
1247
+ let claims = [];
1248
+ try {
1249
+ const parsed = JSON.parse(extractResponse);
1250
+ claims = (parsed.claims ?? []).slice(0, maxClaims);
1251
+ }
1252
+ catch {
1253
+ return { discrepancies: [], verified: 0, total: 0 };
1254
+ }
1255
+ if (claims.length === 0) {
1256
+ return { discrepancies: [], verified: 0, total: 0 };
1257
+ }
1258
+ // Step 2: For each claim, check against vault
1259
+ const discrepancies = [];
1260
+ let verified = 0;
1261
+ for (const claim of claims) {
1262
+ // Recall memories relevant to this claim
1263
+ const memories = await this.recall({
1264
+ context: claim,
1265
+ limit: 5,
1266
+ spread: true,
1267
+ temporalFocus: 'latest',
1268
+ });
1269
+ if (memories.length === 0) {
1270
+ // No relevant memories — can't verify or contradict
1271
+ continue;
1272
+ }
1273
+ // Ask LLM to compare claim against vault memories
1274
+ const memoryContext = memories.map((m, i) => {
1275
+ const age = Math.floor((Date.now() - new Date(m.createdAt).getTime()) / (1000 * 60 * 60 * 24));
1276
+ return `[${i + 1}] (${age}d ago) ${m.content}`;
1277
+ }).join('\n');
1278
+ const comparePrompt = `Compare this claim against the vault memories below. Determine if the claim is:
1279
+ - "verified": Vault memories support this claim
1280
+ - "outdated": Vault has a MORE RECENT version of this fact
1281
+ - "contradicted": Vault directly contradicts this claim
1282
+ - "unrelated": Vault memories aren't relevant to this claim
1283
+
1284
+ CLAIM: ${claim}
1285
+
1286
+ VAULT MEMORIES:
1287
+ ${memoryContext}
1288
+
1289
+ Respond as JSON:
1290
+ {"status": "verified|outdated|contradicted|unrelated", "explanation": "brief reason", "relevantMemoryIndex": 1}`;
1291
+ try {
1292
+ const compareResponse = await this.callLLM(model, comparePrompt, llmConfig);
1293
+ const result = JSON.parse(compareResponse);
1294
+ if (result.status === 'verified') {
1295
+ verified++;
1296
+ }
1297
+ else if (result.status === 'outdated' || result.status === 'contradicted') {
1298
+ const memIdx = (result.relevantMemoryIndex ?? 1) - 1;
1299
+ const relevantMem = memories[memIdx] ?? memories[0];
1300
+ discrepancies.push({
1301
+ claim,
1302
+ source: 'external',
1303
+ vaultMemory: relevantMem.content,
1304
+ vaultCreatedAt: relevantMem.createdAt,
1305
+ type: result.status,
1306
+ explanation: result.explanation ?? '',
1307
+ });
1308
+ }
1309
+ // 'unrelated' and parse failures are silently skipped
1310
+ }
1311
+ catch {
1312
+ // LLM comparison failed for this claim, skip
1313
+ }
1314
+ // Rate limit between claims
1315
+ await new Promise(r => setTimeout(r, 500));
1316
+ }
1317
+ return {
1318
+ discrepancies,
1319
+ verified,
1320
+ total: claims.length,
1321
+ };
1322
+ }
1323
+ // --------------------------------------------------------
870
1324
  // surface() — Proactive memory surfacing.
871
1325
  //
872
1326
  // The key insight from the manifesto: memories should be
@@ -1097,7 +1551,7 @@ Respond in this exact JSON format:
1097
1551
 
1098
1552
  Keep entities specific and topics general. Limit to 10 entities and 8 topics max.`;
1099
1553
  try {
1100
- const response = await this.callLLM('gemini-2.0-flash', prompt, this.config.llm);
1554
+ const response = await this.callLLM('gemini-2.5-flash', prompt, this.config.llm);
1101
1555
  const result = JSON.parse(response);
1102
1556
  return {
1103
1557
  entities: (result.entities || []).slice(0, 10),
@@ -1160,7 +1614,7 @@ Keep entities specific and topics general. Limit to 10 entities and 8 topics max
1160
1614
  return { semanticCreated: 0, semanticUpdated: 0, entitiesDiscovered: 0, connectionsFormed: 0, contradictionsFound: 0 };
1161
1615
  }
1162
1616
  const llmConfig = this.config.llm;
1163
- const defaultModel = llmConfig.provider === 'gemini' ? 'gemini-2.0-flash'
1617
+ const defaultModel = llmConfig.provider === 'gemini' ? 'gemini-2.5-flash'
1164
1618
  : llmConfig.provider === 'openai' ? 'gpt-4o-mini'
1165
1619
  : 'claude-3-5-haiku-20241022';
1166
1620
  const model = llmConfig.model ?? defaultModel;
@@ -1313,7 +1767,7 @@ Be conservative with explicit memories. Be observant with implicit ones — look
1313
1767
  return jsonMatch ? (jsonMatch[1] ?? jsonMatch[0]) : text;
1314
1768
  }
1315
1769
  if (config.provider === 'gemini') {
1316
- const geminiModel = model.startsWith('gemini') ? model : 'gemini-2.0-flash';
1770
+ const geminiModel = model.startsWith('gemini') ? model : 'gemini-2.5-flash';
1317
1771
  const response = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${geminiModel}:generateContent?key=${config.apiKey}`, {
1318
1772
  method: 'POST',
1319
1773
  headers: { 'Content-Type': 'application/json' },