npm - hippo-memory - Versions diffs - 0.26.0 → 0.28.0 - Mend

hippo-memory 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +16 -0
package/dist/cli.js +489 -2
package/dist/cli.js.map +1 -1
package/dist/config.d.ts +13 -0
package/dist/config.d.ts.map +1 -1
package/dist/config.js +9 -0
package/dist/config.js.map +1 -1
package/dist/eval.d.ts +103 -0
package/dist/eval.d.ts.map +1 -0
package/dist/eval.js +187 -0
package/dist/eval.js.map +1 -0
package/dist/refine-llm.d.ts +53 -0
package/dist/refine-llm.d.ts.map +1 -0
package/dist/refine-llm.js +147 -0
package/dist/refine-llm.js.map +1 -0
package/dist/search.d.ts +91 -0
package/dist/search.d.ts.map +1 -1
package/dist/search.js +215 -29
package/dist/search.js.map +1 -1
package/dist/shared.d.ts +7 -0
package/dist/shared.d.ts.map +1 -1
package/dist/shared.js +31 -14
package/dist/shared.js.map +1 -1
package/extensions/openclaw-plugin/openclaw.plugin.json +1 -1
package/extensions/openclaw-plugin/package.json +1 -1
package/openclaw.plugin.json +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -60,6 +60,22 @@ hippo recall "data pipeline issues" --budget 2000
 ---
+### What's new in v0.28.0
+- **Budget saturation fix.** Large memories (14k+ chars) no longer starve retrieval. New `minResults` option guarantees at least N results regardless of token budget. From the CLI: `hippo recall <q> --min-results 5`.
+- **LongMemEval parity restored.** The 35pp R@10 gap vs v0.11 was a benchmark methodology issue (budget-limited vs unlimited comparison). Corrected: v0.28 R@3 67.0% (+0.4pp), answer_in_content@5 49.6% (+3.0pp), R@10 81.0% (-1.6pp). Top-5 results now more often contain the actual answer.
+- **MMR performance.** Re-ranking capped at top-100 candidates, dropping per-query time from ~50s to ~9s. `preparedCorpus` option skips per-query tokenization for batch callers.
+- **RRF scoring option.** `hybridSearch` accepts `scoring: 'rrf'` for reciprocal rank fusion as an alternative to score blending.
+- **`hippo refine` command.** LLM-powered semantic rewrite of memories for improved recall quality.
+### What's new in v0.27.0
+- **Recall is now debuggable.** `hippo explain <query>` prints the full score breakdown for each retrieved memory: BM25 + cosine, every multiplier (strength, recency, decision, path, source-bump, outcome), age, and final composite. Read-only so it's safe to run as a diagnostic.
+- **`hippo trace <id>`** gives a one-page dossier per memory: decay trajectory projected to 30/90 days, effective half-life, retrieval staleness, outcome counts, consolidation parents, open conflicts.
+- **MMR diversity** re-ranks near-duplicate results so you don't get five paraphrases at the top. Default `lambda=0.7`, tunable via config or `--no-mmr` / `--mmr-lambda`.
+- **Outcome feedback is immediate.** `hippo outcome --good` now nudges that memory up on the very next recall (not just via slow half-life decay). Bounded at +/-15%.
+- **`hippo eval`** measures recall quality against a test corpus (MRR, Recall@K, NDCG@K). Gate CI with `--min-mrr`. A real 15-case corpus ships at `evals/real-corpus.json`; baseline numbers in `evals/README.md`.
 ### What's new in v0.26.0
 - **`hippo audit` catches junk memories.** New command flags too-short entries, release/merge/WIP commit noise, fragments, and vague single-clause notes. `--fix` removes the worst offenders. `hippo sleep` now runs audit automatically so commit-noise never survives consolidation.

package/dist/cli.js CHANGED Viewed

@@ -47,6 +47,8 @@ import { DAILY_TASK_NAME, buildDailyRunnerCommand, listRegisteredWorkspaces, reg
 import { importChatGPT, importClaude, importCursor, importGenericFile, importMarkdown, } from './importers.js';
 import { cmdCapture } from './capture.js';
 import { auditMemories } from './audit.js';
+import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
+import { refineStore } from './refine-llm.js';
 import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
 // ---------------------------------------------------------------------------
 // Helpers
@@ -439,20 +441,38 @@ async function cmdRecall(hippoRoot, query, flags) {
     const config = loadConfig(hippoRoot);
     const usePhysics = forcePhysics
         || (!forceClassic && config.physics.enabled !== false);
+    const noMmr = Boolean(flags['no-mmr']);
+    const mmrLambda = flags['mmr-lambda'] !== undefined
+        ? parseFloat(String(flags['mmr-lambda']))
+        : config.mmr.lambda;
+    const mmrEnabled = !noMmr && config.mmr.enabled;
+    const localBump = flags['equal-sources']
+        ? 1.0
+        : flags['local-bump'] !== undefined
+            ? parseFloat(String(flags['local-bump']))
+            : config.search.localBump;
+    const minResults = flags['min-results'] !== undefined
+        ? parseInt(String(flags['min-results']), 10)
+        : undefined;
     let results;
     if (usePhysics && !hasGlobal) {
         results = await physicsSearch(query, localEntries, {
             budget,
             hippoRoot,
             physicsConfig: config.physics,
+            minResults,
         });
     }
     else if (hasGlobal) {
         // Use searchBothHybrid for merged results with embedding support
-        results = await searchBothHybrid(query, hippoRoot, globalRoot, { budget });
+        results = await searchBothHybrid(query, hippoRoot, globalRoot, {
+            budget, mmr: mmrEnabled, mmrLambda, localBump, minResults,
+        });
     }
     else {
-        results = await hybridSearch(query, localEntries, { budget, hippoRoot });
+        results = await hybridSearch(query, localEntries, {
+            budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults,
+        });
     }
     if (limit < results.length) {
         results = results.slice(0, limit);
@@ -524,6 +544,414 @@ async function cmdRecall(hippoRoot, query, flags) {
         console.log();
     }
 }
+async function cmdExplain(hippoRoot, query, flags) {
+    requireInit(hippoRoot);
+    const budget = parseInt(String(flags['budget'] ?? '4000'), 10);
+    const limit = parseLimitFlag(flags['limit']);
+    const asJson = Boolean(flags['json']);
+    const forcePhysics = Boolean(flags['physics']);
+    const forceClassic = Boolean(flags['classic']);
+    const globalRoot = getGlobalRoot();
+    const localEntries = loadSearchEntries(hippoRoot, query);
+    const globalEntries = isInitialized(globalRoot) ? loadSearchEntries(globalRoot, query) : [];
+    const hasGlobal = globalEntries.length > 0;
+    const config = loadConfig(hippoRoot);
+    const usePhysics = forcePhysics
+        || (!forceClassic && config.physics.enabled !== false);
+    const noMmr = Boolean(flags['no-mmr']);
+    const mmrLambda = flags['mmr-lambda'] !== undefined
+        ? parseFloat(String(flags['mmr-lambda']))
+        : config.mmr.lambda;
+    const mmrEnabled = !noMmr && config.mmr.enabled;
+    const localBump = flags['equal-sources']
+        ? 1.0
+        : flags['local-bump'] !== undefined
+            ? parseFloat(String(flags['local-bump']))
+            : config.search.localBump;
+    let results;
+    let modeUsed;
+    if (usePhysics && !hasGlobal) {
+        results = await physicsSearch(query, localEntries, {
+            budget,
+            hippoRoot,
+            physicsConfig: config.physics,
+            explain: true,
+        });
+        modeUsed = 'physics';
+    }
+    else if (hasGlobal) {
+        results = await searchBothHybrid(query, hippoRoot, globalRoot, {
+            budget, explain: true, mmr: mmrEnabled, mmrLambda, localBump,
+        });
+        modeUsed = 'searchBothHybrid';
+    }
+    else {
+        results = await hybridSearch(query, localEntries, {
+            budget, hippoRoot, explain: true, mmr: mmrEnabled, mmrLambda,
+        });
+        modeUsed = 'hybrid';
+    }
+    if (limit < results.length) {
+        results = results.slice(0, limit);
+    }
+    const candidates = localEntries.length + globalEntries.length;
+    if (asJson) {
+        const output = results.map((r, rank) => ({
+            rank: rank + 1,
+            id: r.entry.id,
+            layer: r.entry.layer,
+            confidence: resolveConfidence(r.entry),
+            score: r.score,
+            tokens: r.tokens,
+            tags: r.entry.tags,
+            content: r.entry.content,
+            breakdown: r.breakdown,
+        }));
+        console.log(JSON.stringify({
+            query,
+            mode: modeUsed,
+            candidates,
+            returned: output.length,
+            results: output,
+        }));
+        return;
+    }
+    if (results.length === 0) {
+        console.log(`No memories matched "${query}" (scanned ${candidates}).`);
+        return;
+    }
+    console.log(`Query: "${query}"`);
+    console.log(`Mode:  ${modeUsed}   candidates: ${candidates}   returned: ${results.length}`);
+    console.log();
+    console.log('Rank  Score   Strength  Age    Layer      ID                Preview');
+    console.log('----- ------- --------- ------ ---------- ----------------- ---------------------------------');
+    for (let i = 0; i < results.length; i++) {
+        const r = results[i];
+        const b = r.breakdown;
+        const preview = r.entry.content.replace(/\s+/g, ' ').slice(0, 48);
+        const ageStr = b ? `${b.ageDays}d` : '?';
+        console.log(`${String(i + 1).padEnd(5)} ${fmt(r.score, 3).padEnd(7)} ${fmt(r.entry.strength).padEnd(9)} ${ageStr.padEnd(6)} ${r.entry.layer.padEnd(10)} ${r.entry.id.padEnd(17)} ${preview}`);
+    }
+    console.log();
+    for (let i = 0; i < results.length; i++) {
+        const r = results[i];
+        const b = r.breakdown;
+        console.log(`[${i + 1}] ${r.entry.id}   composite=${fmt(r.score, 4)}`);
+        if (!b) {
+            console.log('    (no breakdown available)');
+            console.log();
+            continue;
+        }
+        if (b.mode === 'physics') {
+            console.log(`    mode:      physics-gravity`);
+            console.log(`    cosine:    ${fmt(b.cosine, 3)}  (pre-amp baseline)`);
+            console.log(`    final:     ${fmt(b.final, 4)}  (post-amp, from physics scorer)`);
+        }
+        else {
+            const matched = b.matchedTerms.length > 0 ? b.matchedTerms.join(', ') : '(none)';
+            console.log(`    mode:      ${b.mode}${b.mode === 'hybrid-no-vec' ? '  (no cached doc vector — run `hippo embed`)' : ''}`);
+            console.log(`    BM25:      raw=${fmt(r.bm25, 3)}  normalized=${fmt(b.normBm25, 3)}  weight=${fmt(b.bm25Weight, 2)}  matched=[${matched}]`);
+            console.log(`    embedding: cosine=${fmt(b.cosine, 3)}  weight=${fmt(b.embeddingWeight, 2)}`);
+            console.log(`    base:      ${fmt(b.bm25Weight, 2)}*${fmt(b.normBm25, 3)} + ${fmt(b.embeddingWeight, 2)}*${fmt(b.cosine, 3)} = ${fmt(b.base, 4)}`);
+            console.log(`    strength:  x${fmt(b.strengthMultiplier, 3)}  (strength=${fmt(r.entry.strength, 3)})`);
+            console.log(`    recency:   x${fmt(b.recencyMultiplier, 3)}  (age=${b.ageDays}d)`);
+            if (b.decisionBoost !== 1)
+                console.log(`    decision:  x${fmt(b.decisionBoost, 2)}  (tagged 'decision')`);
+            if (b.pathBoost !== 1)
+                console.log(`    path:      x${fmt(b.pathBoost, 3)}  (cwd path tag overlap)`);
+            if (b.sourceBump !== 1)
+                console.log(`    source:    x${fmt(b.sourceBump, 2)}  (local priority bump over global)`);
+            if (b.outcomeBoost !== 1)
+                console.log(`    outcome:   x${fmt(b.outcomeBoost, 3)}  (user feedback: pos-neg = ${(r.entry.outcome_positive ?? 0) - (r.entry.outcome_negative ?? 0)})`);
+            if (b.preMmrRank !== undefined && b.postMmrRank !== undefined && b.preMmrRank !== b.postMmrRank) {
+                const arrow = b.postMmrRank < b.preMmrRank ? 'up' : 'down';
+                console.log(`    mmr:       rank ${b.preMmrRank} -> ${b.postMmrRank}  (diversity ${arrow})`);
+            }
+            console.log(`    final:     ${fmt(b.final, 4)}`);
+        }
+        console.log();
+    }
+    console.log('Note: explain does not mark memories as retrieved (read-only).');
+}
+async function cmdEval(hippoRoot, corpusPath, flags) {
+    requireInit(hippoRoot);
+    const asJson = Boolean(flags['json']);
+    const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
+    const showCases = Boolean(flags['show-cases']);
+    const comparePath = flags['compare'] ? String(flags['compare']) : null;
+    const noMmr = Boolean(flags['no-mmr']);
+    const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
+    const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
+    const entries = loadAllEntries(hippoRoot);
+    // Bootstrap mode: emit a synthetic corpus and exit.
+    if (flags['bootstrap']) {
+        const outPath = flags['out'] ? String(flags['out']) : null;
+        const max = flags['max-cases'] !== undefined ? parseInt(String(flags['max-cases']), 10) : 50;
+        const corpus = bootstrapCorpus(entries, max);
+        const payload = JSON.stringify({ cases: corpus }, null, 2);
+        if (outPath) {
+            fs.mkdirSync(path.dirname(outPath), { recursive: true });
+            fs.writeFileSync(outPath, payload, 'utf8');
+            console.log(`Wrote ${corpus.length} bootstrap cases to ${outPath}`);
+        }
+        else {
+            console.log(payload);
+        }
+        return;
+    }
+    if (!corpusPath) {
+        console.error('Usage: hippo eval <corpus.json>  OR  hippo eval --bootstrap [--out <path>]');
+        process.exit(1);
+    }
+    if (!fs.existsSync(corpusPath)) {
+        console.error(`Corpus file not found: ${corpusPath}`);
+        process.exit(1);
+    }
+    let cases;
+    try {
+        const raw = JSON.parse(fs.readFileSync(corpusPath, 'utf8'));
+        cases = Array.isArray(raw) ? raw : raw.cases;
+        if (!Array.isArray(cases))
+            throw new Error('Corpus JSON must be an array or { cases: [...] }');
+    }
+    catch (err) {
+        console.error(`Failed to read corpus: ${err instanceof Error ? err.message : err}`);
+        process.exit(1);
+    }
+    const globalRoot = getGlobalRoot();
+    const localBump = flags['equal-sources']
+        ? 1.0
+        : flags['local-bump'] !== undefined
+            ? parseFloat(String(flags['local-bump']))
+            : loadConfig(hippoRoot).search.localBump;
+    const summary = await runEval(cases, entries, {
+        hippoRoot,
+        globalRoot,
+        mmr: !noMmr,
+        mmrLambda,
+        embeddingWeight,
+        localBump,
+    });
+    if (asJson) {
+        console.log(JSON.stringify(summary, null, 2));
+    }
+    else {
+        console.log(`Eval: ${summary.cases.length} cases, ${summary.durationMs}ms`);
+        console.log();
+        console.log(`MRR:          ${fmt(summary.meanMrr, 4)}`);
+        console.log(`Recall@5:     ${fmt(summary.meanRecallAt5, 4)}`);
+        console.log(`Recall@10:    ${fmt(summary.meanRecallAt10, 4)}`);
+        console.log(`NDCG@10:      ${fmt(summary.meanNdcgAt10, 4)}`);
+        if (showCases) {
+            console.log();
+            console.log('Case details:');
+            for (const c of summary.cases) {
+                const exp = c.case.expectedIds.length;
+                const expectedSet = new Set(c.case.expectedIds);
+                const hitTop10 = c.returnedIds.slice(0, 10).filter((id) => expectedSet.has(id));
+                const missed = c.case.expectedIds.filter((id) => !c.returnedIds.slice(0, 10).includes(id));
+                console.log();
+                console.log(`[${c.case.id}] R@10=${fmt(c.recallAt10, 2)}  MRR=${fmt(c.mrr, 2)}  expected=${exp}  hit=${hitTop10.length}`);
+                console.log(`  query: ${c.case.query}`);
+                console.log(`  top 3: ${c.returnedIds.slice(0, 3).join(', ') || '(none)'}`);
+                if (missed.length > 0) {
+                    const shown = missed.slice(0, 4);
+                    const more = missed.length > shown.length ? ` +${missed.length - shown.length} more` : '';
+                    console.log(`  missed: ${shown.join(', ')}${more}`);
+                }
+            }
+        }
+        console.log();
+        const failing = summary.cases.filter((c) => c.mrr === 0);
+        if (failing.length > 0) {
+            console.log(`${failing.length} case(s) returned zero relevant results:`);
+            for (const f of failing.slice(0, 10)) {
+                console.log(`  [${f.case.id}] "${f.case.query.slice(0, 60)}"`);
+            }
+            if (failing.length > 10)
+                console.log(`  ...and ${failing.length - 10} more`);
+        }
+    }
+    if (minMrr !== null && summary.meanMrr < minMrr) {
+        console.error(`MRR ${fmt(summary.meanMrr, 4)} below threshold ${minMrr}`);
+        process.exit(1);
+    }
+    if (comparePath) {
+        if (!fs.existsSync(comparePath)) {
+            console.error(`Baseline file not found: ${comparePath}`);
+            process.exit(1);
+        }
+        let baseline;
+        try {
+            baseline = JSON.parse(fs.readFileSync(comparePath, 'utf8'));
+        }
+        catch (err) {
+            console.error(`Failed to parse baseline: ${err instanceof Error ? err.message : err}`);
+            process.exit(1);
+        }
+        const cmp = compareSummaries(baseline, summary);
+        if (asJson) {
+            // The main JSON output already emitted; append comparison to stderr so
+            // both can be captured independently.
+            console.error(JSON.stringify({ compare: cmp }, null, 2));
+        }
+        else {
+            console.log();
+            console.log('Compare vs baseline:');
+            const sign = (d) => (d >= 0 ? '+' : '') + fmt(d, 4);
+            console.log(`  MRR:        ${sign(cmp.aggregate.mrr)}`);
+            console.log(`  Recall@5:   ${sign(cmp.aggregate.recallAt5)}`);
+            console.log(`  Recall@10:  ${sign(cmp.aggregate.recallAt10)}`);
+            console.log(`  NDCG@10:    ${sign(cmp.aggregate.ndcgAt10)}`);
+            console.log();
+            console.log(`  improved: ${cmp.improved.length}   regressed: ${cmp.regressed.length}   unchanged: ${cmp.unchanged}`);
+            if (cmp.onlyInBaseline.length > 0)
+                console.log(`  only in baseline: ${cmp.onlyInBaseline.length}`);
+            if (cmp.onlyInCurrent.length > 0)
+                console.log(`  only in current:  ${cmp.onlyInCurrent.length}`);
+            const showPerCase = cmp.improved.length + cmp.regressed.length > 0;
+            if (showPerCase) {
+                for (const d of cmp.improved.slice(0, 5)) {
+                    const delta = d.ndcgAfter - d.ndcgBefore;
+                    console.log(`  + [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (+${fmt(delta, 3)})`);
+                }
+                for (const d of cmp.regressed.slice(0, 5)) {
+                    const delta = d.ndcgAfter - d.ndcgBefore;
+                    console.log(`  - [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (${fmt(delta, 3)})`);
+                }
+            }
+        }
+    }
+}
+function cmdTrace(hippoRoot, id, flags) {
+    requireInit(hippoRoot);
+    const asJson = Boolean(flags['json']);
+    // Look in local store first, then global.
+    let entry = readEntry(hippoRoot, id);
+    let sourceLabel = 'local';
+    const globalRoot = getGlobalRoot();
+    if (!entry && isInitialized(globalRoot)) {
+        entry = readEntry(globalRoot, id);
+        sourceLabel = 'global';
+    }
+    if (!entry) {
+        console.error(`Memory not found: ${id}`);
+        process.exit(1);
+    }
+    const now = new Date();
+    const strength = calculateStrength(entry, now);
+    const halfLife = deriveHalfLife(7, entry);
+    const rewardFactor = calculateRewardFactor(entry);
+    const effHalfLife = halfLife * rewardFactor;
+    const createdMs = new Date(entry.created).getTime();
+    const ageDays = (now.getTime() - createdMs) / 86_400_000;
+    const lastMs = new Date(entry.last_retrieved).getTime();
+    const sinceLast = (now.getTime() - lastMs) / 86_400_000;
+    const conf = resolveConfidence(entry, now);
+    // Projected strength: same decay curve, just push `now` out.
+    const projectedAt = (days) => calculateStrength(entry, new Date(now.getTime() + days * 86_400_000));
+    // Parents (consolidation lineage) — schema v9 field.
+    const parents = Array.isArray(entry.parents) ? entry.parents : [];
+    const parentPreviews = parents.map((pid) => {
+        const p = readEntry(hippoRoot, pid) ?? (isInitialized(globalRoot) ? readEntry(globalRoot, pid) : null);
+        return { id: pid, content: p ? p.content.replace(/\s+/g, ' ').slice(0, 70) : '(not found)' };
+    });
+    // Open conflicts involving this memory.
+    const allConflicts = [
+        ...listMemoryConflicts(hippoRoot, 'open'),
+        ...(isInitialized(globalRoot) ? listMemoryConflicts(globalRoot, 'open') : []),
+    ];
+    const myConflicts = allConflicts.filter((c) => c.memory_a_id === id || c.memory_b_id === id);
+    if (asJson) {
+        console.log(JSON.stringify({
+            id: entry.id,
+            source: sourceLabel,
+            layer: entry.layer,
+            confidence: conf,
+            pinned: entry.pinned,
+            starred: entry.starred,
+            tags: entry.tags,
+            content: entry.content,
+            created: entry.created,
+            age_days: ageDays,
+            last_retrieved: entry.last_retrieved,
+            days_since_last_retrieval: sinceLast,
+            retrieval_count: entry.retrieval_count,
+            strength_now: strength,
+            half_life_days: halfLife,
+            reward_factor: rewardFactor,
+            effective_half_life_days: effHalfLife,
+            projected_strength_30d: projectedAt(30),
+            projected_strength_90d: projectedAt(90),
+            outcome_positive: entry.outcome_positive,
+            outcome_negative: entry.outcome_negative,
+            parents: parentPreviews,
+            open_conflicts: myConflicts,
+        }, null, 2));
+        return;
+    }
+    console.log(`Memory: ${entry.id}  [${sourceLabel}]`);
+    console.log('='.repeat(50));
+    console.log(`Content:   ${entry.content.replace(/\s+/g, ' ').slice(0, 160)}${entry.content.length > 160 ? '...' : ''}`);
+    console.log(`Layer:     ${entry.layer.padEnd(10)} Confidence: ${conf.padEnd(10)} Pinned: ${entry.pinned ? 'yes' : 'no'}${entry.starred ? '  Starred: yes' : ''}`);
+    console.log(`Tags:      ${entry.tags.join(', ') || '(none)'}`);
+    console.log(`Created:   ${entry.created}  (${fmt(ageDays, 1)} days ago)`);
+    console.log();
+    console.log(`Strength trajectory:`);
+    console.log(`  now:        ${fmt(strength, 3)}`);
+    console.log(`  in 30 days: ${fmt(projectedAt(30), 3)}`);
+    console.log(`  in 90 days: ${fmt(projectedAt(90), 3)}`);
+    console.log(`  half-life:  ${fmt(halfLife, 1)}d (base) x ${fmt(rewardFactor, 2)} reward = ${fmt(effHalfLife, 1)}d effective`);
+    console.log();
+    console.log(`Retrieval:`);
+    console.log(`  count:      ${entry.retrieval_count}`);
+    console.log(`  last:       ${entry.last_retrieved}  (${fmt(sinceLast, 1)} days ago)`);
+    console.log();
+    console.log(`Outcomes:   +${entry.outcome_positive} / -${entry.outcome_negative}`);
+    if (parentPreviews.length > 0) {
+        console.log();
+        console.log(`Parents (consolidation lineage):`);
+        for (const p of parentPreviews) {
+            console.log(`  - ${p.id}: ${p.content}`);
+        }
+    }
+    if (myConflicts.length > 0) {
+        console.log();
+        console.log(`Open conflicts: ${myConflicts.length}`);
+        for (const c of myConflicts) {
+            const other = c.memory_a_id === id ? c.memory_b_id : c.memory_a_id;
+            console.log(`  - with ${other}: ${c.reason} (score=${fmt(c.score, 2)})`);
+        }
+    }
+}
+async function cmdRefine(hippoRoot, flags) {
+    requireInit(hippoRoot);
+    const apiKey = process.env.ANTHROPIC_API_KEY;
+    if (!apiKey) {
+        console.error('hippo refine needs ANTHROPIC_API_KEY in the environment.');
+        process.exit(1);
+    }
+    const dryRun = Boolean(flags['dry-run']);
+    const all = Boolean(flags['all']);
+    const limit = flags['limit'] !== undefined ? parseInt(String(flags['limit']), 10) : undefined;
+    const model = flags['model'] ? String(flags['model']) : undefined;
+    const asJson = Boolean(flags['json']);
+    const result = await refineStore(hippoRoot, { apiKey, model, limit, dryRun, all });
+    if (asJson) {
+        console.log(JSON.stringify(result, null, 2));
+        return;
+    }
+    console.log(`Scanned:  ${result.scanned} consolidated semantic memories`);
+    console.log(`Refined:  ${result.refined}${dryRun ? '  (dry-run — no writes)' : ''}`);
+    console.log(`Skipped:  ${result.skipped}`);
+    console.log(`Failed:   ${result.failed}`);
+    if (result.failed > 0) {
+        console.log('\nFailures:');
+        for (const d of result.details.filter((x) => x.status === 'failed').slice(0, 5)) {
+            console.log(`  ${d.id}: ${d.reason}`);
+        }
+    }
+}
 /**
  * Scan for Claude Code MEMORY.md files and import new entries into hippo.
  * Looks in ~/.claude/projects/<project>/memory/ for .md files with YAML frontmatter.
@@ -2638,8 +3066,41 @@ Commands:
     --global               Store in global store ($HIPPO_HOME or ~/.hippo/)
   recall <query>           Search and retrieve memories (local + global)
     --budget <n>           Token budget (default: 4000)
+    --min-results <n>      Minimum results regardless of budget (default: 1)
     --json                 Output as JSON
     --why                  Show match reasons and source annotations
+    --no-mmr               Disable MMR diversity re-ranking
+    --mmr-lambda <f>       MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
+  explain <query>          Show full score breakdown for each retrieved memory
+    --budget <n>           Token budget (default: 4000)
+    --limit <n>            Cap the number of results displayed
+    --json                 Output as JSON
+    --physics | --classic  Force search mode (default: from config)
+    --no-mmr               Disable MMR diversity re-ranking
+    --mmr-lambda <f>       MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
+  trace <id>               Memory dossier: content, decay trajectory, retrievals,
+                           outcomes, consolidation parents, open conflicts
+    --json                 Output as JSON
+  refine                   Rewrite consolidated semantic memories with Claude
+    --limit <n>            Cap the number of memories processed this run
+    --all                  Ignore \`llm-refined\` tag and re-refine everything
+    --dry-run              Call the API but don't write results back
+    --model <id>           Override the default model (claude-sonnet-4-6)
+    --json                 Output summary as JSON
+    (requires ANTHROPIC_API_KEY in env)
+  eval [<corpus.json>]     Measure recall quality against a test corpus
+    --bootstrap            Generate a synthetic corpus from current memories
+    --out <path>           With --bootstrap, write to file instead of stdout
+    --max-cases <n>        With --bootstrap, cap case count (default: 50)
+    --show-cases           Print per-case details (query, R@10, missed, top 3)
+    --compare <path>       JSON from a prior \`eval --json\` run; print deltas
+    --no-mmr               Disable MMR for this eval run
+    --mmr-lambda <f>       Override MMR lambda for this run
+    --embedding-weight <f> Override cosine weight (default: 0.6)
+    --local-bump <f>       Local-over-global priority multiplier (default: 1.2)
+    --equal-sources        Shortcut for --local-bump 1.0
+    --min-mrr <f>          Exit non-zero if mean MRR falls below this
+    --json                 Output full summary as JSON
   context                  Smart context injection for AI agents
     --auto                 Auto-detect task from git state
     --budget <n>           Token budget (default: 1500)
@@ -2841,6 +3302,32 @@ async function main() {
             await cmdRecall(hippoRoot, query, flags);
             break;
         }
+        case 'explain': {
+            const query = args.join(' ').trim();
+            if (!query) {
+                console.error('Please provide a search query.');
+                process.exit(1);
+            }
+            await cmdExplain(hippoRoot, query, flags);
+            break;
+        }
+        case 'eval': {
+            const corpusPath = args[0] ? String(args[0]) : null;
+            await cmdEval(hippoRoot, corpusPath, flags);
+            break;
+        }
+        case 'trace': {
+            const id = args[0] ? String(args[0]) : null;
+            if (!id) {
+                console.error('Usage: hippo trace <memory-id>');
+                process.exit(1);
+            }
+            cmdTrace(hippoRoot, id, flags);
+            break;
+        }
+        case 'refine':
+            await cmdRefine(hippoRoot, flags);
+            break;
         case 'sleep':
             cmdSleep(hippoRoot, flags);
             break;