hippo-memory 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,6 +60,22 @@ hippo recall "data pipeline issues" --budget 2000
60
60
 
61
61
  ---
62
62
 
63
+ ### What's new in v0.28.0
64
+
65
+ - **Budget saturation fix.** Large memories (14k+ chars) no longer starve retrieval. New `minResults` option guarantees at least N results regardless of token budget. `hippo recall <q> --min-results 5`.
66
+ - **LongMemEval parity restored.** The 35pp R@10 gap vs v0.11 was a benchmark methodology issue (budget-limited vs unlimited comparison). Corrected: v0.28 R@3 67.0% (+0.4pp), answer_in_content@5 49.6% (+3.0pp), R@10 81.0% (-1.6pp). Top-5 results now more often contain the actual answer.
67
+ - **MMR performance.** Re-ranking capped at top-100 candidates, dropping per-query time from ~50s to ~9s. `preparedCorpus` option skips per-query tokenization for batch callers.
68
+ - **RRF scoring option.** `hybridSearch` accepts `scoring: 'rrf'` for reciprocal rank fusion as an alternative to score blending.
69
+ - **`hippo refine` command.** LLM-powered semantic rewrite of memories for improved recall quality.
70
+
71
+ ### What's new in v0.27.0
72
+
73
+ - **Recall is now debuggable.** `hippo explain <query>` prints the full score breakdown for each retrieved memory: BM25 + cosine, every multiplier (strength, recency, decision, path, source-bump, outcome), age, and final composite. Read-only so it's safe to run as a diagnostic.
74
+ - **`hippo trace <id>`** gives a one-page dossier per memory: decay trajectory projected to 30/90 days, effective half-life, retrieval staleness, outcome counts, consolidation parents, open conflicts.
75
+ - **MMR diversity** re-ranks near-duplicate results so you don't get five paraphrases at the top. Default `lambda=0.7`, tunable via config or `--no-mmr` / `--mmr-lambda`.
76
+ - **Outcome feedback is immediate.** `hippo outcome --good` now nudges that memory up on the very next recall (not just via slow half-life decay). Bounded at +/-15%.
77
+ - **`hippo eval`** measures recall quality against a test corpus (MRR, Recall@K, NDCG@K). Gate CI with `--min-mrr`. A real 15-case corpus ships at `evals/real-corpus.json`; baseline numbers in `evals/README.md`.
78
+
63
79
  ### What's new in v0.26.0
64
80
 
65
81
  - **`hippo audit` catches junk memories.** New command flags too-short entries, release/merge/WIP commit noise, fragments, and vague single-clause notes. `--fix` removes the worst offenders. `hippo sleep` now runs audit automatically so commit-noise never survives consolidation.
package/dist/cli.js CHANGED
@@ -47,6 +47,8 @@ import { DAILY_TASK_NAME, buildDailyRunnerCommand, listRegisteredWorkspaces, reg
47
47
  import { importChatGPT, importClaude, importCursor, importGenericFile, importMarkdown, } from './importers.js';
48
48
  import { cmdCapture } from './capture.js';
49
49
  import { auditMemories } from './audit.js';
50
+ import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
51
+ import { refineStore } from './refine-llm.js';
50
52
  import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
51
53
  // ---------------------------------------------------------------------------
52
54
  // Helpers
@@ -439,20 +441,38 @@ async function cmdRecall(hippoRoot, query, flags) {
439
441
  const config = loadConfig(hippoRoot);
440
442
  const usePhysics = forcePhysics
441
443
  || (!forceClassic && config.physics.enabled !== false);
444
+ const noMmr = Boolean(flags['no-mmr']);
445
+ const mmrLambda = flags['mmr-lambda'] !== undefined
446
+ ? parseFloat(String(flags['mmr-lambda']))
447
+ : config.mmr.lambda;
448
+ const mmrEnabled = !noMmr && config.mmr.enabled;
449
+ const localBump = flags['equal-sources']
450
+ ? 1.0
451
+ : flags['local-bump'] !== undefined
452
+ ? parseFloat(String(flags['local-bump']))
453
+ : config.search.localBump;
454
+ const minResults = flags['min-results'] !== undefined
455
+ ? parseInt(String(flags['min-results']), 10)
456
+ : undefined;
442
457
  let results;
443
458
  if (usePhysics && !hasGlobal) {
444
459
  results = await physicsSearch(query, localEntries, {
445
460
  budget,
446
461
  hippoRoot,
447
462
  physicsConfig: config.physics,
463
+ minResults,
448
464
  });
449
465
  }
450
466
  else if (hasGlobal) {
451
467
  // Use searchBothHybrid for merged results with embedding support
452
- results = await searchBothHybrid(query, hippoRoot, globalRoot, { budget });
468
+ results = await searchBothHybrid(query, hippoRoot, globalRoot, {
469
+ budget, mmr: mmrEnabled, mmrLambda, localBump, minResults,
470
+ });
453
471
  }
454
472
  else {
455
- results = await hybridSearch(query, localEntries, { budget, hippoRoot });
473
+ results = await hybridSearch(query, localEntries, {
474
+ budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults,
475
+ });
456
476
  }
457
477
  if (limit < results.length) {
458
478
  results = results.slice(0, limit);
@@ -524,6 +544,414 @@ async function cmdRecall(hippoRoot, query, flags) {
524
544
  console.log();
525
545
  }
526
546
  }
547
/**
 * `hippo explain <query>`: run a search with `explain: true` and print how
 * each returned memory's score was put together.
 *
 * Backend selection: the merged local+global search is used whenever the
 * global store produced entries; otherwise the physics search when it is
 * forced or enabled in config; otherwise the plain hybrid search.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {string} query - Search query text.
 * @param {object} flags - Parsed CLI flags. Reads `budget`, `limit`, `json`,
 *   `physics`, `classic`, `no-mmr`, `mmr-lambda`, `equal-sources`, `local-bump`.
 * @returns {Promise<void>} All output is written to stdout.
 */
async function cmdExplain(hippoRoot, query, flags) {
    requireInit(hippoRoot);

    const budget = parseInt(String(flags['budget'] ?? '4000'), 10);
    const limit = parseLimitFlag(flags['limit']);
    const asJson = Boolean(flags['json']);
    const forcePhysics = Boolean(flags['physics']);
    const forceClassic = Boolean(flags['classic']);

    const globalRoot = getGlobalRoot();
    const localEntries = loadSearchEntries(hippoRoot, query);
    const globalEntries = isInitialized(globalRoot) ? loadSearchEntries(globalRoot, query) : [];
    const hasGlobal = globalEntries.length > 0;
    const config = loadConfig(hippoRoot);

    // --physics wins outright; --classic vetoes; otherwise config decides.
    let usePhysics = forcePhysics;
    if (!usePhysics && !forceClassic) {
        usePhysics = config.physics.enabled !== false;
    }

    const noMmr = Boolean(flags['no-mmr']);
    let mmrLambda;
    if (flags['mmr-lambda'] === undefined) {
        mmrLambda = config.mmr.lambda;
    }
    else {
        mmrLambda = parseFloat(String(flags['mmr-lambda']));
    }
    const mmrEnabled = noMmr ? false : config.mmr.enabled;

    // --equal-sources takes precedence over an explicit --local-bump.
    let localBump;
    if (flags['equal-sources']) {
        localBump = 1.0;
    }
    else if (flags['local-bump'] !== undefined) {
        localBump = parseFloat(String(flags['local-bump']));
    }
    else {
        localBump = config.search.localBump;
    }

    let results;
    let modeUsed;
    if (hasGlobal) {
        results = await searchBothHybrid(query, hippoRoot, globalRoot, {
            budget, explain: true, mmr: mmrEnabled, mmrLambda, localBump,
        });
        modeUsed = 'searchBothHybrid';
    }
    else if (usePhysics) {
        results = await physicsSearch(query, localEntries, {
            budget,
            hippoRoot,
            physicsConfig: config.physics,
            explain: true,
        });
        modeUsed = 'physics';
    }
    else {
        results = await hybridSearch(query, localEntries, {
            budget, hippoRoot, explain: true, mmr: mmrEnabled, mmrLambda,
        });
        modeUsed = 'hybrid';
    }

    if (limit < results.length) {
        results = results.slice(0, limit);
    }
    const candidates = localEntries.length + globalEntries.length;

    if (asJson) {
        const output = results.map((hit, position) => ({
            rank: position + 1,
            id: hit.entry.id,
            layer: hit.entry.layer,
            confidence: resolveConfidence(hit.entry),
            score: hit.score,
            tokens: hit.tokens,
            tags: hit.entry.tags,
            content: hit.entry.content,
            breakdown: hit.breakdown,
        }));
        const envelope = {
            query,
            mode: modeUsed,
            candidates,
            returned: output.length,
            results: output,
        };
        console.log(JSON.stringify(envelope));
        return;
    }

    if (results.length === 0) {
        console.log(`No memories matched "${query}" (scanned ${candidates}).`);
        return;
    }

    console.log(`Query: "${query}"`);
    console.log(`Mode: ${modeUsed} candidates: ${candidates} returned: ${results.length}`);
    console.log();

    // Pass 1: compact summary table, one row per result.
    console.log('Rank Score Strength Age Layer ID Preview');
    console.log('----- ------- --------- ------ ---------- ----------------- ---------------------------------');
    for (const [idx, hit] of results.entries()) {
        const detail = hit.breakdown;
        const preview = hit.entry.content.replace(/\s+/g, ' ').slice(0, 48);
        const ageStr = detail ? `${detail.ageDays}d` : '?';
        const cells = [
            String(idx + 1).padEnd(5),
            fmt(hit.score, 3).padEnd(7),
            fmt(hit.entry.strength).padEnd(9),
            ageStr.padEnd(6),
            hit.entry.layer.padEnd(10),
            hit.entry.id.padEnd(17),
            preview,
        ];
        console.log(cells.join(' '));
    }
    console.log();

    // Pass 2: per-result score derivation.
    for (const [idx, hit] of results.entries()) {
        const detail = hit.breakdown;
        console.log(`[${idx + 1}] ${hit.entry.id} composite=${fmt(hit.score, 4)}`);
        if (!detail) {
            console.log(' (no breakdown available)');
            console.log();
            continue;
        }
        if (detail.mode === 'physics') {
            console.log(` mode: physics-gravity`);
            console.log(` cosine: ${fmt(detail.cosine, 3)} (pre-amp baseline)`);
            console.log(` final: ${fmt(detail.final, 4)} (post-amp, from physics scorer)`);
        }
        else {
            const matched = detail.matchedTerms.length > 0 ? detail.matchedTerms.join(', ') : '(none)';
            const modeHint = detail.mode === 'hybrid-no-vec' ? ' (no cached doc vector — run `hippo embed`)' : '';
            console.log(` mode: ${detail.mode}${modeHint}`);
            console.log(` BM25: raw=${fmt(hit.bm25, 3)} normalized=${fmt(detail.normBm25, 3)} weight=${fmt(detail.bm25Weight, 2)} matched=[${matched}]`);
            console.log(` embedding: cosine=${fmt(detail.cosine, 3)} weight=${fmt(detail.embeddingWeight, 2)}`);
            console.log(` base: ${fmt(detail.bm25Weight, 2)}*${fmt(detail.normBm25, 3)} + ${fmt(detail.embeddingWeight, 2)}*${fmt(detail.cosine, 3)} = ${fmt(detail.base, 4)}`);
            console.log(` strength: x${fmt(detail.strengthMultiplier, 3)} (strength=${fmt(hit.entry.strength, 3)})`);
            console.log(` recency: x${fmt(detail.recencyMultiplier, 3)} (age=${detail.ageDays}d)`);

            // Optional multipliers are shown only when they are not exactly 1.
            // Renderers are thunks so nothing is formatted for a skipped line.
            const optionalFactors = [
                [detail.decisionBoost, () => ` decision: x${fmt(detail.decisionBoost, 2)} (tagged 'decision')`],
                [detail.pathBoost, () => ` path: x${fmt(detail.pathBoost, 3)} (cwd path tag overlap)`],
                [detail.sourceBump, () => ` source: x${fmt(detail.sourceBump, 2)} (local priority bump over global)`],
                [detail.outcomeBoost, () => ` outcome: x${fmt(detail.outcomeBoost, 3)} (user feedback: pos-neg = ${(hit.entry.outcome_positive ?? 0) - (hit.entry.outcome_negative ?? 0)})`],
            ];
            for (const [factor, render] of optionalFactors) {
                if (factor !== 1) {
                    console.log(render());
                }
            }

            const before = detail.preMmrRank;
            const after = detail.postMmrRank;
            if (before !== undefined && after !== undefined && before !== after) {
                const arrow = after < before ? 'up' : 'down';
                console.log(` mmr: rank ${before} -> ${after} (diversity ${arrow})`);
            }
            console.log(` final: ${fmt(detail.final, 4)}`);
        }
        console.log();
    }
    console.log('Note: explain does not mark memories as retrieved (read-only).');
}
676
/**
 * `hippo eval`: measure recall quality against a corpus of test cases, or
 * (with `--bootstrap`) emit a synthetic corpus built from the current store.
 *
 * Modes, in the order they are checked:
 *  1. `--bootstrap`: build a corpus via `bootstrapCorpus`, write it to `--out`
 *     or stdout, and return.
 *  2. Normal run: load `corpusPath` (a JSON array or `{ cases: [...] }`), run
 *     `runEval`, print the summary (text or `--json`).
 *  3. `--min-mrr`: exit 1 when mean MRR is below the threshold.
 *  4. `--compare <path>`: diff against a prior summary via `compareSummaries`.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {string|null} corpusPath - Path to the corpus JSON; may be null in
 *   bootstrap mode.
 * @param {object} flags - Parsed CLI flags.
 * @returns {Promise<void>} Exits the process with code 1 on usage errors,
 *   unreadable inputs, or a failed `--min-mrr` gate.
 */
async function cmdEval(hippoRoot, corpusPath, flags) {
    requireInit(hippoRoot);
    const asJson = Boolean(flags['json']);
    const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
    const showCases = Boolean(flags['show-cases']);
    const comparePath = flags['compare'] ? String(flags['compare']) : null;
    const noMmr = Boolean(flags['no-mmr']);
    const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
    const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
    const entries = loadAllEntries(hippoRoot);
    // Bootstrap mode: emit a synthetic corpus and exit.
    if (flags['bootstrap']) {
        const outPath = flags['out'] ? String(flags['out']) : null;
        const max = flags['max-cases'] !== undefined ? parseInt(String(flags['max-cases']), 10) : 50;
        const corpus = bootstrapCorpus(entries, max);
        const payload = JSON.stringify({ cases: corpus }, null, 2);
        if (outPath) {
            fs.mkdirSync(path.dirname(outPath), { recursive: true });
            fs.writeFileSync(outPath, payload, 'utf8');
            console.log(`Wrote ${corpus.length} bootstrap cases to ${outPath}`);
        }
        else {
            console.log(payload);
        }
        return;
    }
    // A non-numeric --min-mrr (typo, or the flag given without a value) parses
    // to NaN. `meanMrr < NaN` is always false, so the gate below would silently
    // pass. Refuse to run rather than report a green CI check that checked nothing.
    if (minMrr !== null && Number.isNaN(minMrr)) {
        console.error(`Invalid --min-mrr value: ${String(flags['min-mrr'])} (expected a number)`);
        process.exit(1);
    }
    if (!corpusPath) {
        console.error('Usage: hippo eval <corpus.json> OR hippo eval --bootstrap [--out <path>]');
        process.exit(1);
    }
    if (!fs.existsSync(corpusPath)) {
        console.error(`Corpus file not found: ${corpusPath}`);
        process.exit(1);
    }
    let cases;
    try {
        const raw = JSON.parse(fs.readFileSync(corpusPath, 'utf8'));
        // Accept either a bare array of cases or the { cases: [...] } wrapper
        // that bootstrap mode writes.
        cases = Array.isArray(raw) ? raw : raw.cases;
        if (!Array.isArray(cases))
            throw new Error('Corpus JSON must be an array or { cases: [...] }');
    }
    catch (err) {
        console.error(`Failed to read corpus: ${err instanceof Error ? err.message : err}`);
        process.exit(1);
    }
    const globalRoot = getGlobalRoot();
    // --equal-sources takes precedence over an explicit --local-bump.
    const localBump = flags['equal-sources']
        ? 1.0
        : flags['local-bump'] !== undefined
            ? parseFloat(String(flags['local-bump']))
            : loadConfig(hippoRoot).search.localBump;
    // NOTE(review): MMR is driven by the --no-mmr flag alone here; the
    // config's mmr.enabled setting is not consulted. Confirm this is intended.
    const summary = await runEval(cases, entries, {
        hippoRoot,
        globalRoot,
        mmr: !noMmr,
        mmrLambda,
        embeddingWeight,
        localBump,
    });
    if (asJson) {
        console.log(JSON.stringify(summary, null, 2));
    }
    else {
        console.log(`Eval: ${summary.cases.length} cases, ${summary.durationMs}ms`);
        console.log();
        console.log(`MRR: ${fmt(summary.meanMrr, 4)}`);
        console.log(`Recall@5: ${fmt(summary.meanRecallAt5, 4)}`);
        console.log(`Recall@10: ${fmt(summary.meanRecallAt10, 4)}`);
        console.log(`NDCG@10: ${fmt(summary.meanNdcgAt10, 4)}`);
        if (showCases) {
            console.log();
            console.log('Case details:');
            for (const c of summary.cases) {
                const exp = c.case.expectedIds.length;
                const expectedSet = new Set(c.case.expectedIds);
                // Build the top-10 window once and use a Set for membership,
                // instead of re-slicing and scanning it per expected id.
                const top10 = c.returnedIds.slice(0, 10);
                const top10Set = new Set(top10);
                const hitTop10 = top10.filter((id) => expectedSet.has(id));
                const missed = c.case.expectedIds.filter((id) => !top10Set.has(id));
                console.log();
                console.log(`[${c.case.id}] R@10=${fmt(c.recallAt10, 2)} MRR=${fmt(c.mrr, 2)} expected=${exp} hit=${hitTop10.length}`);
                console.log(` query: ${c.case.query}`);
                console.log(` top 3: ${c.returnedIds.slice(0, 3).join(', ') || '(none)'}`);
                if (missed.length > 0) {
                    const shown = missed.slice(0, 4);
                    const more = missed.length > shown.length ? ` +${missed.length - shown.length} more` : '';
                    console.log(` missed: ${shown.join(', ')}${more}`);
                }
            }
        }
        console.log();
        // An MRR of exactly 0 means no expected id appeared in the results.
        const failing = summary.cases.filter((c) => c.mrr === 0);
        if (failing.length > 0) {
            console.log(`${failing.length} case(s) returned zero relevant results:`);
            for (const f of failing.slice(0, 10)) {
                console.log(` [${f.case.id}] "${f.case.query.slice(0, 60)}"`);
            }
            if (failing.length > 10)
                console.log(` ...and ${failing.length - 10} more`);
        }
    }
    if (minMrr !== null && summary.meanMrr < minMrr) {
        console.error(`MRR ${fmt(summary.meanMrr, 4)} below threshold ${minMrr}`);
        process.exit(1);
    }
    if (comparePath) {
        if (!fs.existsSync(comparePath)) {
            console.error(`Baseline file not found: ${comparePath}`);
            process.exit(1);
        }
        let baseline;
        try {
            baseline = JSON.parse(fs.readFileSync(comparePath, 'utf8'));
        }
        catch (err) {
            console.error(`Failed to parse baseline: ${err instanceof Error ? err.message : err}`);
            process.exit(1);
        }
        const cmp = compareSummaries(baseline, summary);
        if (asJson) {
            // The main JSON output already emitted; append comparison to stderr so
            // both can be captured independently.
            console.error(JSON.stringify({ compare: cmp }, null, 2));
        }
        else {
            console.log();
            console.log('Compare vs baseline:');
            // Explicit "+" on non-negative deltas; negatives carry their own sign.
            const sign = (d) => (d >= 0 ? '+' : '') + fmt(d, 4);
            console.log(` MRR: ${sign(cmp.aggregate.mrr)}`);
            console.log(` Recall@5: ${sign(cmp.aggregate.recallAt5)}`);
            console.log(` Recall@10: ${sign(cmp.aggregate.recallAt10)}`);
            console.log(` NDCG@10: ${sign(cmp.aggregate.ndcgAt10)}`);
            console.log();
            console.log(` improved: ${cmp.improved.length} regressed: ${cmp.regressed.length} unchanged: ${cmp.unchanged}`);
            if (cmp.onlyInBaseline.length > 0)
                console.log(` only in baseline: ${cmp.onlyInBaseline.length}`);
            if (cmp.onlyInCurrent.length > 0)
                console.log(` only in current: ${cmp.onlyInCurrent.length}`);
            const showPerCase = cmp.improved.length + cmp.regressed.length > 0;
            if (showPerCase) {
                // At most five examples in each direction.
                for (const d of cmp.improved.slice(0, 5)) {
                    const delta = d.ndcgAfter - d.ndcgBefore;
                    console.log(` + [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (+${fmt(delta, 3)})`);
                }
                for (const d of cmp.regressed.slice(0, 5)) {
                    const delta = d.ndcgAfter - d.ndcgBefore;
                    console.log(` - [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (${fmt(delta, 3)})`);
                }
            }
        }
    }
}
826
/**
 * `hippo trace <id>`: print a dossier for a single memory, covering content,
 * strength now and projected to 30/90 days, half-life, retrieval stats,
 * outcome counts, consolidation parents and open conflicts.
 *
 * The memory is looked up in the local store first, then in the global store
 * if that is initialized. Exits with code 1 when it is found in neither.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {string} id - Memory id to inspect.
 * @param {object} flags - Parsed CLI flags; only `json` is read.
 * @returns {void} Output goes to stdout (JSON when `--json` is set).
 */
function cmdTrace(hippoRoot, id, flags) {
    requireInit(hippoRoot);
    const asJson = Boolean(flags['json']);
    // Look in local store first, then global.
    let entry = readEntry(hippoRoot, id);
    let sourceLabel = 'local';
    const globalRoot = getGlobalRoot();
    if (!entry && isInitialized(globalRoot)) {
        entry = readEntry(globalRoot, id);
        sourceLabel = 'global';
    }
    if (!entry) {
        console.error(`Memory not found: ${id}`);
        process.exit(1);
    }
    const now = new Date();
    const strength = calculateStrength(entry, now);
    const halfLife = deriveHalfLife(7, entry);
    const rewardFactor = calculateRewardFactor(entry);
    const effHalfLife = halfLife * rewardFactor;
    const createdMs = new Date(entry.created).getTime();
    const ageDays = (now.getTime() - createdMs) / 86_400_000;
    const lastMs = new Date(entry.last_retrieved).getTime();
    const sinceLast = (now.getTime() - lastMs) / 86_400_000;
    const conf = resolveConfidence(entry, now);
    // Outcome counters may be absent on an entry (cmdExplain in this file
    // defaults them the same way); treat missing as zero so the report never
    // prints "+undefined / -undefined".
    const outcomePositive = entry.outcome_positive ?? 0;
    const outcomeNegative = entry.outcome_negative ?? 0;
    // Projected strength: same decay curve, just push `now` out.
    const projectedAt = (days) => calculateStrength(entry, new Date(now.getTime() + days * 86_400_000));
    // Parents (consolidation lineage) — schema v9 field.
    const parents = Array.isArray(entry.parents) ? entry.parents : [];
    const parentPreviews = parents.map((pid) => {
        const p = readEntry(hippoRoot, pid) ?? (isInitialized(globalRoot) ? readEntry(globalRoot, pid) : null);
        return { id: pid, content: p ? p.content.replace(/\s+/g, ' ').slice(0, 70) : '(not found)' };
    });
    // Open conflicts involving this memory.
    const allConflicts = [
        ...listMemoryConflicts(hippoRoot, 'open'),
        ...(isInitialized(globalRoot) ? listMemoryConflicts(globalRoot, 'open') : []),
    ];
    const myConflicts = allConflicts.filter((c) => c.memory_a_id === id || c.memory_b_id === id);
    if (asJson) {
        console.log(JSON.stringify({
            id: entry.id,
            source: sourceLabel,
            layer: entry.layer,
            confidence: conf,
            pinned: entry.pinned,
            starred: entry.starred,
            tags: entry.tags,
            content: entry.content,
            created: entry.created,
            age_days: ageDays,
            last_retrieved: entry.last_retrieved,
            days_since_last_retrieval: sinceLast,
            retrieval_count: entry.retrieval_count,
            strength_now: strength,
            half_life_days: halfLife,
            reward_factor: rewardFactor,
            effective_half_life_days: effHalfLife,
            projected_strength_30d: projectedAt(30),
            projected_strength_90d: projectedAt(90),
            outcome_positive: outcomePositive,
            outcome_negative: outcomeNegative,
            parents: parentPreviews,
            open_conflicts: myConflicts,
        }, null, 2));
        return;
    }
    // Decide on the ellipsis from the whitespace-collapsed text that is
    // actually shown. The raw length can exceed 160 while the collapsed text
    // fits, which would add "..." to content that was not truncated.
    const flatContent = entry.content.replace(/\s+/g, ' ');
    console.log(`Memory: ${entry.id} [${sourceLabel}]`);
    console.log('='.repeat(50));
    console.log(`Content: ${flatContent.slice(0, 160)}${flatContent.length > 160 ? '...' : ''}`);
    console.log(`Layer: ${entry.layer.padEnd(10)} Confidence: ${conf.padEnd(10)} Pinned: ${entry.pinned ? 'yes' : 'no'}${entry.starred ? ' Starred: yes' : ''}`);
    console.log(`Tags: ${entry.tags.join(', ') || '(none)'}`);
    console.log(`Created: ${entry.created} (${fmt(ageDays, 1)} days ago)`);
    console.log();
    console.log(`Strength trajectory:`);
    console.log(` now: ${fmt(strength, 3)}`);
    console.log(` in 30 days: ${fmt(projectedAt(30), 3)}`);
    console.log(` in 90 days: ${fmt(projectedAt(90), 3)}`);
    console.log(` half-life: ${fmt(halfLife, 1)}d (base) x ${fmt(rewardFactor, 2)} reward = ${fmt(effHalfLife, 1)}d effective`);
    console.log();
    console.log(`Retrieval:`);
    console.log(` count: ${entry.retrieval_count}`);
    console.log(` last: ${entry.last_retrieved} (${fmt(sinceLast, 1)} days ago)`);
    console.log();
    console.log(`Outcomes: +${outcomePositive} / -${outcomeNegative}`);
    if (parentPreviews.length > 0) {
        console.log();
        console.log(`Parents (consolidation lineage):`);
        for (const p of parentPreviews) {
            console.log(` - ${p.id}: ${p.content}`);
        }
    }
    if (myConflicts.length > 0) {
        console.log();
        console.log(`Open conflicts: ${myConflicts.length}`);
        for (const c of myConflicts) {
            // Show the id on the other side of the conflict pair.
            const other = c.memory_a_id === id ? c.memory_b_id : c.memory_a_id;
            console.log(` - with ${other}: ${c.reason} (score=${fmt(c.score, 2)})`);
        }
    }
}
927
/**
 * `hippo refine`: hand the store to `refineStore` for an LLM rewrite pass and
 * print a summary of what was scanned, refined, skipped and failed.
 *
 * Requires `ANTHROPIC_API_KEY` in the environment; exits with code 1 without
 * it, or when `--limit` is not a number.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {object} flags - Parsed CLI flags. Reads `dry-run`, `all`, `limit`,
 *   `model`, `json`.
 * @returns {Promise<void>} Output goes to stdout (JSON when `--json` is set).
 */
async function cmdRefine(hippoRoot, flags) {
    requireInit(hippoRoot);
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.error('hippo refine needs ANTHROPIC_API_KEY in the environment.');
        process.exit(1);
    }
    const dryRun = Boolean(flags['dry-run']);
    const all = Boolean(flags['all']);
    const limit = flags['limit'] !== undefined ? parseInt(String(flags['limit']), 10) : undefined;
    // A typo'd --limit parses to NaN. Reject it here rather than pass NaN to
    // refineStore; how refineStore treats a NaN cap is not visible from this
    // function, and this command makes API calls.
    if (limit !== undefined && Number.isNaN(limit)) {
        console.error(`Invalid --limit value: ${String(flags['limit'])} (expected an integer)`);
        process.exit(1);
    }
    const model = flags['model'] ? String(flags['model']) : undefined;
    const asJson = Boolean(flags['json']);
    const result = await refineStore(hippoRoot, { apiKey, model, limit, dryRun, all });
    if (asJson) {
        console.log(JSON.stringify(result, null, 2));
        return;
    }
    console.log(`Scanned: ${result.scanned} consolidated semantic memories`);
    console.log(`Refined: ${result.refined}${dryRun ? ' (dry-run — no writes)' : ''}`);
    console.log(`Skipped: ${result.skipped}`);
    console.log(`Failed: ${result.failed}`);
    if (result.failed > 0) {
        const failures = result.details.filter((x) => x.status === 'failed');
        console.log('\nFailures:');
        for (const d of failures.slice(0, 5)) {
            console.log(` ${d.id}: ${d.reason}`);
        }
        // Say so when the list is cut short, so five shown is not read as five total.
        if (failures.length > 5)
            console.log(` ...and ${failures.length - 5} more`);
    }
}
527
955
  /**
528
956
  * Scan for Claude Code MEMORY.md files and import new entries into hippo.
529
957
  * Looks in ~/.claude/projects/<project>/memory/ for .md files with YAML frontmatter.
@@ -2638,8 +3066,41 @@ Commands:
2638
3066
  --global Store in global store ($HIPPO_HOME or ~/.hippo/)
2639
3067
  recall <query> Search and retrieve memories (local + global)
2640
3068
  --budget <n> Token budget (default: 4000)
3069
+ --min-results <n> Minimum results regardless of budget (default: 1)
2641
3070
  --json Output as JSON
2642
3071
  --why Show match reasons and source annotations
3072
+ --no-mmr Disable MMR diversity re-ranking
3073
+ --mmr-lambda <f> MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
3074
+ explain <query> Show full score breakdown for each retrieved memory
3075
+ --budget <n> Token budget (default: 4000)
3076
+ --limit <n> Cap the number of results displayed
3077
+ --json Output as JSON
3078
+ --physics | --classic Force search mode (default: from config)
3079
+ --no-mmr Disable MMR diversity re-ranking
3080
+ --mmr-lambda <f> MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
3081
+ trace <id> Memory dossier: content, decay trajectory, retrievals,
3082
+ outcomes, consolidation parents, open conflicts
3083
+ --json Output as JSON
3084
+ refine Rewrite consolidated semantic memories with Claude
3085
+ --limit <n> Cap the number of memories processed this run
3086
+ --all Ignore \`llm-refined\` tag and re-refine everything
3087
+ --dry-run Call the API but don't write results back
3088
+ --model <id> Override the default model (claude-sonnet-4-6)
3089
+ --json Output summary as JSON
3090
+ (requires ANTHROPIC_API_KEY in env)
3091
+ eval [<corpus.json>] Measure recall quality against a test corpus
3092
+ --bootstrap Generate a synthetic corpus from current memories
3093
+ --out <path> With --bootstrap, write to file instead of stdout
3094
+ --max-cases <n> With --bootstrap, cap case count (default: 50)
3095
+ --show-cases Print per-case details (query, R@10, missed, top 3)
3096
+ --compare <path> JSON from a prior \`eval --json\` run; print deltas
3097
+ --no-mmr Disable MMR for this eval run
3098
+ --mmr-lambda <f> Override MMR lambda for this run
3099
+ --embedding-weight <f> Override cosine weight (default: 0.6)
3100
+ --local-bump <f> Local-over-global priority multiplier (default: 1.2)
3101
+ --equal-sources Shortcut for --local-bump 1.0
3102
+ --min-mrr <f> Exit non-zero if mean MRR falls below this
3103
+ --json Output full summary as JSON
2643
3104
  context Smart context injection for AI agents
2644
3105
  --auto Auto-detect task from git state
2645
3106
  --budget <n> Token budget (default: 1500)
@@ -2841,6 +3302,32 @@ async function main() {
2841
3302
  await cmdRecall(hippoRoot, query, flags);
2842
3303
  break;
2843
3304
  }
3305
+ case 'explain': {
3306
+ const query = args.join(' ').trim();
3307
+ if (!query) {
3308
+ console.error('Please provide a search query.');
3309
+ process.exit(1);
3310
+ }
3311
+ await cmdExplain(hippoRoot, query, flags);
3312
+ break;
3313
+ }
3314
+ case 'eval': {
3315
+ const corpusPath = args[0] ? String(args[0]) : null;
3316
+ await cmdEval(hippoRoot, corpusPath, flags);
3317
+ break;
3318
+ }
3319
+ case 'trace': {
3320
+ const id = args[0] ? String(args[0]) : null;
3321
+ if (!id) {
3322
+ console.error('Usage: hippo trace <memory-id>');
3323
+ process.exit(1);
3324
+ }
3325
+ cmdTrace(hippoRoot, id, flags);
3326
+ break;
3327
+ }
3328
+ case 'refine':
3329
+ await cmdRefine(hippoRoot, flags);
3330
+ break;
2844
3331
  case 'sleep':
2845
3332
  cmdSleep(hippoRoot, flags);
2846
3333
  break;