hippo-memory 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,6 +60,14 @@ hippo recall "data pipeline issues" --budget 2000
60
60
 
61
61
  ---
62
62
 
63
+ ### What's new in v0.28.0
64
+
65
+ - **Budget saturation fix.** Large memories (14k+ chars) no longer starve retrieval. New `minResults` option guarantees at least N results regardless of token budget. Example: `hippo recall <q> --min-results 5`.
66
+ - **LongMemEval parity restored.** The 35pp R@10 gap vs v0.11 was a benchmark methodology issue: a budget-limited run was being compared against an unlimited one. With the comparison corrected, v0.28 scores R@3 67.0% (+0.4pp), answer_in_content@5 49.6% (+3.0pp), and R@10 81.0% (-1.6pp). Top-5 results now more often contain the actual answer.
67
+ - **MMR performance.** Re-ranking capped at top-100 candidates, dropping per-query time from ~50s to ~9s. `preparedCorpus` option skips per-query tokenization for batch callers.
68
+ - **RRF scoring option.** `hybridSearch` accepts `scoring: 'rrf'` for reciprocal rank fusion as an alternative to score blending.
69
+ - **`hippo refine` command.** LLM-powered semantic rewrite of memories for improved recall quality.
70
+
63
71
  ### What's new in v0.27.0
64
72
 
65
73
  - **Recall is now debuggable.** `hippo explain <query>` prints the full score breakdown for each retrieved memory: BM25 + cosine, every multiplier (strength, recency, decision, path, source-bump, outcome), age, and final composite. Read-only so it's safe to run as a diagnostic.
package/dist/cli.js CHANGED
@@ -47,7 +47,8 @@ import { DAILY_TASK_NAME, buildDailyRunnerCommand, listRegisteredWorkspaces, reg
47
47
  import { importChatGPT, importClaude, importCursor, importGenericFile, importMarkdown, } from './importers.js';
48
48
  import { cmdCapture } from './capture.js';
49
49
  import { auditMemories } from './audit.js';
50
- import { runEval, bootstrapCorpus } from './eval.js';
50
+ import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
51
+ import { refineStore } from './refine-llm.js';
51
52
  import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
52
53
  // ---------------------------------------------------------------------------
53
54
  // Helpers
@@ -445,23 +446,32 @@ async function cmdRecall(hippoRoot, query, flags) {
445
446
  ? parseFloat(String(flags['mmr-lambda']))
446
447
  : config.mmr.lambda;
447
448
  const mmrEnabled = !noMmr && config.mmr.enabled;
449
+ const localBump = flags['equal-sources']
450
+ ? 1.0
451
+ : flags['local-bump'] !== undefined
452
+ ? parseFloat(String(flags['local-bump']))
453
+ : config.search.localBump;
454
+ const minResults = flags['min-results'] !== undefined
455
+ ? parseInt(String(flags['min-results']), 10)
456
+ : undefined;
448
457
  let results;
449
458
  if (usePhysics && !hasGlobal) {
450
459
  results = await physicsSearch(query, localEntries, {
451
460
  budget,
452
461
  hippoRoot,
453
462
  physicsConfig: config.physics,
463
+ minResults,
454
464
  });
455
465
  }
456
466
  else if (hasGlobal) {
457
467
  // Use searchBothHybrid for merged results with embedding support
458
468
  results = await searchBothHybrid(query, hippoRoot, globalRoot, {
459
- budget, mmr: mmrEnabled, mmrLambda,
469
+ budget, mmr: mmrEnabled, mmrLambda, localBump, minResults,
460
470
  });
461
471
  }
462
472
  else {
463
473
  results = await hybridSearch(query, localEntries, {
464
- budget, hippoRoot, mmr: mmrEnabled, mmrLambda,
474
+ budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults,
465
475
  });
466
476
  }
467
477
  if (limit < results.length) {
@@ -553,6 +563,11 @@ async function cmdExplain(hippoRoot, query, flags) {
553
563
  ? parseFloat(String(flags['mmr-lambda']))
554
564
  : config.mmr.lambda;
555
565
  const mmrEnabled = !noMmr && config.mmr.enabled;
566
+ const localBump = flags['equal-sources']
567
+ ? 1.0
568
+ : flags['local-bump'] !== undefined
569
+ ? parseFloat(String(flags['local-bump']))
570
+ : config.search.localBump;
556
571
  let results;
557
572
  let modeUsed;
558
573
  if (usePhysics && !hasGlobal) {
@@ -566,7 +581,7 @@ async function cmdExplain(hippoRoot, query, flags) {
566
581
  }
567
582
  else if (hasGlobal) {
568
583
  results = await searchBothHybrid(query, hippoRoot, globalRoot, {
569
- budget, explain: true, mmr: mmrEnabled, mmrLambda,
584
+ budget, explain: true, mmr: mmrEnabled, mmrLambda, localBump,
570
585
  });
571
586
  modeUsed = 'searchBothHybrid';
572
587
  }
@@ -663,6 +678,7 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
663
678
  const asJson = Boolean(flags['json']);
664
679
  const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
665
680
  const showCases = Boolean(flags['show-cases']);
681
+ const comparePath = flags['compare'] ? String(flags['compare']) : null;
666
682
  const noMmr = Boolean(flags['no-mmr']);
667
683
  const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
668
684
  const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
@@ -702,11 +718,19 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
702
718
  console.error(`Failed to read corpus: ${err instanceof Error ? err.message : err}`);
703
719
  process.exit(1);
704
720
  }
721
+ const globalRoot = getGlobalRoot();
722
+ const localBump = flags['equal-sources']
723
+ ? 1.0
724
+ : flags['local-bump'] !== undefined
725
+ ? parseFloat(String(flags['local-bump']))
726
+ : loadConfig(hippoRoot).search.localBump;
705
727
  const summary = await runEval(cases, entries, {
706
728
  hippoRoot,
729
+ globalRoot,
707
730
  mmr: !noMmr,
708
731
  mmrLambda,
709
732
  embeddingWeight,
733
+ localBump,
710
734
  });
711
735
  if (asJson) {
712
736
  console.log(JSON.stringify(summary, null, 2));
@@ -752,6 +776,52 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
752
776
  console.error(`MRR ${fmt(summary.meanMrr, 4)} below threshold ${minMrr}`);
753
777
  process.exit(1);
754
778
  }
779
+ if (comparePath) {
780
+ if (!fs.existsSync(comparePath)) {
781
+ console.error(`Baseline file not found: ${comparePath}`);
782
+ process.exit(1);
783
+ }
784
+ let baseline;
785
+ try {
786
+ baseline = JSON.parse(fs.readFileSync(comparePath, 'utf8'));
787
+ }
788
+ catch (err) {
789
+ console.error(`Failed to parse baseline: ${err instanceof Error ? err.message : err}`);
790
+ process.exit(1);
791
+ }
792
+ const cmp = compareSummaries(baseline, summary);
793
+ if (asJson) {
794
+ // The main JSON summary has already been written to stdout; emit the comparison on stderr so
795
+ // the two can be captured independently.
796
+ console.error(JSON.stringify({ compare: cmp }, null, 2));
797
+ }
798
+ else {
799
+ console.log();
800
+ console.log('Compare vs baseline:');
801
+ const sign = (d) => (d >= 0 ? '+' : '') + fmt(d, 4);
802
+ console.log(` MRR: ${sign(cmp.aggregate.mrr)}`);
803
+ console.log(` Recall@5: ${sign(cmp.aggregate.recallAt5)}`);
804
+ console.log(` Recall@10: ${sign(cmp.aggregate.recallAt10)}`);
805
+ console.log(` NDCG@10: ${sign(cmp.aggregate.ndcgAt10)}`);
806
+ console.log();
807
+ console.log(` improved: ${cmp.improved.length} regressed: ${cmp.regressed.length} unchanged: ${cmp.unchanged}`);
808
+ if (cmp.onlyInBaseline.length > 0)
809
+ console.log(` only in baseline: ${cmp.onlyInBaseline.length}`);
810
+ if (cmp.onlyInCurrent.length > 0)
811
+ console.log(` only in current: ${cmp.onlyInCurrent.length}`);
812
+ const showPerCase = cmp.improved.length + cmp.regressed.length > 0;
813
+ if (showPerCase) {
814
+ for (const d of cmp.improved.slice(0, 5)) {
815
+ const delta = d.ndcgAfter - d.ndcgBefore;
816
+ console.log(` + [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (+${fmt(delta, 3)})`);
817
+ }
818
+ for (const d of cmp.regressed.slice(0, 5)) {
819
+ const delta = d.ndcgAfter - d.ndcgBefore;
820
+ console.log(` - [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (${fmt(delta, 3)})`);
821
+ }
822
+ }
823
+ }
824
+ }
755
825
  }
756
826
  function cmdTrace(hippoRoot, id, flags) {
757
827
  requireInit(hippoRoot);
@@ -854,6 +924,34 @@ function cmdTrace(hippoRoot, id, flags) {
854
924
  }
855
925
  }
856
926
  }
927
+ async function cmdRefine(hippoRoot, flags) {
928
+ requireInit(hippoRoot);
929
+ const apiKey = process.env.ANTHROPIC_API_KEY;
930
+ if (!apiKey) {
931
+ console.error('hippo refine needs ANTHROPIC_API_KEY in the environment.');
932
+ process.exit(1);
933
+ }
934
+ const dryRun = Boolean(flags['dry-run']);
935
+ const all = Boolean(flags['all']);
936
+ const limit = flags['limit'] !== undefined ? parseInt(String(flags['limit']), 10) : undefined;
937
+ const model = flags['model'] ? String(flags['model']) : undefined;
938
+ const asJson = Boolean(flags['json']);
939
+ const result = await refineStore(hippoRoot, { apiKey, model, limit, dryRun, all });
940
+ if (asJson) {
941
+ console.log(JSON.stringify(result, null, 2));
942
+ return;
943
+ }
944
+ console.log(`Scanned: ${result.scanned} consolidated semantic memories`);
945
+ console.log(`Refined: ${result.refined}${dryRun ? ' (dry-run — no writes)' : ''}`);
946
+ console.log(`Skipped: ${result.skipped}`);
947
+ console.log(`Failed: ${result.failed}`);
948
+ if (result.failed > 0) {
949
+ console.log('\nFailures:');
950
+ for (const d of result.details.filter((x) => x.status === 'failed').slice(0, 5)) {
951
+ console.log(` ${d.id}: ${d.reason}`);
952
+ }
953
+ }
954
+ }
857
955
  /**
858
956
  * Scan for Claude Code MEMORY.md files and import new entries into hippo.
859
957
  * Looks in ~/.claude/projects/<project>/memory/ for .md files with YAML frontmatter.
@@ -2968,6 +3066,7 @@ Commands:
2968
3066
  --global Store in global store ($HIPPO_HOME or ~/.hippo/)
2969
3067
  recall <query> Search and retrieve memories (local + global)
2970
3068
  --budget <n> Token budget (default: 4000)
3069
+ --min-results <n> Minimum results regardless of budget (default: 1)
2971
3070
  --json Output as JSON
2972
3071
  --why Show match reasons and source annotations
2973
3072
  --no-mmr Disable MMR diversity re-ranking
@@ -2982,14 +3081,24 @@ Commands:
2982
3081
  trace <id> Memory dossier: content, decay trajectory, retrievals,
2983
3082
  outcomes, consolidation parents, open conflicts
2984
3083
  --json Output as JSON
3084
+ refine Rewrite consolidated semantic memories with Claude
3085
+ --limit <n> Cap the number of memories processed this run
3086
+ --all Ignore \`llm-refined\` tag and re-refine everything
3087
+ --dry-run Call the API but don't write results back
3088
+ --model <id> Override the default model (claude-sonnet-4-6)
3089
+ --json Output summary as JSON
3090
+ (requires ANTHROPIC_API_KEY in env)
2985
3091
  eval [<corpus.json>] Measure recall quality against a test corpus
2986
3092
  --bootstrap Generate a synthetic corpus from current memories
2987
3093
  --out <path> With --bootstrap, write to file instead of stdout
2988
3094
  --max-cases <n> With --bootstrap, cap case count (default: 50)
2989
3095
  --show-cases Print per-case details (query, R@10, missed, top 3)
3096
+ --compare <path> JSON from a prior \`eval --json\` run; print deltas
2990
3097
  --no-mmr Disable MMR for this eval run
2991
3098
  --mmr-lambda <f> Override MMR lambda for this run
2992
3099
  --embedding-weight <f> Override cosine weight (default: 0.6)
3100
+ --local-bump <f> Local-over-global priority multiplier (default: 1.2)
3101
+ --equal-sources Shortcut for --local-bump 1.0
2993
3102
  --min-mrr <f> Exit non-zero if mean MRR falls below this
2994
3103
  --json Output full summary as JSON
2995
3104
  context Smart context injection for AI agents
@@ -3216,6 +3325,9 @@ async function main() {
3216
3325
  cmdTrace(hippoRoot, id, flags);
3217
3326
  break;
3218
3327
  }
3328
+ case 'refine':
3329
+ await cmdRefine(hippoRoot, flags);
3330
+ break;
3219
3331
  case 'sleep':
3220
3332
  cmdSleep(hippoRoot, flags);
3221
3333
  break;