hippo-memory 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/dist/cli.js +116 -4
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +5 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +4 -0
- package/dist/config.js.map +1 -1
- package/dist/eval.d.ts +35 -0
- package/dist/eval.d.ts.map +1 -1
- package/dist/eval.js +68 -8
- package/dist/eval.js.map +1 -1
- package/dist/refine-llm.d.ts +53 -0
- package/dist/refine-llm.d.ts.map +1 -0
- package/dist/refine-llm.js +147 -0
- package/dist/refine-llm.js.map +1 -0
- package/dist/search.d.ts +26 -0
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +70 -26
- package/dist/search.js.map +1 -1
- package/dist/shared.d.ts +4 -0
- package/dist/shared.d.ts.map +1 -1
- package/dist/shared.js +19 -18
- package/dist/shared.js.map +1 -1
- package/extensions/openclaw-plugin/openclaw.plugin.json +1 -1
- package/extensions/openclaw-plugin/package.json +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -60,6 +60,14 @@ hippo recall "data pipeline issues" --budget 2000
|
|
|
60
60
|
|
|
61
61
|
---
|
|
62
62
|
|
|
63
|
+
### What's new in v0.28.0
|
|
64
|
+
|
|
65
|
+
- **Budget saturation fix.** Large memories (14k+ chars) no longer starve retrieval. The new `minResults` option guarantees at least N results regardless of token budget, e.g. `hippo recall <q> --min-results 5`.
|
|
66
|
+
- **LongMemEval parity restored.** The 35pp R@10 gap vs v0.11 turned out to be a benchmark methodology issue: a budget-limited run was being compared against an unlimited one. Corrected numbers for v0.28: R@3 67.0% (+0.4pp), answer_in_content@5 49.6% (+3.0pp), R@10 81.0% (-1.6pp). Top-5 results now contain the actual answer more often.
|
|
67
|
+
- **MMR performance.** Re-ranking capped at top-100 candidates, dropping per-query time from ~50s to ~9s. `preparedCorpus` option skips per-query tokenization for batch callers.
|
|
68
|
+
- **RRF scoring option.** `hybridSearch` accepts `scoring: 'rrf'` for reciprocal rank fusion as an alternative to score blending.
|
|
69
|
+
- **`hippo refine` command.** LLM-powered semantic rewrite of memories for improved recall quality.
|
|
70
|
+
|
|
63
71
|
### What's new in v0.27.0
|
|
64
72
|
|
|
65
73
|
- **Recall is now debuggable.** `hippo explain <query>` prints the full score breakdown for each retrieved memory: BM25 + cosine, every multiplier (strength, recency, decision, path, source-bump, outcome), age, and final composite. Read-only so it's safe to run as a diagnostic.
|
package/dist/cli.js
CHANGED
|
@@ -47,7 +47,8 @@ import { DAILY_TASK_NAME, buildDailyRunnerCommand, listRegisteredWorkspaces, reg
|
|
|
47
47
|
import { importChatGPT, importClaude, importCursor, importGenericFile, importMarkdown, } from './importers.js';
|
|
48
48
|
import { cmdCapture } from './capture.js';
|
|
49
49
|
import { auditMemories } from './audit.js';
|
|
50
|
-
import { runEval, bootstrapCorpus } from './eval.js';
|
|
50
|
+
import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
|
|
51
|
+
import { refineStore } from './refine-llm.js';
|
|
51
52
|
import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
|
|
52
53
|
// ---------------------------------------------------------------------------
|
|
53
54
|
// Helpers
|
|
@@ -445,23 +446,32 @@ async function cmdRecall(hippoRoot, query, flags) {
|
|
|
445
446
|
? parseFloat(String(flags['mmr-lambda']))
|
|
446
447
|
: config.mmr.lambda;
|
|
447
448
|
const mmrEnabled = !noMmr && config.mmr.enabled;
|
|
449
|
+
const localBump = flags['equal-sources']
|
|
450
|
+
? 1.0
|
|
451
|
+
: flags['local-bump'] !== undefined
|
|
452
|
+
? parseFloat(String(flags['local-bump']))
|
|
453
|
+
: config.search.localBump;
|
|
454
|
+
const minResults = flags['min-results'] !== undefined
|
|
455
|
+
? parseInt(String(flags['min-results']), 10)
|
|
456
|
+
: undefined;
|
|
448
457
|
let results;
|
|
449
458
|
if (usePhysics && !hasGlobal) {
|
|
450
459
|
results = await physicsSearch(query, localEntries, {
|
|
451
460
|
budget,
|
|
452
461
|
hippoRoot,
|
|
453
462
|
physicsConfig: config.physics,
|
|
463
|
+
minResults,
|
|
454
464
|
});
|
|
455
465
|
}
|
|
456
466
|
else if (hasGlobal) {
|
|
457
467
|
// Use searchBothHybrid for merged results with embedding support
|
|
458
468
|
results = await searchBothHybrid(query, hippoRoot, globalRoot, {
|
|
459
|
-
budget, mmr: mmrEnabled, mmrLambda,
|
|
469
|
+
budget, mmr: mmrEnabled, mmrLambda, localBump, minResults,
|
|
460
470
|
});
|
|
461
471
|
}
|
|
462
472
|
else {
|
|
463
473
|
results = await hybridSearch(query, localEntries, {
|
|
464
|
-
budget, hippoRoot, mmr: mmrEnabled, mmrLambda,
|
|
474
|
+
budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults,
|
|
465
475
|
});
|
|
466
476
|
}
|
|
467
477
|
if (limit < results.length) {
|
|
@@ -553,6 +563,11 @@ async function cmdExplain(hippoRoot, query, flags) {
|
|
|
553
563
|
? parseFloat(String(flags['mmr-lambda']))
|
|
554
564
|
: config.mmr.lambda;
|
|
555
565
|
const mmrEnabled = !noMmr && config.mmr.enabled;
|
|
566
|
+
const localBump = flags['equal-sources']
|
|
567
|
+
? 1.0
|
|
568
|
+
: flags['local-bump'] !== undefined
|
|
569
|
+
? parseFloat(String(flags['local-bump']))
|
|
570
|
+
: config.search.localBump;
|
|
556
571
|
let results;
|
|
557
572
|
let modeUsed;
|
|
558
573
|
if (usePhysics && !hasGlobal) {
|
|
@@ -566,7 +581,7 @@ async function cmdExplain(hippoRoot, query, flags) {
|
|
|
566
581
|
}
|
|
567
582
|
else if (hasGlobal) {
|
|
568
583
|
results = await searchBothHybrid(query, hippoRoot, globalRoot, {
|
|
569
|
-
budget, explain: true, mmr: mmrEnabled, mmrLambda,
|
|
584
|
+
budget, explain: true, mmr: mmrEnabled, mmrLambda, localBump,
|
|
570
585
|
});
|
|
571
586
|
modeUsed = 'searchBothHybrid';
|
|
572
587
|
}
|
|
@@ -663,6 +678,7 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
|
|
|
663
678
|
const asJson = Boolean(flags['json']);
|
|
664
679
|
const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
|
|
665
680
|
const showCases = Boolean(flags['show-cases']);
|
|
681
|
+
const comparePath = flags['compare'] ? String(flags['compare']) : null;
|
|
666
682
|
const noMmr = Boolean(flags['no-mmr']);
|
|
667
683
|
const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
|
|
668
684
|
const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
|
|
@@ -702,11 +718,19 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
|
|
|
702
718
|
console.error(`Failed to read corpus: ${err instanceof Error ? err.message : err}`);
|
|
703
719
|
process.exit(1);
|
|
704
720
|
}
|
|
721
|
+
const globalRoot = getGlobalRoot();
|
|
722
|
+
const localBump = flags['equal-sources']
|
|
723
|
+
? 1.0
|
|
724
|
+
: flags['local-bump'] !== undefined
|
|
725
|
+
? parseFloat(String(flags['local-bump']))
|
|
726
|
+
: loadConfig(hippoRoot).search.localBump;
|
|
705
727
|
const summary = await runEval(cases, entries, {
|
|
706
728
|
hippoRoot,
|
|
729
|
+
globalRoot,
|
|
707
730
|
mmr: !noMmr,
|
|
708
731
|
mmrLambda,
|
|
709
732
|
embeddingWeight,
|
|
733
|
+
localBump,
|
|
710
734
|
});
|
|
711
735
|
if (asJson) {
|
|
712
736
|
console.log(JSON.stringify(summary, null, 2));
|
|
@@ -752,6 +776,52 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
|
|
|
752
776
|
console.error(`MRR ${fmt(summary.meanMrr, 4)} below threshold ${minMrr}`);
|
|
753
777
|
process.exit(1);
|
|
754
778
|
}
|
|
779
|
+
if (comparePath) {
|
|
780
|
+
if (!fs.existsSync(comparePath)) {
|
|
781
|
+
console.error(`Baseline file not found: ${comparePath}`);
|
|
782
|
+
process.exit(1);
|
|
783
|
+
}
|
|
784
|
+
let baseline;
|
|
785
|
+
try {
|
|
786
|
+
baseline = JSON.parse(fs.readFileSync(comparePath, 'utf8'));
|
|
787
|
+
}
|
|
788
|
+
catch (err) {
|
|
789
|
+
console.error(`Failed to parse baseline: ${err instanceof Error ? err.message : err}`);
|
|
790
|
+
process.exit(1);
|
|
791
|
+
}
|
|
792
|
+
const cmp = compareSummaries(baseline, summary);
|
|
793
|
+
if (asJson) {
|
|
794
|
+
// The main JSON output already emitted; append comparison to stderr so
|
|
795
|
+
// both can be captured independently.
|
|
796
|
+
console.error(JSON.stringify({ compare: cmp }, null, 2));
|
|
797
|
+
}
|
|
798
|
+
else {
|
|
799
|
+
console.log();
|
|
800
|
+
console.log('Compare vs baseline:');
|
|
801
|
+
const sign = (d) => (d >= 0 ? '+' : '') + fmt(d, 4);
|
|
802
|
+
console.log(` MRR: ${sign(cmp.aggregate.mrr)}`);
|
|
803
|
+
console.log(` Recall@5: ${sign(cmp.aggregate.recallAt5)}`);
|
|
804
|
+
console.log(` Recall@10: ${sign(cmp.aggregate.recallAt10)}`);
|
|
805
|
+
console.log(` NDCG@10: ${sign(cmp.aggregate.ndcgAt10)}`);
|
|
806
|
+
console.log();
|
|
807
|
+
console.log(` improved: ${cmp.improved.length} regressed: ${cmp.regressed.length} unchanged: ${cmp.unchanged}`);
|
|
808
|
+
if (cmp.onlyInBaseline.length > 0)
|
|
809
|
+
console.log(` only in baseline: ${cmp.onlyInBaseline.length}`);
|
|
810
|
+
if (cmp.onlyInCurrent.length > 0)
|
|
811
|
+
console.log(` only in current: ${cmp.onlyInCurrent.length}`);
|
|
812
|
+
const showPerCase = cmp.improved.length + cmp.regressed.length > 0;
|
|
813
|
+
if (showPerCase) {
|
|
814
|
+
for (const d of cmp.improved.slice(0, 5)) {
|
|
815
|
+
const delta = d.ndcgAfter - d.ndcgBefore;
|
|
816
|
+
console.log(` + [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (+${fmt(delta, 3)})`);
|
|
817
|
+
}
|
|
818
|
+
for (const d of cmp.regressed.slice(0, 5)) {
|
|
819
|
+
const delta = d.ndcgAfter - d.ndcgBefore;
|
|
820
|
+
console.log(` - [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (${fmt(delta, 3)})`);
|
|
821
|
+
}
|
|
822
|
+
}
|
|
823
|
+
}
|
|
824
|
+
}
|
|
755
825
|
}
|
|
756
826
|
function cmdTrace(hippoRoot, id, flags) {
|
|
757
827
|
requireInit(hippoRoot);
|
|
@@ -854,6 +924,34 @@ function cmdTrace(hippoRoot, id, flags) {
|
|
|
854
924
|
}
|
|
855
925
|
}
|
|
856
926
|
}
|
|
927
|
+
/**
 * `hippo refine` command: run `refineStore` over the store and print a
 * summary of what happened.
 *
 * Flags read: `--dry-run`, `--all`, `--limit <n>`, `--model <id>`, `--json`.
 * They are coerced to booleans / integer / string and forwarded to
 * `refineStore` together with the API key.
 *
 * Terminates the process with status 1 when ANTHROPIC_API_KEY is not set,
 * or when `--limit` is present but does not parse to a non-negative integer.
 *
 * @param hippoRoot - Store root handed to `requireInit` and `refineStore`
 *   (presumably a directory path; confirm against callers).
 * @param {Record<string, unknown>} flags - Parsed CLI flags.
 * @returns {Promise<void>}
 */
async function cmdRefine(hippoRoot, flags) {
    requireInit(hippoRoot);
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.error('hippo refine needs ANTHROPIC_API_KEY in the environment.');
        process.exit(1);
    }
    const dryRun = Boolean(flags['dry-run']);
    const all = Boolean(flags['all']);
    let limit;
    if (flags['limit'] !== undefined) {
        limit = Number.parseInt(String(flags['limit']), 10);
        // A bare `--limit` or a non-numeric value parses to NaN; refuse it
        // here instead of handing NaN to refineStore.
        if (Number.isNaN(limit) || limit < 0) {
            console.error(`Invalid --limit value: ${String(flags['limit'])} (expected a non-negative integer)`);
            process.exit(1);
        }
    }
    const model = flags['model'] ? String(flags['model']) : undefined;
    const asJson = Boolean(flags['json']);
    const result = await refineStore(hippoRoot, { apiKey, model, limit, dryRun, all });
    if (asJson) {
        // JSON mode prints the raw result object and nothing else.
        console.log(JSON.stringify(result, null, 2));
        return;
    }
    console.log(`Scanned: ${result.scanned} consolidated semantic memories`);
    console.log(`Refined: ${result.refined}${dryRun ? ' (dry-run — no writes)' : ''}`);
    console.log(`Skipped: ${result.skipped}`);
    console.log(`Failed: ${result.failed}`);
    if (result.failed > 0) {
        console.log('\nFailures:');
        // Only the first five failures are listed to keep the output short.
        for (const d of result.details.filter((x) => x.status === 'failed').slice(0, 5)) {
            console.log(` ${d.id}: ${d.reason}`);
        }
    }
}
|
|
857
955
|
/**
|
|
858
956
|
* Scan for Claude Code MEMORY.md files and import new entries into hippo.
|
|
859
957
|
* Looks in ~/.claude/projects/<project>/memory/ for .md files with YAML frontmatter.
|
|
@@ -2968,6 +3066,7 @@ Commands:
|
|
|
2968
3066
|
--global Store in global store ($HIPPO_HOME or ~/.hippo/)
|
|
2969
3067
|
recall <query> Search and retrieve memories (local + global)
|
|
2970
3068
|
--budget <n> Token budget (default: 4000)
|
|
3069
|
+
--min-results <n> Minimum results regardless of budget (default: 1)
|
|
2971
3070
|
--json Output as JSON
|
|
2972
3071
|
--why Show match reasons and source annotations
|
|
2973
3072
|
--no-mmr Disable MMR diversity re-ranking
|
|
@@ -2982,14 +3081,24 @@ Commands:
|
|
|
2982
3081
|
trace <id> Memory dossier: content, decay trajectory, retrievals,
|
|
2983
3082
|
outcomes, consolidation parents, open conflicts
|
|
2984
3083
|
--json Output as JSON
|
|
3084
|
+
refine Rewrite consolidated semantic memories with Claude
|
|
3085
|
+
--limit <n> Cap the number of memories processed this run
|
|
3086
|
+
--all Ignore \`llm-refined\` tag and re-refine everything
|
|
3087
|
+
--dry-run Call the API but don't write results back
|
|
3088
|
+
--model <id> Override the default model (claude-sonnet-4-6)
|
|
3089
|
+
--json Output summary as JSON
|
|
3090
|
+
(requires ANTHROPIC_API_KEY in env)
|
|
2985
3091
|
eval [<corpus.json>] Measure recall quality against a test corpus
|
|
2986
3092
|
--bootstrap Generate a synthetic corpus from current memories
|
|
2987
3093
|
--out <path> With --bootstrap, write to file instead of stdout
|
|
2988
3094
|
--max-cases <n> With --bootstrap, cap case count (default: 50)
|
|
2989
3095
|
--show-cases Print per-case details (query, R@10, missed, top 3)
|
|
3096
|
+
--compare <path> JSON from a prior \`eval --json\` run; print deltas
|
|
2990
3097
|
--no-mmr Disable MMR for this eval run
|
|
2991
3098
|
--mmr-lambda <f> Override MMR lambda for this run
|
|
2992
3099
|
--embedding-weight <f> Override cosine weight (default: 0.6)
|
|
3100
|
+
--local-bump <f> Local-over-global priority multiplier (default: 1.2)
|
|
3101
|
+
--equal-sources Shortcut for --local-bump 1.0
|
|
2993
3102
|
--min-mrr <f> Exit non-zero if mean MRR falls below this
|
|
2994
3103
|
--json Output full summary as JSON
|
|
2995
3104
|
context Smart context injection for AI agents
|
|
@@ -3216,6 +3325,9 @@ async function main() {
|
|
|
3216
3325
|
cmdTrace(hippoRoot, id, flags);
|
|
3217
3326
|
break;
|
|
3218
3327
|
}
|
|
3328
|
+
case 'refine':
|
|
3329
|
+
await cmdRefine(hippoRoot, flags);
|
|
3330
|
+
break;
|
|
3219
3331
|
case 'sleep':
|
|
3220
3332
|
cmdSleep(hippoRoot, flags);
|
|
3221
3333
|
break;
|