hippo-memory 0.26.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/dist/cli.js +489 -2
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +13 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +9 -0
- package/dist/config.js.map +1 -1
- package/dist/eval.d.ts +103 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/eval.js +187 -0
- package/dist/eval.js.map +1 -0
- package/dist/refine-llm.d.ts +53 -0
- package/dist/refine-llm.d.ts.map +1 -0
- package/dist/refine-llm.js +147 -0
- package/dist/refine-llm.js.map +1 -0
- package/dist/search.d.ts +91 -0
- package/dist/search.d.ts.map +1 -1
- package/dist/search.js +215 -29
- package/dist/search.js.map +1 -1
- package/dist/shared.d.ts +7 -0
- package/dist/shared.d.ts.map +1 -1
- package/dist/shared.js +31 -14
- package/dist/shared.js.map +1 -1
- package/extensions/openclaw-plugin/openclaw.plugin.json +1 -1
- package/extensions/openclaw-plugin/package.json +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -60,6 +60,22 @@ hippo recall "data pipeline issues" --budget 2000
|
|
|
60
60
|
|
|
61
61
|
---
|
|
62
62
|
|
|
63
|
+
### What's new in v0.28.0
|
|
64
|
+
|
|
65
|
+
- **Budget saturation fix.** Large memories (14k+ chars) no longer starve retrieval. New `minResults` option guarantees at least N results regardless of token budget. `hippo recall <q> --min-results 5`.
|
|
66
|
+
- **LongMemEval parity restored.** The 35pp R@10 gap vs v0.11 was a benchmark methodology issue (budget-limited vs unlimited comparison). Corrected: v0.28 R@3 67.0% (+0.4pp), answer_in_content@5 49.6% (+3.0pp), R@10 81.0% (-1.6pp). Top-5 results now more often contain the actual answer.
|
|
67
|
+
- **MMR performance.** Re-ranking capped at top-100 candidates, dropping per-query time from ~50s to ~9s. `preparedCorpus` option skips per-query tokenization for batch callers.
|
|
68
|
+
- **RRF scoring option.** `hybridSearch` accepts `scoring: 'rrf'` for reciprocal rank fusion as an alternative to score blending.
|
|
69
|
+
- **`hippo refine` command.** LLM-powered semantic rewrite of memories for improved recall quality.
|
|
70
|
+
|
|
71
|
+
### What's new in v0.27.0
|
|
72
|
+
|
|
73
|
+
- **Recall is now debuggable.** `hippo explain <query>` prints the full score breakdown for each retrieved memory: BM25 + cosine, every multiplier (strength, recency, decision, path, source-bump, outcome), age, and final composite. Read-only so it's safe to run as a diagnostic.
|
|
74
|
+
- **`hippo trace <id>`** gives a one-page dossier per memory: decay trajectory projected to 30/90 days, effective half-life, retrieval staleness, outcome counts, consolidation parents, open conflicts.
|
|
75
|
+
- **MMR diversity** re-ranks near-duplicate results so you don't get five paraphrases at the top. Default `lambda=0.7`, tunable via config or `--no-mmr` / `--mmr-lambda`.
|
|
76
|
+
- **Outcome feedback is immediate.** `hippo outcome --good` now nudges that memory up on the very next recall (not just via slow half-life decay). Bounded at +/-15%.
|
|
77
|
+
- **`hippo eval`** measures recall quality against a test corpus (MRR, Recall@K, NDCG@K). Gate CI with `--min-mrr`. A real 15-case corpus ships at `evals/real-corpus.json`; baseline numbers in `evals/README.md`.
|
|
78
|
+
|
|
63
79
|
### What's new in v0.26.0
|
|
64
80
|
|
|
65
81
|
- **`hippo audit` catches junk memories.** New command flags too-short entries, release/merge/WIP commit noise, fragments, and vague single-clause notes. `--fix` removes the worst offenders. `hippo sleep` now runs audit automatically so commit-noise never survives consolidation.
|
package/dist/cli.js
CHANGED
|
@@ -47,6 +47,8 @@ import { DAILY_TASK_NAME, buildDailyRunnerCommand, listRegisteredWorkspaces, reg
|
|
|
47
47
|
import { importChatGPT, importClaude, importCursor, importGenericFile, importMarkdown, } from './importers.js';
|
|
48
48
|
import { cmdCapture } from './capture.js';
|
|
49
49
|
import { auditMemories } from './audit.js';
|
|
50
|
+
import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
|
|
51
|
+
import { refineStore } from './refine-llm.js';
|
|
50
52
|
import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
|
|
51
53
|
// ---------------------------------------------------------------------------
|
|
52
54
|
// Helpers
|
|
@@ -439,20 +441,38 @@ async function cmdRecall(hippoRoot, query, flags) {
|
|
|
439
441
|
const config = loadConfig(hippoRoot);
|
|
440
442
|
const usePhysics = forcePhysics
|
|
441
443
|
|| (!forceClassic && config.physics.enabled !== false);
|
|
444
|
+
const noMmr = Boolean(flags['no-mmr']);
|
|
445
|
+
const mmrLambda = flags['mmr-lambda'] !== undefined
|
|
446
|
+
? parseFloat(String(flags['mmr-lambda']))
|
|
447
|
+
: config.mmr.lambda;
|
|
448
|
+
const mmrEnabled = !noMmr && config.mmr.enabled;
|
|
449
|
+
const localBump = flags['equal-sources']
|
|
450
|
+
? 1.0
|
|
451
|
+
: flags['local-bump'] !== undefined
|
|
452
|
+
? parseFloat(String(flags['local-bump']))
|
|
453
|
+
: config.search.localBump;
|
|
454
|
+
const minResults = flags['min-results'] !== undefined
|
|
455
|
+
? parseInt(String(flags['min-results']), 10)
|
|
456
|
+
: undefined;
|
|
442
457
|
let results;
|
|
443
458
|
if (usePhysics && !hasGlobal) {
|
|
444
459
|
results = await physicsSearch(query, localEntries, {
|
|
445
460
|
budget,
|
|
446
461
|
hippoRoot,
|
|
447
462
|
physicsConfig: config.physics,
|
|
463
|
+
minResults,
|
|
448
464
|
});
|
|
449
465
|
}
|
|
450
466
|
else if (hasGlobal) {
|
|
451
467
|
// Use searchBothHybrid for merged results with embedding support
|
|
452
|
-
results = await searchBothHybrid(query, hippoRoot, globalRoot, {
|
|
468
|
+
results = await searchBothHybrid(query, hippoRoot, globalRoot, {
|
|
469
|
+
budget, mmr: mmrEnabled, mmrLambda, localBump, minResults,
|
|
470
|
+
});
|
|
453
471
|
}
|
|
454
472
|
else {
|
|
455
|
-
results = await hybridSearch(query, localEntries, {
|
|
473
|
+
results = await hybridSearch(query, localEntries, {
|
|
474
|
+
budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults,
|
|
475
|
+
});
|
|
456
476
|
}
|
|
457
477
|
if (limit < results.length) {
|
|
458
478
|
results = results.slice(0, limit);
|
|
@@ -524,6 +544,414 @@ async function cmdRecall(hippoRoot, query, flags) {
|
|
|
524
544
|
console.log();
|
|
525
545
|
}
|
|
526
546
|
}
|
|
547
|
+
/**
 * `hippo explain <query>`: run a search with `explain: true` and print the
 * score breakdown attached to each result, either as JSON or as a summary
 * table followed by one detail section per result.
 *
 * Search backend selection:
 *   - global store has candidates        -> searchBothHybrid
 *   - otherwise, physics requested/on    -> physicsSearch
 *   - otherwise                          -> hybridSearch
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {string} query - Query text.
 * @param {object} flags - Parsed CLI flags; reads `budget`, `limit`, `json`,
 *   `physics`, `classic`, `no-mmr`, `mmr-lambda`, `equal-sources`, `local-bump`.
 * @returns {Promise<void>}
 */
async function cmdExplain(hippoRoot, query, flags) {
    requireInit(hippoRoot);
    const budget = parseInt(String(flags['budget'] ?? '4000'), 10);
    const limit = parseLimitFlag(flags['limit']);
    const wantJson = Boolean(flags['json']);
    const physicsForced = Boolean(flags['physics']);
    const classicForced = Boolean(flags['classic']);

    const globalRoot = getGlobalRoot();
    const localEntries = loadSearchEntries(hippoRoot, query);
    let globalEntries = [];
    if (isInitialized(globalRoot)) {
        globalEntries = loadSearchEntries(globalRoot, query);
    }
    const globalHasCandidates = globalEntries.length > 0;
    const config = loadConfig(hippoRoot);

    // --physics wins outright; --classic vetoes; otherwise defer to config.
    let physicsWanted = physicsForced;
    if (!physicsWanted && !classicForced) {
        physicsWanted = config.physics.enabled !== false;
    }

    const lambdaFlag = flags['mmr-lambda'];
    const mmrLambda = lambdaFlag === undefined
        ? config.mmr.lambda
        : parseFloat(String(lambdaFlag));
    const mmrEnabled = !Boolean(flags['no-mmr']) && config.mmr.enabled;

    let localBump;
    if (flags['equal-sources']) {
        localBump = 1.0;
    }
    else if (flags['local-bump'] !== undefined) {
        localBump = parseFloat(String(flags['local-bump']));
    }
    else {
        localBump = config.search.localBump;
    }

    let hits;
    let modeUsed;
    if (globalHasCandidates) {
        modeUsed = 'searchBothHybrid';
        hits = await searchBothHybrid(query, hippoRoot, globalRoot, {
            budget, explain: true, mmr: mmrEnabled, mmrLambda, localBump,
        });
    }
    else if (physicsWanted) {
        modeUsed = 'physics';
        hits = await physicsSearch(query, localEntries, {
            budget,
            hippoRoot,
            physicsConfig: config.physics,
            explain: true,
        });
    }
    else {
        modeUsed = 'hybrid';
        hits = await hybridSearch(query, localEntries, {
            budget, hippoRoot, explain: true, mmr: mmrEnabled, mmrLambda,
        });
    }
    const shown = limit < hits.length ? hits.slice(0, limit) : hits;
    const candidates = localEntries.length + globalEntries.length;

    if (wantJson) {
        const rows = [];
        for (const [idx, hit] of shown.entries()) {
            rows.push({
                rank: idx + 1,
                id: hit.entry.id,
                layer: hit.entry.layer,
                confidence: resolveConfidence(hit.entry),
                score: hit.score,
                tokens: hit.tokens,
                tags: hit.entry.tags,
                content: hit.entry.content,
                breakdown: hit.breakdown,
            });
        }
        console.log(JSON.stringify({
            query,
            mode: modeUsed,
            candidates,
            returned: rows.length,
            results: rows,
        }));
        return;
    }

    if (shown.length === 0) {
        console.log(`No memories matched "${query}" (scanned ${candidates}).`);
        return;
    }

    console.log(`Query: "${query}"`);
    console.log(`Mode: ${modeUsed} candidates: ${candidates} returned: ${shown.length}`);
    console.log();

    // Summary table: one row per result.
    console.log('Rank Score Strength Age Layer ID Preview');
    console.log('----- ------- --------- ------ ---------- ----------------- ---------------------------------');
    for (const [idx, hit] of shown.entries()) {
        const detail = hit.breakdown;
        const preview = hit.entry.content.replace(/\s+/g, ' ').slice(0, 48);
        const ageStr = detail ? `${detail.ageDays}d` : '?';
        const columns = [
            String(idx + 1).padEnd(5),
            fmt(hit.score, 3).padEnd(7),
            fmt(hit.entry.strength).padEnd(9),
            ageStr.padEnd(6),
            hit.entry.layer.padEnd(10),
            hit.entry.id.padEnd(17),
            preview,
        ];
        console.log(columns.join(' '));
    }
    console.log();

    // Detail sections: the factors that went into each composite score.
    for (const [idx, hit] of shown.entries()) {
        const detail = hit.breakdown;
        console.log(`[${idx + 1}] ${hit.entry.id} composite=${fmt(hit.score, 4)}`);
        if (!detail) {
            console.log(' (no breakdown available)');
            console.log();
            continue;
        }
        if (detail.mode === 'physics') {
            console.log(` mode: physics-gravity`);
            console.log(` cosine: ${fmt(detail.cosine, 3)} (pre-amp baseline)`);
            console.log(` final: ${fmt(detail.final, 4)} (post-amp, from physics scorer)`);
            console.log();
            continue;
        }
        const matched = detail.matchedTerms.length > 0 ? detail.matchedTerms.join(', ') : '(none)';
        const vecHint = detail.mode === 'hybrid-no-vec' ? ' (no cached doc vector — run `hippo embed`)' : '';
        console.log(` mode: ${detail.mode}${vecHint}`);
        console.log(` BM25: raw=${fmt(hit.bm25, 3)} normalized=${fmt(detail.normBm25, 3)} weight=${fmt(detail.bm25Weight, 2)} matched=[${matched}]`);
        console.log(` embedding: cosine=${fmt(detail.cosine, 3)} weight=${fmt(detail.embeddingWeight, 2)}`);
        console.log(` base: ${fmt(detail.bm25Weight, 2)}*${fmt(detail.normBm25, 3)} + ${fmt(detail.embeddingWeight, 2)}*${fmt(detail.cosine, 3)} = ${fmt(detail.base, 4)}`);
        console.log(` strength: x${fmt(detail.strengthMultiplier, 3)} (strength=${fmt(hit.entry.strength, 3)})`);
        console.log(` recency: x${fmt(detail.recencyMultiplier, 3)} (age=${detail.ageDays}d)`);
        // Multipliers equal to exactly 1 had no effect and are omitted.
        if (detail.decisionBoost !== 1) {
            console.log(` decision: x${fmt(detail.decisionBoost, 2)} (tagged 'decision')`);
        }
        if (detail.pathBoost !== 1) {
            console.log(` path: x${fmt(detail.pathBoost, 3)} (cwd path tag overlap)`);
        }
        if (detail.sourceBump !== 1) {
            console.log(` source: x${fmt(detail.sourceBump, 2)} (local priority bump over global)`);
        }
        if (detail.outcomeBoost !== 1) {
            const netOutcome = (hit.entry.outcome_positive ?? 0) - (hit.entry.outcome_negative ?? 0);
            console.log(` outcome: x${fmt(detail.outcomeBoost, 3)} (user feedback: pos-neg = ${netOutcome})`);
        }
        const rankMoved = detail.preMmrRank !== undefined
            && detail.postMmrRank !== undefined
            && detail.preMmrRank !== detail.postMmrRank;
        if (rankMoved) {
            const arrow = detail.postMmrRank < detail.preMmrRank ? 'up' : 'down';
            console.log(` mmr: rank ${detail.preMmrRank} -> ${detail.postMmrRank} (diversity ${arrow})`);
        }
        console.log(` final: ${fmt(detail.final, 4)}`);
        console.log();
    }
    console.log('Note: explain does not mark memories as retrieved (read-only).');
}
|
|
676
|
+
/**
 * `hippo eval`: measure recall quality against a test corpus, or (with
 * `--bootstrap`) emit a synthetic corpus built from the current memories.
 *
 * Changes from the previous implementation:
 *   - `--min-mrr` and `--max-cases` are validated. An unparsable `--min-mrr`
 *     used to become NaN, and `meanMrr < NaN` is always false, so the CI gate
 *     silently passed.
 *   - The `--min-mrr` failure is still reported where it was, but the
 *     non-zero exit is deferred until after the `--compare` report so the
 *     per-case regressions are visible when the gate fails.
 *   - The top-10 slice used for `--show-cases` is computed once per case.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {string|null} corpusPath - Path to a corpus JSON file: either an
 *   array of cases or `{ cases: [...] }`. Not needed with `--bootstrap`.
 * @param {object} flags - Parsed CLI flags; reads `json`, `min-mrr`,
 *   `show-cases`, `compare`, `no-mmr`, `mmr-lambda`, `embedding-weight`,
 *   `bootstrap`, `out`, `max-cases`, `equal-sources`, `local-bump`.
 * @returns {Promise<void>} Exits the process with code 1 on usage errors,
 *   unreadable inputs, or a mean MRR below `--min-mrr`.
 */
async function cmdEval(hippoRoot, corpusPath, flags) {
    requireInit(hippoRoot);
    const asJson = Boolean(flags['json']);
    const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
    const showCases = Boolean(flags['show-cases']);
    const comparePath = flags['compare'] ? String(flags['compare']) : null;
    const noMmr = Boolean(flags['no-mmr']);
    const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
    const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
    const entries = loadAllEntries(hippoRoot);

    // Bootstrap mode: emit a synthetic corpus and exit.
    if (flags['bootstrap']) {
        const outPath = flags['out'] ? String(flags['out']) : null;
        const max = flags['max-cases'] !== undefined ? parseInt(String(flags['max-cases']), 10) : 50;
        if (Number.isNaN(max) || max < 0) {
            console.error(`Invalid --max-cases value: ${flags['max-cases']}`);
            process.exit(1);
        }
        const corpus = bootstrapCorpus(entries, max);
        const payload = JSON.stringify({ cases: corpus }, null, 2);
        if (outPath) {
            fs.mkdirSync(path.dirname(outPath), { recursive: true });
            fs.writeFileSync(outPath, payload, 'utf8');
            console.log(`Wrote ${corpus.length} bootstrap cases to ${outPath}`);
        }
        else {
            console.log(payload);
        }
        return;
    }

    if (!corpusPath) {
        console.error('Usage: hippo eval <corpus.json> OR hippo eval --bootstrap [--out <path>]');
        process.exit(1);
    }
    // Reject an unusable gate before doing any work: a NaN threshold would
    // make the comparison below always false and the gate would never trip.
    if (minMrr !== null && Number.isNaN(minMrr)) {
        console.error(`Invalid --min-mrr value: ${flags['min-mrr']}`);
        process.exit(1);
    }
    if (!fs.existsSync(corpusPath)) {
        console.error(`Corpus file not found: ${corpusPath}`);
        process.exit(1);
    }
    let cases;
    try {
        const raw = JSON.parse(fs.readFileSync(corpusPath, 'utf8'));
        cases = Array.isArray(raw) ? raw : raw.cases;
        if (!Array.isArray(cases))
            throw new Error('Corpus JSON must be an array or { cases: [...] }');
    }
    catch (err) {
        console.error(`Failed to read corpus: ${err instanceof Error ? err.message : err}`);
        process.exit(1);
    }

    const globalRoot = getGlobalRoot();
    const localBump = flags['equal-sources']
        ? 1.0
        : flags['local-bump'] !== undefined
            ? parseFloat(String(flags['local-bump']))
            : loadConfig(hippoRoot).search.localBump;
    // NOTE(review): MMR is on unless --no-mmr is passed; unlike recall/explain
    // this call site does not consult config.mmr.enabled. Confirm whether
    // runEval applies the config itself.
    const summary = await runEval(cases, entries, {
        hippoRoot,
        globalRoot,
        mmr: !noMmr,
        mmrLambda,
        embeddingWeight,
        localBump,
    });

    if (asJson) {
        console.log(JSON.stringify(summary, null, 2));
    }
    else {
        console.log(`Eval: ${summary.cases.length} cases, ${summary.durationMs}ms`);
        console.log();
        console.log(`MRR: ${fmt(summary.meanMrr, 4)}`);
        console.log(`Recall@5: ${fmt(summary.meanRecallAt5, 4)}`);
        console.log(`Recall@10: ${fmt(summary.meanRecallAt10, 4)}`);
        console.log(`NDCG@10: ${fmt(summary.meanNdcgAt10, 4)}`);
        if (showCases) {
            console.log();
            console.log('Case details:');
            for (const c of summary.cases) {
                const exp = c.case.expectedIds.length;
                const expectedSet = new Set(c.case.expectedIds);
                // Slice once; the Set gives constant-time membership checks.
                const top10 = c.returnedIds.slice(0, 10);
                const top10Set = new Set(top10);
                const hitTop10 = top10.filter((id) => expectedSet.has(id));
                const missed = c.case.expectedIds.filter((id) => !top10Set.has(id));
                console.log();
                console.log(`[${c.case.id}] R@10=${fmt(c.recallAt10, 2)} MRR=${fmt(c.mrr, 2)} expected=${exp} hit=${hitTop10.length}`);
                console.log(` query: ${c.case.query}`);
                console.log(` top 3: ${c.returnedIds.slice(0, 3).join(', ') || '(none)'}`);
                if (missed.length > 0) {
                    const shown = missed.slice(0, 4);
                    const more = missed.length > shown.length ? ` +${missed.length - shown.length} more` : '';
                    console.log(` missed: ${shown.join(', ')}${more}`);
                }
            }
        }
        console.log();
        const failing = summary.cases.filter((c) => c.mrr === 0);
        if (failing.length > 0) {
            console.log(`${failing.length} case(s) returned zero relevant results:`);
            for (const f of failing.slice(0, 10)) {
                console.log(` [${f.case.id}] "${f.case.query.slice(0, 60)}"`);
            }
            if (failing.length > 10)
                console.log(` ...and ${failing.length - 10} more`);
        }
    }

    // Report the gate failure now, but exit only after the comparison below
    // has been printed.
    const gateFailed = minMrr !== null && summary.meanMrr < minMrr;
    if (gateFailed) {
        console.error(`MRR ${fmt(summary.meanMrr, 4)} below threshold ${minMrr}`);
    }

    if (comparePath) {
        if (!fs.existsSync(comparePath)) {
            console.error(`Baseline file not found: ${comparePath}`);
            process.exit(1);
        }
        let baseline;
        try {
            baseline = JSON.parse(fs.readFileSync(comparePath, 'utf8'));
        }
        catch (err) {
            console.error(`Failed to parse baseline: ${err instanceof Error ? err.message : err}`);
            process.exit(1);
        }
        const cmp = compareSummaries(baseline, summary);
        if (asJson) {
            // The main JSON output already emitted; append comparison to stderr so
            // both can be captured independently.
            console.error(JSON.stringify({ compare: cmp }, null, 2));
        }
        else {
            console.log();
            console.log('Compare vs baseline:');
            const sign = (d) => (d >= 0 ? '+' : '') + fmt(d, 4);
            console.log(` MRR: ${sign(cmp.aggregate.mrr)}`);
            console.log(` Recall@5: ${sign(cmp.aggregate.recallAt5)}`);
            console.log(` Recall@10: ${sign(cmp.aggregate.recallAt10)}`);
            console.log(` NDCG@10: ${sign(cmp.aggregate.ndcgAt10)}`);
            console.log();
            console.log(` improved: ${cmp.improved.length} regressed: ${cmp.regressed.length} unchanged: ${cmp.unchanged}`);
            if (cmp.onlyInBaseline.length > 0)
                console.log(` only in baseline: ${cmp.onlyInBaseline.length}`);
            if (cmp.onlyInCurrent.length > 0)
                console.log(` only in current: ${cmp.onlyInCurrent.length}`);
            // At most five cases per direction; empty lists print nothing.
            for (const d of cmp.improved.slice(0, 5)) {
                const delta = d.ndcgAfter - d.ndcgBefore;
                console.log(` + [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (+${fmt(delta, 3)})`);
            }
            for (const d of cmp.regressed.slice(0, 5)) {
                const delta = d.ndcgAfter - d.ndcgBefore;
                console.log(` - [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (${fmt(delta, 3)})`);
            }
        }
    }

    if (gateFailed) {
        process.exit(1);
    }
}
|
|
826
|
+
/**
 * `hippo trace <id>`: print a dossier for one memory — content, current and
 * projected strength, half-life, retrieval stats, outcome counts,
 * consolidation parents and open conflicts.
 *
 * The memory is looked up in the local store first, then in the global store
 * if that store is initialized.
 *
 * Changes from the previous implementation:
 *   - The content ellipsis is decided on the whitespace-collapsed text that
 *     is actually printed, not on the raw length, so whitespace-heavy content
 *     no longer gets a spurious `...`.
 *   - Text output prints missing outcome counters as 0, matching how
 *     cmdExplain treats the same fields. JSON output is unchanged.
 *   - The source label is switched to `global` only when the global lookup
 *     actually found the entry.
 *   - `isInitialized(globalRoot)` is evaluated once instead of per parent.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {string} id - Memory id to trace.
 * @param {object} flags - Parsed CLI flags; reads `json`.
 * @returns {void} Exits the process with code 1 if the memory is not found.
 */
function cmdTrace(hippoRoot, id, flags) {
    requireInit(hippoRoot);
    const MS_PER_DAY = 86_400_000;
    const CONTENT_PREVIEW_CHARS = 160;
    const asJson = Boolean(flags['json']);
    const globalRoot = getGlobalRoot();
    const globalReady = isInitialized(globalRoot);

    // Look in local store first, then global.
    let entry = readEntry(hippoRoot, id);
    let sourceLabel = 'local';
    if (!entry && globalReady) {
        entry = readEntry(globalRoot, id);
        if (entry) {
            sourceLabel = 'global';
        }
    }
    if (!entry) {
        console.error(`Memory not found: ${id}`);
        process.exit(1);
    }

    const now = new Date();
    const nowMs = now.getTime();
    const strength = calculateStrength(entry, now);
    // NOTE(review): 7 is presumably the base half-life in days — confirm
    // against deriveHalfLife.
    const halfLife = deriveHalfLife(7, entry);
    const rewardFactor = calculateRewardFactor(entry);
    const effHalfLife = halfLife * rewardFactor;
    const ageDays = (nowMs - new Date(entry.created).getTime()) / MS_PER_DAY;
    // NOTE(review): assumes last_retrieved is always a parseable timestamp;
    // verify what never-retrieved memories store here.
    const sinceLast = (nowMs - new Date(entry.last_retrieved).getTime()) / MS_PER_DAY;
    const conf = resolveConfidence(entry, now);

    // Projected strength: same decay curve, just push `now` out.
    const projectedAt = (days) => calculateStrength(entry, new Date(nowMs + days * MS_PER_DAY));

    // Parents (consolidation lineage) — schema v9 field.
    const parents = Array.isArray(entry.parents) ? entry.parents : [];
    const parentPreviews = parents.map((pid) => {
        const p = readEntry(hippoRoot, pid) ?? (globalReady ? readEntry(globalRoot, pid) : null);
        return { id: pid, content: p ? p.content.replace(/\s+/g, ' ').slice(0, 70) : '(not found)' };
    });

    // Open conflicts involving this memory.
    const allConflicts = [
        ...listMemoryConflicts(hippoRoot, 'open'),
        ...(globalReady ? listMemoryConflicts(globalRoot, 'open') : []),
    ];
    const myConflicts = allConflicts.filter((c) => c.memory_a_id === id || c.memory_b_id === id);

    if (asJson) {
        console.log(JSON.stringify({
            id: entry.id,
            source: sourceLabel,
            layer: entry.layer,
            confidence: conf,
            pinned: entry.pinned,
            starred: entry.starred,
            tags: entry.tags,
            content: entry.content,
            created: entry.created,
            age_days: ageDays,
            last_retrieved: entry.last_retrieved,
            days_since_last_retrieval: sinceLast,
            retrieval_count: entry.retrieval_count,
            strength_now: strength,
            half_life_days: halfLife,
            reward_factor: rewardFactor,
            effective_half_life_days: effHalfLife,
            projected_strength_30d: projectedAt(30),
            projected_strength_90d: projectedAt(90),
            outcome_positive: entry.outcome_positive,
            outcome_negative: entry.outcome_negative,
            parents: parentPreviews,
            open_conflicts: myConflicts,
        }, null, 2));
        return;
    }

    // Truncate the same collapsed string that is printed.
    const flatContent = entry.content.replace(/\s+/g, ' ');
    const ellipsis = flatContent.length > CONTENT_PREVIEW_CHARS ? '...' : '';
    const positives = entry.outcome_positive ?? 0;
    const negatives = entry.outcome_negative ?? 0;

    console.log(`Memory: ${entry.id} [${sourceLabel}]`);
    console.log('='.repeat(50));
    console.log(`Content: ${flatContent.slice(0, CONTENT_PREVIEW_CHARS)}${ellipsis}`);
    console.log(`Layer: ${entry.layer.padEnd(10)} Confidence: ${conf.padEnd(10)} Pinned: ${entry.pinned ? 'yes' : 'no'}${entry.starred ? ' Starred: yes' : ''}`);
    console.log(`Tags: ${entry.tags.join(', ') || '(none)'}`);
    console.log(`Created: ${entry.created} (${fmt(ageDays, 1)} days ago)`);
    console.log();
    console.log(`Strength trajectory:`);
    console.log(` now: ${fmt(strength, 3)}`);
    console.log(` in 30 days: ${fmt(projectedAt(30), 3)}`);
    console.log(` in 90 days: ${fmt(projectedAt(90), 3)}`);
    console.log(` half-life: ${fmt(halfLife, 1)}d (base) x ${fmt(rewardFactor, 2)} reward = ${fmt(effHalfLife, 1)}d effective`);
    console.log();
    console.log(`Retrieval:`);
    console.log(` count: ${entry.retrieval_count}`);
    console.log(` last: ${entry.last_retrieved} (${fmt(sinceLast, 1)} days ago)`);
    console.log();
    console.log(`Outcomes: +${positives} / -${negatives}`);
    if (parentPreviews.length > 0) {
        console.log();
        console.log(`Parents (consolidation lineage):`);
        for (const p of parentPreviews) {
            console.log(` - ${p.id}: ${p.content}`);
        }
    }
    if (myConflicts.length > 0) {
        console.log();
        console.log(`Open conflicts: ${myConflicts.length}`);
        for (const c of myConflicts) {
            const other = c.memory_a_id === id ? c.memory_b_id : c.memory_a_id;
            console.log(` - with ${other}: ${c.reason} (score=${fmt(c.score, 2)})`);
        }
    }
}
|
|
927
|
+
/**
 * `hippo refine`: hand the store to `refineStore` for an LLM rewrite of
 * memories, then print a summary (or the raw result with `--json`).
 *
 * Changes from the previous implementation:
 *   - `--limit` is validated before the API is called. An unparsable value
 *     used to be forwarded as NaN, leaving the size of a paid run undefined.
 *   - When more than five memories fail, the truncated failure list now says
 *     how many were omitted.
 *
 * @param {string} hippoRoot - Root of the local store.
 * @param {object} flags - Parsed CLI flags; reads `dry-run`, `all`, `limit`,
 *   `model`, `json`.
 * @returns {Promise<void>} Exits the process with code 1 when
 *   ANTHROPIC_API_KEY is unset or `--limit` is invalid.
 */
async function cmdRefine(hippoRoot, flags) {
    requireInit(hippoRoot);
    const MAX_FAILURES_SHOWN = 5;
    const apiKey = process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
        console.error('hippo refine needs ANTHROPIC_API_KEY in the environment.');
        process.exit(1);
    }
    const dryRun = Boolean(flags['dry-run']);
    const all = Boolean(flags['all']);
    const limit = flags['limit'] !== undefined ? parseInt(String(flags['limit']), 10) : undefined;
    // parseInt yields NaN for non-numeric input (including a bare `--limit`);
    // refuse it rather than pass it to the API-backed refiner.
    if (limit !== undefined && (Number.isNaN(limit) || limit < 0)) {
        console.error(`Invalid --limit value: ${flags['limit']} (expected a non-negative integer)`);
        process.exit(1);
    }
    const model = flags['model'] ? String(flags['model']) : undefined;
    const asJson = Boolean(flags['json']);

    const result = await refineStore(hippoRoot, { apiKey, model, limit, dryRun, all });
    if (asJson) {
        console.log(JSON.stringify(result, null, 2));
        return;
    }

    console.log(`Scanned: ${result.scanned} consolidated semantic memories`);
    console.log(`Refined: ${result.refined}${dryRun ? ' (dry-run — no writes)' : ''}`);
    console.log(`Skipped: ${result.skipped}`);
    console.log(`Failed: ${result.failed}`);
    if (result.failed > 0) {
        const failures = result.details.filter((x) => x.status === 'failed');
        console.log('\nFailures:');
        for (const d of failures.slice(0, MAX_FAILURES_SHOWN)) {
            console.log(` ${d.id}: ${d.reason}`);
        }
        if (failures.length > MAX_FAILURES_SHOWN) {
            console.log(` ...and ${failures.length - MAX_FAILURES_SHOWN} more`);
        }
    }
}
|
|
527
955
|
/**
|
|
528
956
|
* Scan for Claude Code MEMORY.md files and import new entries into hippo.
|
|
529
957
|
* Looks in ~/.claude/projects/<project>/memory/ for .md files with YAML frontmatter.
|
|
@@ -2638,8 +3066,41 @@ Commands:
|
|
|
2638
3066
|
--global Store in global store ($HIPPO_HOME or ~/.hippo/)
|
|
2639
3067
|
recall <query> Search and retrieve memories (local + global)
|
|
2640
3068
|
--budget <n> Token budget (default: 4000)
|
|
3069
|
+
--min-results <n> Minimum results regardless of budget (default: 1)
|
|
2641
3070
|
--json Output as JSON
|
|
2642
3071
|
--why Show match reasons and source annotations
|
|
3072
|
+
--no-mmr Disable MMR diversity re-ranking
|
|
3073
|
+
--mmr-lambda <f> MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
|
|
3074
|
+
explain <query> Show full score breakdown for each retrieved memory
|
|
3075
|
+
--budget <n> Token budget (default: 4000)
|
|
3076
|
+
--limit <n> Cap the number of results displayed
|
|
3077
|
+
--json Output as JSON
|
|
3078
|
+
--physics | --classic Force search mode (default: from config)
|
|
3079
|
+
--no-mmr Disable MMR diversity re-ranking
|
|
3080
|
+
--mmr-lambda <f> MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
|
|
3081
|
+
trace <id> Memory dossier: content, decay trajectory, retrievals,
|
|
3082
|
+
outcomes, consolidation parents, open conflicts
|
|
3083
|
+
--json Output as JSON
|
|
3084
|
+
refine Rewrite consolidated semantic memories with Claude
|
|
3085
|
+
--limit <n> Cap the number of memories processed this run
|
|
3086
|
+
--all Ignore \`llm-refined\` tag and re-refine everything
|
|
3087
|
+
--dry-run Call the API but don't write results back
|
|
3088
|
+
--model <id> Override the default model (claude-sonnet-4-6)
|
|
3089
|
+
--json Output summary as JSON
|
|
3090
|
+
(requires ANTHROPIC_API_KEY in env)
|
|
3091
|
+
eval [<corpus.json>] Measure recall quality against a test corpus
|
|
3092
|
+
--bootstrap Generate a synthetic corpus from current memories
|
|
3093
|
+
--out <path> With --bootstrap, write to file instead of stdout
|
|
3094
|
+
--max-cases <n> With --bootstrap, cap case count (default: 50)
|
|
3095
|
+
--show-cases Print per-case details (query, R@10, missed, top 3)
|
|
3096
|
+
--compare <path> JSON from a prior \`eval --json\` run; print deltas
|
|
3097
|
+
--no-mmr Disable MMR for this eval run
|
|
3098
|
+
--mmr-lambda <f> Override MMR lambda for this run
|
|
3099
|
+
--embedding-weight <f> Override cosine weight (default: 0.6)
|
|
3100
|
+
--local-bump <f> Local-over-global priority multiplier (default: 1.2)
|
|
3101
|
+
--equal-sources Shortcut for --local-bump 1.0
|
|
3102
|
+
--min-mrr <f> Exit non-zero if mean MRR falls below this
|
|
3103
|
+
--json Output full summary as JSON
|
|
2643
3104
|
context Smart context injection for AI agents
|
|
2644
3105
|
--auto Auto-detect task from git state
|
|
2645
3106
|
--budget <n> Token budget (default: 1500)
|
|
@@ -2841,6 +3302,32 @@ async function main() {
|
|
|
2841
3302
|
await cmdRecall(hippoRoot, query, flags);
|
|
2842
3303
|
break;
|
|
2843
3304
|
}
|
|
3305
|
+
case 'explain': {
|
|
3306
|
+
const query = args.join(' ').trim();
|
|
3307
|
+
if (!query) {
|
|
3308
|
+
console.error('Please provide a search query.');
|
|
3309
|
+
process.exit(1);
|
|
3310
|
+
}
|
|
3311
|
+
await cmdExplain(hippoRoot, query, flags);
|
|
3312
|
+
break;
|
|
3313
|
+
}
|
|
3314
|
+
case 'eval': {
|
|
3315
|
+
const corpusPath = args[0] ? String(args[0]) : null;
|
|
3316
|
+
await cmdEval(hippoRoot, corpusPath, flags);
|
|
3317
|
+
break;
|
|
3318
|
+
}
|
|
3319
|
+
case 'trace': {
|
|
3320
|
+
const id = args[0] ? String(args[0]) : null;
|
|
3321
|
+
if (!id) {
|
|
3322
|
+
console.error('Usage: hippo trace <memory-id>');
|
|
3323
|
+
process.exit(1);
|
|
3324
|
+
}
|
|
3325
|
+
cmdTrace(hippoRoot, id, flags);
|
|
3326
|
+
break;
|
|
3327
|
+
}
|
|
3328
|
+
case 'refine':
|
|
3329
|
+
await cmdRefine(hippoRoot, flags);
|
|
3330
|
+
break;
|
|
2844
3331
|
case 'sleep':
|
|
2845
3332
|
cmdSleep(hippoRoot, flags);
|
|
2846
3333
|
break;
|