npm - hippo-memory - Versions diffs - 0.27.0 → 0.28.0 - Mend

hippo-memory 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +8 -0
package/dist/cli.js +116 -4
package/dist/cli.js.map +1 -1
package/dist/config.d.ts +5 -0
package/dist/config.d.ts.map +1 -1
package/dist/config.js +4 -0
package/dist/config.js.map +1 -1
package/dist/eval.d.ts +35 -0
package/dist/eval.d.ts.map +1 -1
package/dist/eval.js +68 -8
package/dist/eval.js.map +1 -1
package/dist/refine-llm.d.ts +53 -0
package/dist/refine-llm.d.ts.map +1 -0
package/dist/refine-llm.js +147 -0
package/dist/refine-llm.js.map +1 -0
package/dist/search.d.ts +26 -0
package/dist/search.d.ts.map +1 -1
package/dist/search.js +70 -26
package/dist/search.js.map +1 -1
package/dist/shared.d.ts +4 -0
package/dist/shared.d.ts.map +1 -1
package/dist/shared.js +19 -18
package/dist/shared.js.map +1 -1
package/extensions/openclaw-plugin/openclaw.plugin.json +1 -1
package/extensions/openclaw-plugin/package.json +1 -1
package/openclaw.plugin.json +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -60,6 +60,14 @@ hippo recall "data pipeline issues" --budget 2000
 ---
+### What's new in v0.28.0
+- **Budget saturation fix.** Large memories (14k+ chars) no longer starve retrieval. New `minResults` option guarantees at least N results regardless of token budget. `hippo recall <q> --min-results 5`.
+- **LongMemEval parity restored.** The 35pp R@10 gap vs v0.11 was a benchmark methodology issue (budget-limited vs unlimited comparison). Corrected: v0.28 R@3 67.0% (+0.4pp), answer_in_content@5 49.6% (+3.0pp), R@10 81.0% (-1.6pp). Top-5 results now more often contain the actual answer.
+- **MMR performance.** Re-ranking capped at top-100 candidates, dropping per-query time from ~50s to ~9s. `preparedCorpus` option skips per-query tokenization for batch callers.
+- **RRF scoring option.** `hybridSearch` accepts `scoring: 'rrf'` for reciprocal rank fusion as an alternative to score blending.
+- **`hippo refine` command.** LLM-powered semantic rewrite of memories for improved recall quality.
 ### What's new in v0.27.0
 - **Recall is now debuggable.** `hippo explain <query>` prints the full score breakdown for each retrieved memory: BM25 + cosine, every multiplier (strength, recency, decision, path, source-bump, outcome), age, and final composite. Read-only so it's safe to run as a diagnostic.

package/dist/cli.js CHANGED Viewed

@@ -47,7 +47,8 @@ import { DAILY_TASK_NAME, buildDailyRunnerCommand, listRegisteredWorkspaces, reg
 import { importChatGPT, importClaude, importCursor, importGenericFile, importMarkdown, } from './importers.js';
 import { cmdCapture } from './capture.js';
 import { auditMemories } from './audit.js';
-import { runEval, bootstrapCorpus } from './eval.js';
+import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
+import { refineStore } from './refine-llm.js';
 import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
 // ---------------------------------------------------------------------------
 // Helpers
@@ -445,23 +446,32 @@ async function cmdRecall(hippoRoot, query, flags) {
         ? parseFloat(String(flags['mmr-lambda']))
         : config.mmr.lambda;
     const mmrEnabled = !noMmr && config.mmr.enabled;
+    const localBump = flags['equal-sources']
+        ? 1.0
+        : flags['local-bump'] !== undefined
+            ? parseFloat(String(flags['local-bump']))
+            : config.search.localBump;
+    const minResults = flags['min-results'] !== undefined
+        ? parseInt(String(flags['min-results']), 10)
+        : undefined;
     let results;
     if (usePhysics && !hasGlobal) {
         results = await physicsSearch(query, localEntries, {
             budget,
             hippoRoot,
             physicsConfig: config.physics,
+            minResults,
         });
     }
     else if (hasGlobal) {
         // Use searchBothHybrid for merged results with embedding support
         results = await searchBothHybrid(query, hippoRoot, globalRoot, {
-            budget, mmr: mmrEnabled, mmrLambda,
+            budget, mmr: mmrEnabled, mmrLambda, localBump, minResults,
         });
     }
     else {
         results = await hybridSearch(query, localEntries, {
-            budget, hippoRoot, mmr: mmrEnabled, mmrLambda,
+            budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults,
         });
     }
     if (limit < results.length) {
@@ -553,6 +563,11 @@ async function cmdExplain(hippoRoot, query, flags) {
         ? parseFloat(String(flags['mmr-lambda']))
         : config.mmr.lambda;
     const mmrEnabled = !noMmr && config.mmr.enabled;
+    const localBump = flags['equal-sources']
+        ? 1.0
+        : flags['local-bump'] !== undefined
+            ? parseFloat(String(flags['local-bump']))
+            : config.search.localBump;
     let results;
     let modeUsed;
     if (usePhysics && !hasGlobal) {
@@ -566,7 +581,7 @@ async function cmdExplain(hippoRoot, query, flags) {
     }
     else if (hasGlobal) {
         results = await searchBothHybrid(query, hippoRoot, globalRoot, {
-            budget, explain: true, mmr: mmrEnabled, mmrLambda,
+            budget, explain: true, mmr: mmrEnabled, mmrLambda, localBump,
         });
         modeUsed = 'searchBothHybrid';
     }
@@ -663,6 +678,7 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
     const asJson = Boolean(flags['json']);
     const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
     const showCases = Boolean(flags['show-cases']);
+    const comparePath = flags['compare'] ? String(flags['compare']) : null;
     const noMmr = Boolean(flags['no-mmr']);
     const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
     const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
@@ -702,11 +718,19 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
         console.error(`Failed to read corpus: ${err instanceof Error ? err.message : err}`);
         process.exit(1);
     }
+    const globalRoot = getGlobalRoot();
+    const localBump = flags['equal-sources']
+        ? 1.0
+        : flags['local-bump'] !== undefined
+            ? parseFloat(String(flags['local-bump']))
+            : loadConfig(hippoRoot).search.localBump;
     const summary = await runEval(cases, entries, {
         hippoRoot,
+        globalRoot,
         mmr: !noMmr,
         mmrLambda,
         embeddingWeight,
+        localBump,
     });
     if (asJson) {
         console.log(JSON.stringify(summary, null, 2));
@@ -752,6 +776,52 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
         console.error(`MRR ${fmt(summary.meanMrr, 4)} below threshold ${minMrr}`);
         process.exit(1);
     }
+    if (comparePath) {
+        if (!fs.existsSync(comparePath)) {
+            console.error(`Baseline file not found: ${comparePath}`);
+            process.exit(1);
+        }
+        let baseline;
+        try {
+            baseline = JSON.parse(fs.readFileSync(comparePath, 'utf8'));
+        }
+        catch (err) {
+            console.error(`Failed to parse baseline: ${err instanceof Error ? err.message : err}`);
+            process.exit(1);
+        }
+        const cmp = compareSummaries(baseline, summary);
+        if (asJson) {
+            // The main JSON output already emitted; append comparison to stderr so
+            // both can be captured independently.
+            console.error(JSON.stringify({ compare: cmp }, null, 2));
+        }
+        else {
+            console.log();
+            console.log('Compare vs baseline:');
+            const sign = (d) => (d >= 0 ? '+' : '') + fmt(d, 4);
+            console.log(`  MRR:        ${sign(cmp.aggregate.mrr)}`);
+            console.log(`  Recall@5:   ${sign(cmp.aggregate.recallAt5)}`);
+            console.log(`  Recall@10:  ${sign(cmp.aggregate.recallAt10)}`);
+            console.log(`  NDCG@10:    ${sign(cmp.aggregate.ndcgAt10)}`);
+            console.log();
+            console.log(`  improved: ${cmp.improved.length}   regressed: ${cmp.regressed.length}   unchanged: ${cmp.unchanged}`);
+            if (cmp.onlyInBaseline.length > 0)
+                console.log(`  only in baseline: ${cmp.onlyInBaseline.length}`);
+            if (cmp.onlyInCurrent.length > 0)
+                console.log(`  only in current:  ${cmp.onlyInCurrent.length}`);
+            const showPerCase = cmp.improved.length + cmp.regressed.length > 0;
+            if (showPerCase) {
+                for (const d of cmp.improved.slice(0, 5)) {
+                    const delta = d.ndcgAfter - d.ndcgBefore;
+                    console.log(`  + [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (+${fmt(delta, 3)})`);
+                }
+                for (const d of cmp.regressed.slice(0, 5)) {
+                    const delta = d.ndcgAfter - d.ndcgBefore;
+                    console.log(`  - [${d.id}] NDCG ${fmt(d.ndcgBefore, 2)} -> ${fmt(d.ndcgAfter, 2)} (${fmt(delta, 3)})`);
+                }
+            }
+        }
+    }
 }
 function cmdTrace(hippoRoot, id, flags) {
     requireInit(hippoRoot);
@@ -854,6 +924,34 @@ function cmdTrace(hippoRoot, id, flags) {
         }
     }
 }
+async function cmdRefine(hippoRoot, flags) { // CLI handler for `hippo refine`: parses flags, calls refineStore, then prints a JSON or text summary
+    requireInit(hippoRoot);
+    const apiKey = process.env.ANTHROPIC_API_KEY; // the key is read from the environment only; no CLI flag supplies it
+    if (!apiKey) {
+        console.error('hippo refine needs ANTHROPIC_API_KEY in the environment.');
+        process.exit(1); // abort before refineStore is called when the key is missing
+    }
+    const dryRun = Boolean(flags['dry-run']);
+    const all = Boolean(flags['all']);
+    const limit = flags['limit'] !== undefined ? parseInt(String(flags['limit']), 10) : undefined; // NOTE(review): a non-numeric --limit parses to NaN and is passed through unvalidated
+    const model = flags['model'] ? String(flags['model']) : undefined; // undefined when --model is absent; presumably refineStore then picks its default - confirm
+    const asJson = Boolean(flags['json']);
+    const result = await refineStore(hippoRoot, { apiKey, model, limit, dryRun, all });
+    if (asJson) { // JSON mode: print the raw result object and skip the text summary below
+        console.log(JSON.stringify(result, null, 2));
+        return;
+    }
+    console.log(`Scanned:  ${result.scanned} consolidated semantic memories`);
+    console.log(`Refined:  ${result.refined}${dryRun ? '  (dry-run — no writes)' : ''}`); // suffix marks the count as a dry-run figure
+    console.log(`Skipped:  ${result.skipped}`);
+    console.log(`Failed:   ${result.failed}`);
+    if (result.failed > 0) { // failure details are listed only when at least one item failed
+        console.log('\nFailures:');
+        for (const d of result.details.filter((x) => x.status === 'failed').slice(0, 5)) { // at most the first five failed entries are shown
+            console.log(`  ${d.id}: ${d.reason}`);
+        }
+    }
+}
 /**
  * Scan for Claude Code MEMORY.md files and import new entries into hippo.
  * Looks in ~/.claude/projects/<project>/memory/ for .md files with YAML frontmatter.
@@ -2968,6 +3066,7 @@ Commands:
     --global               Store in global store ($HIPPO_HOME or ~/.hippo/)
   recall <query>           Search and retrieve memories (local + global)
     --budget <n>           Token budget (default: 4000)
+    --min-results <n>      Minimum results regardless of budget (default: 1)
     --json                 Output as JSON
     --why                  Show match reasons and source annotations
     --no-mmr               Disable MMR diversity re-ranking
@@ -2982,14 +3081,24 @@ Commands:
   trace <id>               Memory dossier: content, decay trajectory, retrievals,
                            outcomes, consolidation parents, open conflicts
     --json                 Output as JSON
+  refine                   Rewrite consolidated semantic memories with Claude
+    --limit <n>            Cap the number of memories processed this run
+    --all                  Ignore \`llm-refined\` tag and re-refine everything
+    --dry-run              Call the API but don't write results back
+    --model <id>           Override the default model (claude-sonnet-4-6)
+    --json                 Output summary as JSON
+    (requires ANTHROPIC_API_KEY in env)
   eval [<corpus.json>]     Measure recall quality against a test corpus
     --bootstrap            Generate a synthetic corpus from current memories
     --out <path>           With --bootstrap, write to file instead of stdout
     --max-cases <n>        With --bootstrap, cap case count (default: 50)
     --show-cases           Print per-case details (query, R@10, missed, top 3)
+    --compare <path>       JSON from a prior \`eval --json\` run; print deltas
     --no-mmr               Disable MMR for this eval run
     --mmr-lambda <f>       Override MMR lambda for this run
     --embedding-weight <f> Override cosine weight (default: 0.6)
+    --local-bump <f>       Local-over-global priority multiplier (default: 1.2)
+    --equal-sources        Shortcut for --local-bump 1.0
     --min-mrr <f>          Exit non-zero if mean MRR falls below this
     --json                 Output full summary as JSON
   context                  Smart context injection for AI agents
@@ -3216,6 +3325,9 @@ async function main() {
             cmdTrace(hippoRoot, id, flags);
             break;
         }
+        case 'refine':
+            await cmdRefine(hippoRoot, flags);
+            break;
         case 'sleep':
             cmdSleep(hippoRoot, flags);
             break;