hippo-memory 0.32.0 → 0.34.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +16 -0
  2. package/dist/ambient.d.ts +26 -0
  3. package/dist/ambient.d.ts.map +1 -0
  4. package/dist/ambient.js +147 -0
  5. package/dist/ambient.js.map +1 -0
  6. package/dist/capture.js +4 -0
  7. package/dist/capture.js.map +1 -1
  8. package/dist/cli.js +421 -29
  9. package/dist/cli.js.map +1 -1
  10. package/dist/config.d.ts +17 -0
  11. package/dist/config.d.ts.map +1 -1
  12. package/dist/config.js +21 -0
  13. package/dist/config.js.map +1 -1
  14. package/dist/consolidate.d.ts +5 -1
  15. package/dist/consolidate.d.ts.map +1 -1
  16. package/dist/consolidate.js +64 -10
  17. package/dist/consolidate.js.map +1 -1
  18. package/dist/dag.d.ts +20 -0
  19. package/dist/dag.d.ts.map +1 -0
  20. package/dist/dag.js +104 -0
  21. package/dist/dag.js.map +1 -0
  22. package/dist/db.d.ts.map +1 -1
  23. package/dist/db.js +132 -1
  24. package/dist/db.js.map +1 -1
  25. package/dist/eval-suite.d.ts +82 -0
  26. package/dist/eval-suite.d.ts.map +1 -0
  27. package/dist/eval-suite.js +289 -0
  28. package/dist/eval-suite.js.map +1 -0
  29. package/dist/extract.d.ts +14 -0
  30. package/dist/extract.d.ts.map +1 -0
  31. package/dist/extract.js +87 -0
  32. package/dist/extract.js.map +1 -0
  33. package/dist/importers.d.ts.map +1 -1
  34. package/dist/importers.js +5 -0
  35. package/dist/importers.js.map +1 -1
  36. package/dist/index.d.ts +5 -1
  37. package/dist/index.d.ts.map +1 -1
  38. package/dist/index.js +8 -1
  39. package/dist/index.js.map +1 -1
  40. package/dist/mcp/framing.d.ts +12 -0
  41. package/dist/mcp/framing.d.ts.map +1 -0
  42. package/dist/mcp/framing.js +45 -0
  43. package/dist/mcp/framing.js.map +1 -0
  44. package/dist/mcp/server.js +28 -33
  45. package/dist/mcp/server.js.map +1 -1
  46. package/dist/memory.d.ts +15 -0
  47. package/dist/memory.d.ts.map +1 -1
  48. package/dist/memory.js +7 -0
  49. package/dist/memory.js.map +1 -1
  50. package/dist/multihop.d.ts +11 -0
  51. package/dist/multihop.d.ts.map +1 -0
  52. package/dist/multihop.js +32 -0
  53. package/dist/multihop.js.map +1 -0
  54. package/dist/raw-archive.d.ts +16 -0
  55. package/dist/raw-archive.d.ts.map +1 -0
  56. package/dist/raw-archive.js +53 -0
  57. package/dist/raw-archive.js.map +1 -0
  58. package/dist/salience.d.ts +22 -0
  59. package/dist/salience.d.ts.map +1 -0
  60. package/dist/salience.js +74 -0
  61. package/dist/salience.js.map +1 -0
  62. package/dist/search.d.ts +18 -0
  63. package/dist/search.d.ts.map +1 -1
  64. package/dist/search.js +148 -7
  65. package/dist/search.js.map +1 -1
  66. package/dist/store.d.ts.map +1 -1
  67. package/dist/store.js +46 -8
  68. package/dist/store.js.map +1 -1
  69. package/extensions/openclaw-plugin/openclaw.plugin.json +46 -46
  70. package/extensions/openclaw-plugin/package.json +13 -13
  71. package/openclaw.plugin.json +45 -45
  72. package/package.json +74 -73
package/dist/cli.js CHANGED
@@ -51,8 +51,12 @@ import { importChatGPT, importClaude, importCursor, importGenericFile, importMar
51
51
  import { cmdCapture } from './capture.js';
52
52
  import { auditMemories } from './audit.js';
53
53
  import { runEval, bootstrapCorpus, compareSummaries } from './eval.js';
54
+ import { runFeatureEval, formatResult, resultToBaseline, detectRegressions } from './eval-suite.js';
54
55
  import { refineStore } from './refine-llm.js';
55
56
  import { wmPush, wmRead, wmClear, wmFlush } from './working-memory.js';
57
+ import { multihopSearch } from './multihop.js';
58
+ import { computeSalience } from './salience.js';
59
+ import { computeAmbientState, renderAmbientSummary } from './ambient.js';
56
60
  // ---------------------------------------------------------------------------
57
61
  // Helpers
58
62
  // ---------------------------------------------------------------------------
@@ -377,7 +381,7 @@ function setupDailySchedule(globalRoot) {
377
381
  }
378
382
  }
379
383
  }
380
- function cmdRemember(hippoRoot, text, flags) {
384
+ async function cmdRemember(hippoRoot, text, flags) {
381
385
  const useGlobal = Boolean(flags['global']);
382
386
  const targetRoot = useGlobal ? getGlobalRoot() : hippoRoot;
383
387
  if (useGlobal) {
@@ -400,6 +404,23 @@ function cmdRemember(hippoRoot, text, flags) {
400
404
  // Compute schema fit against existing memories
401
405
  const existing = loadAllEntries(targetRoot);
402
406
  const schemaFit = computeSchemaFit(text, rawTags, existing);
407
+ // A3 envelope flags
408
+ const kindFlagRaw = typeof flags['kind'] === 'string' ? flags['kind'] : undefined;
409
+ const kindFlag = kindFlagRaw === undefined ? undefined : kindFlagRaw.toLowerCase();
410
+ // CLI surface intentionally restricted: 'raw' is reserved for ingestion connectors
411
+ // (E1.x: Slack/Jira/Gmail) that route deletions through archiveRawMemory. Existing
412
+ // forget/consolidate/conflict-resolve paths abort on kind='raw' via the append-only
413
+ // trigger, so exposing --kind raw here would create unforgettable memories.
414
+ // 'archived' is an internal sentinel set only inside archiveRawMemory's transaction.
415
+ const userVisibleKinds = ['distilled', 'superseded'];
416
+ if (kindFlag !== undefined && !userVisibleKinds.includes(kindFlag)) {
417
+ console.error(`Invalid --kind: "${kindFlagRaw}". Must be one of: ${userVisibleKinds.join(', ')}`);
418
+ console.error(`(kind='raw' is reserved for ingestion connectors; kind='archived' is internal.)`);
419
+ process.exit(1);
420
+ }
421
+ const ownerFlag = typeof flags['owner'] === 'string' ? flags['owner'] : null;
422
+ const artifactRefFlag = typeof flags['artifact-ref'] === 'string' ? flags['artifact-ref'] : null;
423
+ const scopeForEnvelope = typeof flags['scope'] === 'string' ? flags['scope'].trim() || null : null;
403
424
  const entry = createMemory(text, {
404
425
  layer: Layer.Episodic,
405
426
  tags: rawTags,
@@ -407,6 +428,10 @@ function cmdRemember(hippoRoot, text, flags) {
407
428
  source: useGlobal ? 'cli-global' : 'cli',
408
429
  confidence,
409
430
  schema_fit: schemaFit,
431
+ kind: kindFlag,
432
+ scope: scopeForEnvelope,
433
+ owner: ownerFlag,
434
+ artifact_ref: artifactRefFlag,
410
435
  });
411
436
  // Auto-tag with path context
412
437
  const pathTags = extractPathTags(process.cwd());
@@ -422,6 +447,25 @@ function cmdRemember(hippoRoot, text, flags) {
422
447
  if (!entry.tags.includes(scopeTag))
423
448
  entry.tags.push(scopeTag);
424
449
  }
450
+ // Salience gate: decide if this memory is worth storing
451
+ const rememberConfig = loadConfig(targetRoot);
452
+ if (rememberConfig.salience.enabled && !Boolean(flags['pin']) && !Boolean(flags['force'])) {
453
+ const salienceResult = computeSalience(text, entry.tags, existing, {
454
+ recentWindow: rememberConfig.salience.recentWindow,
455
+ overlapThreshold: rememberConfig.salience.overlapThreshold,
456
+ minContentLength: rememberConfig.salience.minContentLength,
457
+ maxRepeatErrors: rememberConfig.salience.maxRepeatErrors,
458
+ });
459
+ if (salienceResult.decision === 'skip') {
460
+ console.log(`Skipped (salience: ${salienceResult.reason}, score ${salienceResult.score.toFixed(2)})`);
461
+ return;
462
+ }
463
+ if (salienceResult.decision === 'start_weak') {
464
+ entry.strength = salienceResult.score;
465
+ entry.half_life_days = Math.max(1, entry.half_life_days * 0.5);
466
+ console.log(`Weakened (salience: ${salienceResult.reason}, strength ${salienceResult.score.toFixed(2)})`);
467
+ }
468
+ }
425
469
  writeEntry(targetRoot, entry);
426
470
  updateStats(targetRoot, { remembered: 1 });
427
471
  const prefix = useGlobal ? '[global] ' : '';
@@ -437,6 +481,28 @@ function cmdRemember(hippoRoot, text, flags) {
437
481
  // Silently ignore embedding errors
438
482
  });
439
483
  }
484
+ const config = loadConfig(targetRoot);
485
+ const shouldExtract = flags['extract'] || config.extraction.enabled === true;
486
+ const apiKey = process.env.ANTHROPIC_API_KEY ?? '';
487
+ if (shouldExtract && apiKey) {
488
+ try {
489
+ const { extractFacts, storeExtractedFacts } = await import('./extract.js');
490
+ const facts = await extractFacts(entry.content, {
491
+ apiKey,
492
+ model: config.extraction.model,
493
+ });
494
+ if (facts.length > 0) {
495
+ storeExtractedFacts(targetRoot, entry, facts);
496
+ console.error(` extracted ${facts.length} fact(s)`);
497
+ }
498
+ }
499
+ catch {
500
+ // Extraction is best-effort — never block remember
501
+ }
502
+ }
503
+ else if (shouldExtract && !apiKey) {
504
+ console.error(' (extraction skipped: ANTHROPIC_API_KEY not set)');
505
+ }
440
506
  }
441
507
  function cmdSupersede(hippoRoot, oldId, newContent, flags) {
442
508
  requireInit(hippoRoot);
@@ -535,8 +601,19 @@ async function cmdRecall(hippoRoot, query, flags) {
535
601
  : undefined;
536
602
  const recallExplicitScope = flags['scope'] !== undefined ? String(flags['scope']).trim() : null;
537
603
  const recallActiveScope = recallExplicitScope || detectScope();
604
+ const useMultihop = flags['multihop'] === true || config.multihop.enabled;
538
605
  let results;
539
- if (usePhysics && !hasGlobal) {
606
+ if (useMultihop) {
607
+ const allEntries = [...localEntries, ...globalEntries];
608
+ results = multihopSearch(query, allEntries, {
609
+ budget,
610
+ hippoRoot,
611
+ minResults,
612
+ includeSuperseded,
613
+ asOf,
614
+ });
615
+ }
616
+ else if (usePhysics && !hasGlobal) {
540
617
  results = await physicsSearch(query, localEntries, {
541
618
  budget,
542
619
  hippoRoot,
@@ -556,6 +633,155 @@ async function cmdRecall(hippoRoot, query, flags) {
556
633
  budget, hippoRoot, mmr: mmrEnabled, mmrLambda, minResults, scope: recallActiveScope,
557
634
  });
558
635
  }
636
+ // ACC EVC-adaptive recall (RESEARCH.md §PFC.ACC). When the initial top-K is
637
+ // dominated by lexically similar but distinct memories (high pairwise token
638
+ // overlap = same topic, different facts = conflict), allocate extra retrieval
639
+ // effort: take a wider candidate pool, drop low-relevance distractors, and
640
+ // re-rank by recency to surface the most up-to-date item from the cluster.
641
+ // Default off; opt-in via --evc-adaptive.
642
+ if (flags['evc-adaptive'] && results.length >= 2) {
643
+ const sliceSize = Math.min(3, results.length);
644
+ const slice = results.slice(0, sliceSize);
645
+ let pairs = 0;
646
+ let overlapSum = 0;
647
+ for (let i = 0; i < slice.length; i++) {
648
+ for (let j = i + 1; j < slice.length; j++) {
649
+ overlapSum += textOverlap(slice[i].entry.content, slice[j].entry.content);
650
+ pairs++;
651
+ }
652
+ }
653
+ const avgOverlap = pairs > 0 ? overlapSum / pairs : 0;
654
+ if (avgOverlap >= 0.4) {
655
+ const poolSize = Math.min(results.length, Math.max(sliceSize * 3, 9));
656
+ const pool = results.slice(0, poolSize);
657
+ const tail = results.slice(poolSize);
658
+ const maxScore = pool.reduce((m, r) => Math.max(m, r.score), 0);
659
+ const scoreFloor = maxScore * 0.5;
660
+ const onTopic = [];
661
+ const offTopic = [];
662
+ for (const r of pool) {
663
+ (r.score >= scoreFloor ? onTopic : offTopic).push(r);
664
+ }
665
+ onTopic.sort((a, b) => {
666
+ const ta = new Date(a.entry.created).getTime();
667
+ const tb = new Date(b.entry.created).getTime();
668
+ return tb - ta;
669
+ });
670
+ results = [...onTopic, ...offTopic, ...tail];
671
+ }
672
+ }
673
+ // vlPFC interference filter (RESEARCH.md §PFC.vlPFC). Suppress task-irrelevant
674
+ // memories using *recorded* supersession + conflict structure only. Default
675
+ // off; opt-in via --filter-conflicts. Two effects, both surgical:
676
+ // 1. Drop entries with `superseded_by` set. (No-op under default recall,
677
+ // which already filters them; matters when `--include-superseded` was
678
+ // passed. The flag re-asserts the gate.)
679
+ // 2. Apply a 0.3x score multiplier to entries whose `conflicts_with` list
680
+ // references another entry that ALSO appears in the result set. The
681
+ // multiplier is conservative — we never delete on conflict, only
682
+ // down-rank, so the user can still surface the loser via --include-*.
683
+ // We never infer conflicts from lexical overlap. The v1 salience gate did
684
+ // that and destroyed LoCoMo (0.28 → 0.02). Recorded structure only.
685
+ if (flags['filter-conflicts']) {
686
+ results = results.filter((r) => !r.entry.superseded_by);
687
+ const presentIds = new Set(results.map((r) => r.entry.id));
688
+ results = results.map((r) => {
689
+ const peers = r.entry.conflicts_with || [];
690
+ const hasPeerInResults = peers.some((peerId) => presentIds.has(peerId));
691
+ return hasPeerInResults ? { ...r, score: r.score * 0.3 } : r;
692
+ });
693
+ results.sort((a, b) => b.score - a.score);
694
+ }
695
+ // vmPFC continuous value attribution (RESEARCH.md §PFC.vmPFC). Continuous
696
+ // value scoring per memory based on cumulative outcome attribution. Memories
697
+ // with positive cumulative outcomes are boosted; those with negative outcomes
698
+ // are demoted. The multiplier is a tanh-shaped function clamped to [0.7, 1.3]
699
+ // — wider than the always-on outcomeBoost (which clamps [0.85, 1.15]) so this
700
+ // flag has additional decisive effect when value attribution should drive
701
+ // ranking. Default off; opt-in via --value-aware. Reuses outcome_positive /
702
+ // outcome_negative columns; no schema change.
703
+ if (flags['value-aware'] && results.length >= 1) {
704
+ results = results.map((r) => {
705
+ const pos = r.entry.outcome_positive ?? 0;
706
+ const neg = r.entry.outcome_negative ?? 0;
707
+ if (pos === 0 && neg === 0)
708
+ return r;
709
+ const raw = 1 + 0.3 * Math.tanh(pos - neg);
710
+ const valueMult = Math.max(0.7, Math.min(1.3, raw));
711
+ return { ...r, score: r.score * valueMult };
712
+ });
713
+ results.sort((a, b) => b.score - a.score);
714
+ }
715
+ // OFC option-value re-ranker MVP (RESEARCH.md §PFC.OFC). Combine relevance,
716
+ // strength, and integration cost into a single utility score and re-sort.
717
+ // OFC neurons encode a "common currency" across heterogeneous attributes
718
+ // (Rangel et al., 2008); this is the simplest demonstration of that mechanism.
719
+ // Default off; opt-in via --rerank-utility.
720
+ //
721
+ // utility = score * (0.5 + 0.5 * strength) * (1 - cost_factor)
722
+ // cost_factor = min(0.3, tokens / 10000)
723
+ //
724
+ // The full OFC spec (option_valuation table in RESEARCH.md) decomposes value
725
+ // into reward / cost / risk / confidence components. The MVP collapses these
726
+ // to: score (relevance proxy), strength (persistence proxy), tokens (cost).
727
+ // CAVEAT: cost penalty is monotone with token count; LoCoMo's harder QAs
728
+ // often live in long evidence-rich memories. Default off — needs LoCoMo
729
+ // eval before enabling broadly.
730
+ if (flags['rerank-utility']) {
731
+ results = results
732
+ .map((r) => {
733
+ const strength = typeof r.entry.strength === 'number' ? r.entry.strength : 1.0;
734
+ const costFactor = Math.min(0.3, (r.tokens || 0) / 10000);
735
+ const utility = r.score * (0.5 + 0.5 * strength) * (1 - costFactor);
736
+ return { ...r, score: utility };
737
+ })
738
+ .sort((a, b) => b.score - a.score);
739
+ }
740
+ // dlPFC goal-conditioned recall MVP (RESEARCH.md §PFC.dlPFC). When --goal
741
+ // <tag> is set, memories whose `tags` array contains the goal tag receive
742
+ // a 1.5x score boost and results are re-sorted. The full dlPFC spec
743
+ // (goal_stack + retrieval_policy tables) maintains a hierarchical task
744
+ // stack with weighted retrieval policies; this MVP collapses that to a
745
+ // single-tag boost — the smallest demonstrable goal-conditioning signal.
746
+ // Default off; opt-in via --goal <tag>. No schema change.
747
+ const goalTag = flags['goal'] !== undefined ? String(flags['goal']).trim() : '';
748
+ if (goalTag) {
749
+ results = results
750
+ .map((r) => (r.entry.tags?.includes(goalTag) ? { ...r, score: r.score * 1.5 } : r))
751
+ .sort((a, b) => b.score - a.score);
752
+ }
753
+ // Pineal salience MVP (RESEARCH.md §"AI Pineal Gland — Intuition and Awareness
754
+ // Module"). When --salience-threshold T is set (T > 0), memories whose
755
+ // retrieval_count is below T are downweighted: score *= max(0.5, count / T).
756
+ // At or above T, no change. This makes salience emerge from USE — high-recall
757
+ // memories earn full ranking weight, low-recall memories are softly demoted.
758
+ //
759
+ // CRITICAL HISTORY: The v1 salience gate (60% lexical-overlap gate at memory
760
+ // CREATION time) destroyed LoCoMo recall (0.28 -> 0.02) by dropping same-
761
+ // session relevant turns at intake. See MEMORY.md "Hippo salience gate
762
+ // destroys benchmark recall". This v2 is the inverse:
763
+ // - retrieval-side only (no creation-time gating)
764
+ // - retrieval_count signal only (no lexical overlap, no novelty heuristic)
765
+ // - default OFF, opt-in via the flag (no behaviour change without it)
766
+ // - 0.5 floor so non-salient entries stay reachable, never dropped
767
+ // Reuses the existing retrieval_count column; no schema change.
768
+ const salienceThresholdRaw = flags['salience-threshold'];
769
+ if (salienceThresholdRaw !== undefined) {
770
+ const T = Number(salienceThresholdRaw);
771
+ if (!Number.isFinite(T) || T <= 0) {
772
+ console.error(`Invalid --salience-threshold: "${salienceThresholdRaw}". Must be a positive number.`);
773
+ process.exit(1);
774
+ }
775
+ results = results
776
+ .map((r) => {
777
+ const count = r.entry.retrieval_count ?? 0;
778
+ if (count >= T)
779
+ return r;
780
+ const mult = Math.max(0.5, count / T);
781
+ return { ...r, score: r.score * mult };
782
+ })
783
+ .sort((a, b) => b.score - a.score);
784
+ }
559
785
  // --outcome filter: drop trace entries whose trace_outcome !== target.
560
786
  // Non-trace entries pass through unaffected (traces are the only layer with
561
787
  // a meaningful outcome; filtering non-traces by outcome would be incoherent).
@@ -631,6 +857,9 @@ async function cmdRecall(hippoRoot, query, flags) {
631
857
  base.reason = explanation.reason;
632
858
  base.bm25 = r.bm25;
633
859
  base.cosine = r.cosine;
860
+ if (explanation.envelope) {
861
+ base.envelope = explanation.envelope;
862
+ }
634
863
  }
635
864
  return base;
636
865
  });
@@ -654,6 +883,19 @@ async function cmdRecall(hippoRoot, query, flags) {
654
883
  const explanation = explainMatch(query, r);
655
884
  console.log(` source:${sourceMark} | layer: [${e.layer}] | confidence: [${conf}]`);
656
885
  console.log(` reason: ${explanation.reason}`);
886
+ if (explanation.envelope) {
887
+ const env = explanation.envelope;
888
+ console.log(` kind: ${env.kind}`);
889
+ if (env.scope)
890
+ console.log(` scope: ${env.scope}`);
891
+ if (env.owner)
892
+ console.log(` owner: ${env.owner}`);
893
+ if (env.artifact_ref)
894
+ console.log(` artifact_ref: ${env.artifact_ref}`);
895
+ if (env.session_id)
896
+ console.log(` session_id: ${env.session_id}`);
897
+ console.log(` confidence: ${env.confidence}`);
898
+ }
657
899
  }
658
900
  console.log();
659
901
  console.log(e.content);
@@ -831,7 +1073,6 @@ async function cmdExplain(hippoRoot, query, flags) {
831
1073
  console.log('Note: explain does not mark memories as retrieved (read-only).');
832
1074
  }
833
1075
  async function cmdEval(hippoRoot, corpusPath, flags) {
834
- requireInit(hippoRoot);
835
1076
  const asJson = Boolean(flags['json']);
836
1077
  const minMrr = flags['min-mrr'] !== undefined ? parseFloat(String(flags['min-mrr'])) : null;
837
1078
  const showCases = Boolean(flags['show-cases']);
@@ -839,7 +1080,14 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
839
1080
  const noMmr = Boolean(flags['no-mmr']);
840
1081
  const mmrLambda = flags['mmr-lambda'] !== undefined ? parseFloat(String(flags['mmr-lambda'])) : undefined;
841
1082
  const embeddingWeight = flags['embedding-weight'] !== undefined ? parseFloat(String(flags['embedding-weight'])) : undefined;
842
- const entries = loadAllEntries(hippoRoot);
1083
+ // Suite mode doesn't need an initialized store
1084
+ if (flags['suite']) {
1085
+ // handled below after bootstrap check
1086
+ }
1087
+ else {
1088
+ requireInit(hippoRoot);
1089
+ }
1090
+ const entries = flags['suite'] ? [] : loadAllEntries(hippoRoot);
843
1091
  // Bootstrap mode: emit a synthetic corpus and exit.
844
1092
  if (flags['bootstrap']) {
845
1093
  const outPath = flags['out'] ? String(flags['out']) : null;
@@ -856,8 +1104,41 @@ async function cmdEval(hippoRoot, corpusPath, flags) {
856
1104
  }
857
1105
  return;
858
1106
  }
1107
+ // Suite mode: run built-in feature eval (no corpus file needed, no init needed)
1108
+ if (flags['suite']) {
1109
+ const pkg = JSON.parse(fs.readFileSync(path.join(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', 'package.json'), 'utf8'));
1110
+ const version = pkg.version || 'unknown';
1111
+ const baselinePath = flags['baseline'] ? String(flags['baseline']) : path.join(hippoRoot, 'eval-baseline.json');
1112
+ let baseline;
1113
+ if (fs.existsSync(baselinePath)) {
1114
+ try {
1115
+ baseline = JSON.parse(fs.readFileSync(baselinePath, 'utf8'));
1116
+ }
1117
+ catch { }
1118
+ }
1119
+ const result = await runFeatureEval(version);
1120
+ if (asJson) {
1121
+ console.log(JSON.stringify(result, null, 2));
1122
+ }
1123
+ else {
1124
+ console.log(formatResult(result, baseline));
1125
+ }
1126
+ if (flags['save-baseline']) {
1127
+ const newBaseline = resultToBaseline(result);
1128
+ fs.mkdirSync(path.dirname(baselinePath), { recursive: true });
1129
+ fs.writeFileSync(baselinePath, JSON.stringify(newBaseline, null, 2), 'utf8');
1130
+ console.log(`\nBaseline saved to ${baselinePath}`);
1131
+ }
1132
+ if (baseline) {
1133
+ const report = detectRegressions(baseline, result);
1134
+ if (report.verdict === 'REGRESSION' && minMrr === null) {
1135
+ process.exit(1);
1136
+ }
1137
+ }
1138
+ return;
1139
+ }
859
1140
  if (!corpusPath) {
860
- console.error('Usage: hippo eval <corpus.json> OR hippo eval --bootstrap [--out <path>]');
1141
+ console.error('Usage: hippo eval <corpus.json> OR hippo eval --suite [--save-baseline] OR hippo eval --bootstrap');
861
1142
  process.exit(1);
862
1143
  }
863
1144
  if (!fs.existsSync(corpusPath)) {
@@ -1296,7 +1577,7 @@ function cmdDedup(hippoRoot, flags) {
1296
1577
  console.log(` ... and ${result.pairs.length - 15} more (run with --dry-run to see all)`);
1297
1578
  }
1298
1579
  }
1299
- function cmdSleep(hippoRoot, flags) {
1580
+ async function cmdSleep(hippoRoot, flags) {
1300
1581
  // Tee stdout/stderr to a log file when --log-file is set. The SessionEnd
1301
1582
  // hook uses this so the output is captured somewhere the SessionStart hook
1302
1583
  // can re-display it next time the agent UI starts.
@@ -1335,7 +1616,7 @@ function cmdSleep(hippoRoot, flags) {
1335
1616
  }
1336
1617
  }
1337
1618
  try {
1338
- cmdSleepCore(hippoRoot, flags);
1619
+ await cmdSleepCore(hippoRoot, flags);
1339
1620
  if (logFile)
1340
1621
  console.log('[hippo] sleep complete');
1341
1622
  }
@@ -1349,7 +1630,7 @@ function cmdSleep(hippoRoot, flags) {
1349
1630
  restoreStdout();
1350
1631
  }
1351
1632
  }
1352
- function cmdSleepCore(hippoRoot, flags) {
1633
+ async function cmdSleepCore(hippoRoot, flags) {
1353
1634
  requireInit(hippoRoot);
1354
1635
  // Auto-learn from git before consolidating (unless --no-learn)
1355
1636
  if (!flags['no-learn']) {
@@ -1366,7 +1647,7 @@ function cmdSleepCore(hippoRoot, flags) {
1366
1647
  }
1367
1648
  const dryRun = Boolean(flags['dry-run']);
1368
1649
  console.log(`Running consolidation${dryRun ? ' (dry run)' : ''}...`);
1369
- const result = consolidate(hippoRoot, { dryRun });
1650
+ const result = await consolidate(hippoRoot, { dryRun });
1370
1651
  console.log(`\nResults:`);
1371
1652
  console.log(` Active memories: ${result.decayed}`);
1372
1653
  console.log(` Removed (decayed): ${result.removed}`);
@@ -1425,6 +1706,17 @@ function cmdSleepCore(hippoRoot, flags) {
1425
1706
  }
1426
1707
  }
1427
1708
  }
1709
+ // Post-sleep ambient state summary
1710
+ if (!dryRun) {
1711
+ const postSleepConfig = loadConfig(hippoRoot);
1712
+ if (postSleepConfig.ambient.enabled) {
1713
+ const postSleepEntries = loadAllEntries(hippoRoot).filter(e => !e.superseded_by);
1714
+ if (postSleepEntries.length > 0) {
1715
+ const ambientState = computeAmbientState(postSleepEntries);
1716
+ console.log(`\n${renderAmbientSummary(ambientState)}`);
1717
+ }
1718
+ }
1719
+ }
1428
1720
  }
1429
1721
  /**
1430
1722
  * Print the contents of the SessionEnd sleep log to stdout, then clear it.
@@ -2337,17 +2629,20 @@ async function cmdContext(hippoRoot, args, flags) {
2337
2629
  // Default context always filters superseded (no --include-superseded / --as-of for context)
2338
2630
  localEntries = localEntries.filter(e => !e.superseded_by);
2339
2631
  globalEntries = globalEntries.filter(e => !e.superseded_by);
2340
- const allEntries = [...localEntries];
2341
- if (allEntries.length === 0 && globalEntries.length === 0)
2342
- return; // no memories, zero output
2343
2632
  let selectedItems = [];
2344
2633
  let totalTokens = 0;
2345
2634
  // Task snapshots / session events live in the local store. Skip when
2346
2635
  // local isn't initialized — loading would auto-create .hippo in the cwd.
2347
2636
  const activeSnapshot = hasLocal ? loadActiveTaskSnapshot(hippoRoot) : null;
2637
+ const sessionHandoff = hasLocal && activeSnapshot?.session_id
2638
+ ? loadLatestHandoff(hippoRoot, activeSnapshot.session_id)
2639
+ : null;
2348
2640
  const recentSessionEvents = hasLocal && activeSnapshot?.session_id
2349
2641
  ? listSessionEvents(hippoRoot, { session_id: activeSnapshot.session_id, limit: 5 })
2350
2642
  : [];
2643
+ if (localEntries.length === 0 && globalEntries.length === 0 && !activeSnapshot && !sessionHandoff && recentSessionEvents.length === 0) {
2644
+ return;
2645
+ }
2351
2646
  // --pinned-only: restrict to pinned entries only. Used by the Claude Code
2352
2647
  // UserPromptSubmit hook so invariants stay in context every turn.
2353
2648
  // (pinnedOnly and hasLocal are declared at the top of this function.)
@@ -2448,7 +2743,7 @@ async function cmdContext(hippoRoot, args, flags) {
2448
2743
  selectedItems = selectedItems.slice(0, limit);
2449
2744
  totalTokens = selectedItems.reduce((sum, r) => sum + r.tokens, 0);
2450
2745
  }
2451
- if (selectedItems.length === 0 && !activeSnapshot && recentSessionEvents.length === 0)
2746
+ if (selectedItems.length === 0 && !activeSnapshot && !sessionHandoff && recentSessionEvents.length === 0)
2452
2747
  return;
2453
2748
  // --pinned-only is called by the UserPromptSubmit hook every turn. Treat it
2454
2749
  // as read-only so pinned memories don't inflate retrieval_count or extend
@@ -2482,7 +2777,7 @@ async function cmdContext(hippoRoot, args, flags) {
2482
2777
  content: r.entry.content,
2483
2778
  global: r.isGlobal ?? false,
2484
2779
  }));
2485
- console.log(JSON.stringify({ query, activeSnapshot, recentSessionEvents, memories: output, tokens: totalTokens }));
2780
+ console.log(JSON.stringify({ query, activeSnapshot, sessionHandoff, recentSessionEvents, memories: output, tokens: totalTokens }));
2486
2781
  }
2487
2782
  else if (format === 'additional-context') {
2488
2783
  // Claude Code UserPromptSubmit hook JSON shape. Capture the markdown that
@@ -2493,14 +2788,18 @@ async function cmdContext(hippoRoot, args, flags) {
2493
2788
  try {
2494
2789
  if (activeSnapshot)
2495
2790
  printActiveTaskSnapshot(activeSnapshot);
2791
+ if (sessionHandoff)
2792
+ printHandoff(sessionHandoff);
2496
2793
  if (recentSessionEvents.length > 0)
2497
2794
  printSessionEvents(recentSessionEvents);
2498
- printContextMarkdown(selectedItems.map((r) => ({
2499
- entry: updatedEntries.find((u) => u.id === r.entry.id) ?? r.entry,
2500
- score: r.score,
2501
- tokens: r.tokens,
2502
- isGlobal: r.isGlobal ?? false,
2503
- })), totalTokens, framing);
2795
+ if (selectedItems.length > 0) {
2796
+ printContextMarkdown(selectedItems.map((r) => ({
2797
+ entry: updatedEntries.find((u) => u.id === r.entry.id) ?? r.entry,
2798
+ score: r.score,
2799
+ tokens: r.tokens,
2800
+ isGlobal: r.isGlobal ?? false,
2801
+ })), totalTokens, framing);
2802
+ }
2504
2803
  }
2505
2804
  finally {
2506
2805
  console.log = realLog;
@@ -2520,15 +2819,29 @@ async function cmdContext(hippoRoot, args, flags) {
2520
2819
  if (activeSnapshot) {
2521
2820
  printActiveTaskSnapshot(activeSnapshot);
2522
2821
  }
2822
+ if (sessionHandoff) {
2823
+ printHandoff(sessionHandoff);
2824
+ }
2523
2825
  if (recentSessionEvents.length > 0) {
2524
2826
  printSessionEvents(recentSessionEvents);
2525
2827
  }
2526
- printContextMarkdown(selectedItems.map((r) => ({
2527
- entry: updatedEntries.find((u) => u.id === r.entry.id) ?? r.entry,
2528
- score: r.score,
2529
- tokens: r.tokens,
2530
- isGlobal: r.isGlobal ?? false,
2531
- })), totalTokens, framing);
2828
+ if (selectedItems.length > 0) {
2829
+ printContextMarkdown(selectedItems.map((r) => ({
2830
+ entry: updatedEntries.find((u) => u.id === r.entry.id) ?? r.entry,
2831
+ score: r.score,
2832
+ tokens: r.tokens,
2833
+ isGlobal: r.isGlobal ?? false,
2834
+ })), totalTokens, framing);
2835
+ }
2836
+ // Ambient state summary (one-line landscape overview)
2837
+ const ambientConfig = loadConfig(hippoRoot);
2838
+ if (ambientConfig.ambient.enabled && !pinnedOnly) {
2839
+ const allForAmbient = [...localEntries, ...globalEntries];
2840
+ if (allForAmbient.length > 0) {
2841
+ const ambientState = computeAmbientState(allForAmbient);
2842
+ console.log(`\n${renderAmbientSummary(ambientState)}`);
2843
+ }
2844
+ }
2532
2845
  }
2533
2846
  }
2534
2847
  function printContextMarkdown(items, totalTokens, framing = 'observe') {
@@ -3374,6 +3687,44 @@ function cmdWm(hippoRoot, args, flags) {
3374
3687
  console.error('Usage: hippo wm <push|read|clear|flush>');
3375
3688
  process.exit(1);
3376
3689
  }
3690
+ function cmdDag(hippoRoot, flags) {
3691
+ requireInit(hippoRoot);
3692
+ const entries = loadAllEntries(hippoRoot);
3693
+ const isStats = flags['stats'] === true;
3694
+ const byLevel = new Map();
3695
+ let unlinked = 0;
3696
+ for (const entry of entries) {
3697
+ const level = entry.dag_level ?? 0;
3698
+ byLevel.set(level, (byLevel.get(level) ?? 0) + 1);
3699
+ if (level === 1 && !entry.dag_parent_id)
3700
+ unlinked++;
3701
+ }
3702
+ if (isStats) {
3703
+ console.log('DAG Structure:');
3704
+ console.log(` Level 3 (entity profiles): ${byLevel.get(3) ?? 0}`);
3705
+ console.log(` Level 2 (topic summaries): ${byLevel.get(2) ?? 0}`);
3706
+ console.log(` Level 1 (extracted facts): ${byLevel.get(1) ?? 0}`);
3707
+ console.log(` Level 0 (raw memories): ${byLevel.get(0) ?? 0}`);
3708
+ console.log(` Unlinked facts: ${unlinked}`);
3709
+ return;
3710
+ }
3711
+ // Tree view: show summaries and their children
3712
+ const summaries = entries.filter((e) => e.dag_level === 2);
3713
+ if (summaries.length === 0) {
3714
+ console.log('No DAG summaries yet. Run `hippo sleep` with ANTHROPIC_API_KEY set.');
3715
+ return;
3716
+ }
3717
+ for (const summary of summaries) {
3718
+ const summaryTags = summary.tags.filter((t) => t !== 'dag-summary').join(', ');
3719
+ console.log(`\n📌 ${summary.content.slice(0, 80)}`);
3720
+ if (summaryTags)
3721
+ console.log(` [${summaryTags}]`);
3722
+ const children = entries.filter((e) => e.dag_parent_id === summary.id);
3723
+ for (const child of children) {
3724
+ console.log(` └─ ${child.content.slice(0, 70)}`);
3725
+ }
3726
+ }
3727
+ }
3377
3728
  function printUsage() {
3378
3729
  console.log(`
3379
3730
  Hippo - biologically-inspired memory system for AI agents
@@ -3403,6 +3754,38 @@ Commands:
3403
3754
  --why Show match reasons and source annotations
3404
3755
  --no-mmr Disable MMR diversity re-ranking
3405
3756
  --mmr-lambda <f> MMR balance 0..1 (default: 0.7, 1.0 = pure relevance)
3757
+ --evc-adaptive ACC-style: when top-K shows high inter-item overlap
3758
+ (= conflict cluster), expand pool and re-rank by
3759
+ recency. Default off. RESEARCH.md §PFC.ACC.
3760
+ --filter-conflicts vlPFC interference filter: drop superseded entries
3761
+ and 0.3x-downweight entries flagged in an open
3762
+ conflict with a peer in the same result set.
3763
+ Uses recorded supersession + conflicts only — never
3764
+ lexical inference. Default off. RESEARCH.md §PFC.vlPFC.
3765
+ --value-aware vmPFC value attribution: boost memories with positive
3766
+ cumulative outcomes and demote those with negative
3767
+ outcomes during ranking. Multiplier
3768
+ clip(1 + 0.3*tanh(pos - neg), 0.7, 1.3). Reuses
3769
+ outcome_positive / outcome_negative; no schema
3770
+ change. Default off. RESEARCH.md §PFC.vmPFC.
3771
+ --rerank-utility OFC option-value re-ranker: combine relevance,
3772
+ strength, and integration cost into a single utility
3773
+ = score * (0.5 + 0.5 * strength) * (1 - cost_factor)
3774
+ where cost_factor = min(0.3, tokens / 10000). Re-sorts
3775
+ results by utility. Default off. RESEARCH.md §PFC.OFC.
3776
+ --goal <tag> dlPFC goal-conditioned recall: memories tagged with
3777
+ the goal tag get a 1.5x score boost and results are
3778
+ re-sorted. Default off. RESEARCH.md §PFC.dlPFC.
3779
+ --salience-threshold <n>
3780
+ Pineal salience: down-weight memories whose
3781
+ retrieval_count is below n. score *= max(0.5,
3782
+ retrieval_count / n) for entries with count < n;
3783
+ entries at or above n are unchanged. Salience emerges
3784
+ from USE, not from lexical overlap. Default off.
3785
+ RESEARCH.md §"AI Pineal Gland". (v1's creation-time
3786
+ lexical gate destroyed LoCoMo 0.28 -> 0.02; this v2
3787
+ is retrieval-side, opt-in only — see MEMORY.md
3788
+ "Hippo salience gate destroys benchmark recall".)
3406
3789
  explain <query> Show full score breakdown for each retrieved memory
3407
3790
  --budget <n> Token budget (default: 4000)
3408
3791
  --limit <n> Cap the number of results displayed
@@ -3633,12 +4016,18 @@ async function main() {
3633
4016
  cmdInit(hippoRoot, flags);
3634
4017
  break;
3635
4018
  case 'remember': {
3636
- const text = args.join(' ').trim();
4019
+ let text;
4020
+ if (args.length === 1 && args[0] === '-') {
4021
+ text = fs.readFileSync(0, 'utf-8').trim();
4022
+ }
4023
+ else {
4024
+ text = args.join(' ').trim();
4025
+ }
3637
4026
  if (!text || text.length < 3) {
3638
4027
  console.error('Memory content too short (minimum 3 characters).');
3639
4028
  process.exit(1);
3640
4029
  }
3641
- cmdRemember(hippoRoot, text, flags);
4030
+ await cmdRemember(hippoRoot, text, flags);
3642
4031
  break;
3643
4032
  }
3644
4033
  case 'recall': {
@@ -3691,7 +4080,7 @@ async function main() {
3691
4080
  await cmdRefine(hippoRoot, flags);
3692
4081
  break;
3693
4082
  case 'sleep':
3694
- cmdSleep(hippoRoot, flags);
4083
+ await cmdSleep(hippoRoot, flags);
3695
4084
  break;
3696
4085
  case 'last-sleep':
3697
4086
  cmdLastSleep(flags);
@@ -3711,6 +4100,9 @@ async function main() {
3711
4100
  case 'dedup':
3712
4101
  cmdDedup(hippoRoot, flags);
3713
4102
  break;
4103
+ case 'dag':
4104
+ cmdDag(hippoRoot, flags);
4105
+ break;
3714
4106
  case 'audit': {
3715
4107
  requireInit(hippoRoot);
3716
4108
  const entries = loadAllEntries(hippoRoot);