hippo-memory 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,10 +45,12 @@ hippo init --scan ~
45
45
 
46
46
  `--scan` finds every git repo under your home directory, creates a `.hippo/` store in each one, and seeds it with lessons from your commit history. One command, instant memory across all your projects.
47
47
 
48
- After setup, `hippo sleep` runs automatically at session end (via agent hooks) and does three things:
48
+ After setup, `hippo sleep` runs automatically at session end (via agent hooks) and does five things:
49
49
  1. **Learns** from today's git commits
50
- 2. **Consolidates** memories (decay, merge, prune)
51
- 3. **Shares** high-value lessons to the global store so they surface in every project
50
+ 2. **Imports** new entries from Claude Code MEMORY.md files
51
+ 3. **Consolidates** memories (decay, merge, prune)
52
+ 4. **Deduplicates** near-identical memories (keeps the stronger copy)
53
+ 5. **Shares** high-value lessons to the global store so they surface in every project
52
54
 
53
55
  ```bash
54
56
  # Manual usage
@@ -58,6 +60,16 @@ hippo recall "data pipeline issues" --budget 2000
58
60
 
59
61
  ---
60
62
 
63
+ ### What's new in v0.20
64
+
65
+ - **`hippo dedup`.** Scans for near-duplicate memories, shows you what's duplicated and why (redundant semantic patterns, same lesson from multiple sources, cross-layer overlap), and removes the weaker copy. Runs automatically during `hippo sleep`.
66
+ - **MEMORY.md import.** `hippo init` and `hippo sleep` now scan Claude Code memory files and import new entries. Your agent memories from Claude Code flow into hippo automatically.
67
+
68
+ ### What's new in v0.19.1
69
+
70
+ - **Configured embedding models now work end to end.** `hippo embed`, hybrid search, and physics search all respect `embeddings.model` from `.hippo/config.json`.
71
+ - **Safe rebuild on model change.** If you switch embedding models, rerun `hippo embed`. Hippo now rebuilds cached embeddings and resets physics state so old vectors are not mixed with the new model.
72
+
61
73
  ### What's new in v0.18
62
74
 
63
75
  - **Multi-project auto-discovery.** `hippo init --scan [dir]` finds all git repos under a directory and initializes each one. Seeds with a full year of git history by default. One command to set up memory across all your projects.
@@ -126,6 +138,7 @@ hippo recall "data pipeline issues" --budget 2000
126
138
  ### What's new in v0.8.0
127
139
 
128
140
  - **Hybrid search** blends BM25 keywords with cosine embedding similarity. Install `@xenova/transformers`, run `hippo embed`, recall quality jumps. Falls back to BM25 otherwise.
141
+ - Configure a custom embedding model with `embeddings.model` in `.hippo/config.json`. If you change models later, rerun `hippo embed` so Hippo rebuilds cached embeddings and physics state for the new vector space.
129
142
  - **Schema acceleration** auto-computes how well new memories fit existing patterns. Familiar memories consolidate faster; novel ones decay faster if unused.
130
143
  - **Multi-agent shared memory** with `hippo share`, `hippo peers`, and transfer scoring. Universal lessons travel between projects; project-specific config stays local.
131
144
  - **Conflict resolution** via `hippo resolve <id> --keep <mem_id>`. Closes the detect-inspect-resolve loop.
package/dist/cli.js CHANGED
@@ -30,9 +30,9 @@ import * as os from 'os';
30
30
  import { execSync } from 'child_process';
31
31
  import { createMemory, calculateStrength, calculateRewardFactor, deriveHalfLife, resolveConfidence, applyOutcome, computeSchemaFit, Layer, DECISION_HALF_LIFE_DAYS, } from './memory.js';
32
32
  import { getHippoRoot, isInitialized, initStore, writeEntry, readEntry, deleteEntry, loadAllEntries, loadSearchEntries, loadIndex, saveIndex, loadStats, updateStats, saveActiveTaskSnapshot, loadActiveTaskSnapshot, clearActiveTaskSnapshot, appendSessionEvent, listSessionEvents, listMemoryConflicts, resolveConflict, saveSessionHandoff, loadLatestHandoff, loadHandoffById, } from './store.js';
33
- import { markRetrieved, estimateTokens, hybridSearch, physicsSearch, explainMatch } from './search.js';
33
+ import { markRetrieved, estimateTokens, hybridSearch, physicsSearch, explainMatch, textOverlap } from './search.js';
34
34
  import { consolidate } from './consolidate.js';
35
- import { isEmbeddingAvailable, embedAll, embedMemory, loadEmbeddingIndex, } from './embeddings.js';
35
+ import { isEmbeddingAvailable, embedAll, embedMemory, loadEmbeddingIndex, resolveEmbeddingModel, } from './embeddings.js';
36
36
  import { loadPhysicsState, resetAllPhysicsState } from './physics-state.js';
37
37
  import { computeSystemEnergy, vecNorm } from './physics.js';
38
38
  import { loadConfig } from './config.js';
@@ -218,6 +218,11 @@ function cmdInit(hippoRoot, flags) {
218
218
  console.log(` No matching commits found in git history.`);
219
219
  }
220
220
  }
221
+ // Also import from Claude Code / agent MEMORY.md files
222
+ const memImported = learnFromMemoryMd(hippoRoot);
223
+ if (memImported > 0) {
224
+ console.log(` Imported ${memImported} memories from agent MEMORY.md files.`);
225
+ }
221
226
  }
222
227
  }
223
228
  /**
@@ -492,6 +497,149 @@ async function cmdRecall(hippoRoot, query, flags) {
492
497
  console.log();
493
498
  }
494
499
  }
500
/**
 * Scan for Claude Code memory files and import new entries into hippo.
 * Looks in ~/.claude/projects/<project>/memory/ for .md topic files with YAML
 * frontmatter (MEMORY.md itself is skipped — presumably it is the index file,
 * not a content file; TODO confirm against Claude Code's layout).
 *
 * Best-effort: unreadable directories or files are skipped silently rather
 * than aborting the import. Errors are isolated per FILE, so one corrupt
 * file no longer prevents the rest of its directory from being imported.
 *
 * @param {string} hippoRoot - Root of the .hippo store to import into.
 * @returns {number} Count of newly imported memories.
 */
function learnFromMemoryMd(hippoRoot) {
    const home = os.homedir();
    const memoryDirs = [];
    // Claude Code project memories
    const claudeProjectsDir = path.join(home, '.claude', 'projects');
    if (fs.existsSync(claudeProjectsDir)) {
        try {
            for (const project of fs.readdirSync(claudeProjectsDir)) {
                const memDir = path.join(claudeProjectsDir, project, 'memory');
                if (fs.existsSync(memDir))
                    memoryDirs.push(memDir);
            }
        }
        catch { /* permission denied */ }
    }
    if (memoryDirs.length === 0)
        return 0;
    const existing = loadAllEntries(hippoRoot);
    let imported = 0;
    for (const memDir of memoryDirs) {
        let files;
        try {
            files = fs.readdirSync(memDir).filter(f => f.endsWith('.md') && f !== 'MEMORY.md');
        }
        catch {
            continue; // unreadable dir — skip it, keep scanning the others
        }
        for (const file of files) {
            try {
                const raw = fs.readFileSync(path.join(memDir, file), 'utf8');
                // Parse YAML frontmatter; files without it are not memory entries.
                const fmMatch = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
                if (!fmMatch)
                    continue;
                const body = fmMatch[2].trim();
                if (!body || body.length < 10)
                    continue;
                // Truncate to reasonable size
                const content = body.length > 1500 ? body.slice(0, 1500) + ' [truncated]' : body;
                // Dedup: check if substantially similar content already exists
                // (only the first 200 chars are compared, to keep this cheap).
                const isDup = existing.some(e => {
                    const overlap = textOverlap(content.slice(0, 200), e.content.slice(0, 200));
                    return overlap > 0.6;
                });
                if (isDup)
                    continue;
                const entry = createMemory(content, {
                    layer: Layer.Episodic,
                    tags: ['claude-code-memory'],
                    source: `claude-memory:${file}`,
                    confidence: 'observed',
                });
                writeEntry(hippoRoot, entry);
                existing.push(entry); // prevent self-dedup within batch
                imported++;
            }
            catch { /* skip this broken file, continue with the rest */ }
        }
    }
    return imported;
}
559
/**
 * Collapse near-duplicate memories in the store, keeping the strongest copy
 * of each duplicate pair.
 *
 * Entries are ordered strongest-first (ties broken by retrieval count), then
 * compared pairwise with textOverlap; whenever a pair exceeds the threshold,
 * the later (weaker) entry is marked for removal.
 *
 * @param {string} hippoRoot - Root of the .hippo store.
 * @param {{threshold?: number, dryRun?: boolean}} [options]
 *   threshold: overlap above which two entries count as duplicates (default 0.7).
 *   dryRun: when true, report pairs but delete nothing.
 * @returns {{removed: number, pairs: Array<object>}} Count of removed entries
 *   and a detail record for every kept/removed pair.
 */
function deduplicateStore(hippoRoot, options = {}) {
    const threshold = options.threshold ?? 0.7;
    const dryRun = options.dryRun ?? false;
    const entries = loadAllEntries(hippoRoot);
    // Strongest first, then most-retrieved, so the survivor of every pair is
    // the most valuable copy.
    entries.sort((a, b) => {
        const byStrength = (b.strength ?? 0) - (a.strength ?? 0);
        if (Math.abs(byStrength) > 0.01)
            return byStrength;
        return (b.retrieval_count ?? 0) - (a.retrieval_count ?? 0);
    });
    const removed = new Set();
    const pairs = [];
    for (let keepIdx = 0; keepIdx < entries.length; keepIdx++) {
        const keeper = entries[keepIdx];
        if (removed.has(keeper.id))
            continue;
        for (let dupIdx = keepIdx + 1; dupIdx < entries.length; dupIdx++) {
            const candidate = entries[dupIdx];
            if (removed.has(candidate.id))
                continue;
            const similarity = textOverlap(keeper.content, candidate.content);
            if (similarity > threshold) {
                removed.add(candidate.id);
                pairs.push({
                    kept: keeper.id,
                    keptContent: keeper.content,
                    keptLayer: keeper.layer,
                    keptStrength: keeper.strength ?? 0,
                    removed: candidate.id,
                    removedContent: candidate.content,
                    removedLayer: candidate.layer,
                    removedStrength: candidate.strength ?? 0,
                    similarity,
                });
            }
        }
    }
    if (!dryRun) {
        for (const id of removed)
            deleteEntry(hippoRoot, id);
    }
    return { removed: removed.size, pairs };
}
602
/**
 * `hippo dedup` — scan the store for near-duplicate memories and remove the
 * weaker copy of each pair.
 *
 * Flags:
 *   --dry-run        Preview without deleting; also lists EVERY pair.
 *   --threshold <n>  Overlap threshold in (0, 1]; default 0.7.
 *
 * @param {string} hippoRoot - Root of the .hippo store.
 * @param {Record<string, string|boolean>} flags - Parsed CLI flags.
 */
function cmdDedup(hippoRoot, flags) {
    requireInit(hippoRoot);
    const dryRun = Boolean(flags['dry-run']);
    const threshold = parseFloat(String(flags['threshold'] ?? '0.7'));
    // Validate the threshold. An unchecked NaN would be catastrophic:
    // deduplicateStore tests `similarity <= threshold` to SKIP a pair, and
    // `x <= NaN` is always false, so a NaN threshold flags every pair as a
    // duplicate and wipes all but the strongest memories.
    if (!Number.isFinite(threshold) || threshold <= 0 || threshold > 1) {
        console.error(`Invalid --threshold ${String(flags['threshold'])}: expected a number in (0, 1].`);
        return;
    }
    const entries = loadAllEntries(hippoRoot);
    // The comparison in deduplicateStore is strictly greater-than, so report ">".
    console.log(`Scanning ${entries.length} memories for duplicates (>${(threshold * 100).toFixed(0)}% text overlap)${dryRun ? ' (dry run)' : ''}...\n`);
    const result = deduplicateStore(hippoRoot, { threshold, dryRun });
    if (result.removed === 0) {
        console.log('No duplicates found.');
        return;
    }
    // Group by reason
    const sameLayerSem = result.pairs.filter(p => p.keptLayer === 'semantic' && p.removedLayer === 'semantic');
    const sameLayerEpi = result.pairs.filter(p => p.keptLayer === 'episodic' && p.removedLayer === 'episodic');
    const crossLayer = result.pairs.filter(p => p.keptLayer !== p.removedLayer);
    console.log(`${dryRun ? 'Would remove' : 'Removed'} ${result.removed} duplicates:`);
    if (sameLayerSem.length > 0) {
        console.log(`  ${sameLayerSem.length} redundant semantic memories (consolidation regenerated near-identical patterns)`);
    }
    if (sameLayerEpi.length > 0) {
        console.log(`  ${sameLayerEpi.length} duplicate episodic memories (same lesson learned from multiple sources)`);
    }
    if (crossLayer.length > 0) {
        console.log(`  ${crossLayer.length} cross-layer duplicates (episodic content already consolidated into semantic)`);
    }
    // Show detailed pairs. A dry run lists every pair (so the "--dry-run to
    // see all" hint below is actually true); a real run caps the listing at 15.
    console.log('');
    const shown = dryRun ? result.pairs : result.pairs.slice(0, 15);
    for (const pair of shown) {
        const simPct = (pair.similarity * 100).toFixed(0);
        const action = dryRun ? 'Would remove' : 'Removed';
        console.log(`  ${simPct}% similar | kept [${pair.keptLayer}] strength=${pair.keptStrength.toFixed(2)}`);
        console.log(`    ${pair.keptContent.slice(0, 90)}`);
        console.log(`  ${action} [${pair.removedLayer}] strength=${pair.removedStrength.toFixed(2)}`);
        console.log(`    ${pair.removedContent.slice(0, 90)}`);
        console.log('');
    }
    if (!dryRun && result.pairs.length > 15) {
        console.log(`  ... and ${result.pairs.length - 15} more (run with --dry-run to see all)`);
    }
}
495
643
  function cmdSleep(hippoRoot, flags) {
496
644
  requireInit(hippoRoot);
497
645
  // Auto-learn from git before consolidating (unless --no-learn)
@@ -502,6 +650,10 @@ function cmdSleep(hippoRoot, flags) {
502
650
  if (added > 0)
503
651
  console.log(`Auto-learned ${added} lessons from today's git commits.`);
504
652
  }
653
+ // Also learn from Claude Code MEMORY.md files
654
+ const memImported = learnFromMemoryMd(hippoRoot);
655
+ if (memImported > 0)
656
+ console.log(`Imported ${memImported} memories from Claude Code MEMORY.md files.`);
505
657
  }
506
658
  const dryRun = Boolean(flags['dry-run']);
507
659
  console.log(`Running consolidation${dryRun ? ' (dry run)' : ''}...`);
@@ -519,6 +671,23 @@ function cmdSleep(hippoRoot, flags) {
519
671
  }
520
672
  if (dryRun)
521
673
  console.log('\n(dry run - nothing written)');
674
+ // Auto-dedup after consolidation (unless dry-run)
675
+ if (!dryRun) {
676
+ const dedupResult = deduplicateStore(hippoRoot);
677
+ if (dedupResult.removed > 0) {
678
+ const semDups = dedupResult.pairs.filter(p => p.keptLayer === 'semantic' && p.removedLayer === 'semantic').length;
679
+ const epiDups = dedupResult.pairs.filter(p => p.keptLayer === 'episodic' && p.removedLayer === 'episodic').length;
680
+ const crossDups = dedupResult.pairs.filter(p => p.keptLayer !== p.removedLayer).length;
681
+ const parts = [];
682
+ if (semDups > 0)
683
+ parts.push(`${semDups} redundant semantic patterns`);
684
+ if (epiDups > 0)
685
+ parts.push(`${epiDups} duplicate episodic lessons`);
686
+ if (crossDups > 0)
687
+ parts.push(`${crossDups} cross-layer duplicates`);
688
+ console.log(`\nDeduped ${dedupResult.removed} duplicates (${parts.join(', ')}). Kept stronger copies.`);
689
+ }
690
+ }
522
691
  // Auto-share high-transfer-score memories to global (unless --no-share or dry-run)
523
692
  if (!dryRun && !flags['no-share']) {
524
693
  const sleepConfig = loadConfig(hippoRoot);
@@ -1358,7 +1527,7 @@ async function cmdEmbed(hippoRoot, flags) {
1358
1527
  return;
1359
1528
  }
1360
1529
  console.log('Embedding all memories (this may take a moment on first run to download model)...');
1361
- const count = await embedAll(hippoRoot);
1530
+ const count = await embedAll(hippoRoot, resolveEmbeddingModel(hippoRoot));
1362
1531
  const entriesAfter = loadAllEntries(hippoRoot);
1363
1532
  const embIndexAfter = loadEmbeddingIndex(hippoRoot);
1364
1533
  console.log(`Done. ${count} new embeddings created. ${Object.keys(embIndexAfter).length}/${entriesAfter.length} total.`);
@@ -2040,10 +2209,13 @@ Commands:
2040
2209
  --budget <n> Token budget (default: 1500)
2041
2210
  --format <fmt> Output format: markdown (default) or json
2042
2211
  --framing <mode> Framing: observe (default), suggest, assert
2043
- sleep Run consolidation pass (auto-learns + auto-shares)
2212
+ sleep Run consolidation pass (auto-learns + dedup + auto-shares)
2044
2213
  --dry-run Preview without writing
2045
2214
  --no-learn Skip auto git-learn before consolidation
2046
2215
  --no-share Skip auto-sharing to global store
2216
+ dedup Remove duplicate memories (keeps stronger copy)
2217
+ --dry-run Preview without removing
2218
+ --threshold <n> Overlap threshold 0-1 (default: 0.7)
2047
2219
  status Show memory health stats
2048
2220
  outcome Apply feedback to last recall
2049
2221
  --good Memories were helpful
@@ -2220,6 +2392,9 @@ async function main() {
2220
2392
  case 'sleep':
2221
2393
  cmdSleep(hippoRoot, flags);
2222
2394
  break;
2395
+ case 'dedup':
2396
+ cmdDedup(hippoRoot, flags);
2397
+ break;
2223
2398
  case 'status':
2224
2399
  cmdStatus(hippoRoot);
2225
2400
  break;