clawmem 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/clawmem.ts +428 -34
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/clawmem.ts CHANGED
@@ -295,9 +295,9 @@ async function cmdEmbed(args: string[]) {
295
295
  const isCloudEmbed = !!process.env.CLAWMEM_EMBED_API_KEY;
296
296
  const CLOUD_BATCH_SIZE = 50;
297
297
  const CLOUD_TPM_LIMIT = parseInt(process.env.CLAWMEM_EMBED_TPM_LIMIT || "100000", 10);
298
- const CLOUD_TPM_SAFETY = 0.85;
298
+ const CLOUD_TPM_SAFETY = 0.85; // use 85% of limit to leave headroom for retries
299
299
  const CHARS_PER_TOKEN = 4;
300
- let lastBatchSentAt = 0;
300
+ let lastBatchSentAt = 0; // global timestamp of last batch send
301
301
 
302
302
  for (let docIdx = 0; docIdx < hashes.length; docIdx++) {
303
303
  const { hash, body, path, title: docTitle, collection } = hashes[docIdx]!;
@@ -903,6 +903,7 @@ async function cmdServe(args: string[]) {
903
903
  console.log(`ClawMem HTTP server listening on http://${host}:${port}`);
904
904
  console.log(`Token auth: ${process.env.CLAWMEM_API_TOKEN ? "enabled" : "disabled (set CLAWMEM_API_TOKEN)"}`);
905
905
  console.log(`Press Ctrl+C to stop.`);
906
+ // Keep alive
906
907
  await new Promise(() => {});
907
908
  }
908
909
 
@@ -962,11 +963,6 @@ async function cmdSetupHooks(args: string[]) {
962
963
  console.log(`${c.green}Removed ClawMem hooks from ${settingsPath}${c.reset}`);
963
964
  } else {
964
965
  // Install clawmem hooks
965
- // Production-validated hook set:
966
- // - session-bootstrap/staleness-check omitted: context-surfacing on first prompt
967
- // handles retrieval more precisely, and postcompact-inject covers post-compaction.
968
- // session-bootstrap adds ~2000 tokens before the user types anything.
969
- // - timeout wrappers prevent hooks from blocking the session on GPU timeouts.
970
966
  const hookConfig: Record<string, string[]> = {
971
967
  UserPromptSubmit: ["context-surfacing"],
972
968
  SessionStart: ["postcompact-inject", "curator-nudge"],
@@ -974,7 +970,6 @@ async function cmdSetupHooks(args: string[]) {
974
970
  Stop: ["decision-extractor", "handoff-generator", "feedback-loop"],
975
971
  };
976
972
 
977
- // Timeout per event type (seconds)
978
973
  const timeouts: Record<string, number> = {
979
974
  UserPromptSubmit: 8,
980
975
  SessionStart: 5,
@@ -1087,6 +1082,7 @@ async function cmdSetupOpenClaw(args: string[]) {
1087
1082
  return;
1088
1083
  }
1089
1084
 
1085
+ // Check that the OpenClaw plugin files exist
1090
1086
  if (!existsSync(pathResolve(pluginDir, "index.ts"))) {
1091
1087
  die(`OpenClaw plugin files not found at ${pluginDir}`);
1092
1088
  }
@@ -1160,20 +1156,43 @@ async function cmdWatch() {
1160
1156
  const col = collections.find(c => fullPath.startsWith(c.path));
1161
1157
  if (!col) return;
1162
1158
 
1163
- const relativePath = fullPath.slice(col.path.length + 1);
1164
- console.log(`${c.dim}[${event}]${c.reset} ${col.name}/${relativePath}`);
1165
-
1166
1159
  // Beads: trigger sync on any change within .beads/ directory
1167
1160
  // Dolt backend writes to .beads/dolt/ — watch for any file change there
1168
1161
  if (fullPath.includes(".beads/")) {
1169
1162
  const projectDir = detectBeadsProject(fullPath.replace(/\/\.beads\/.*$/, ""));
1170
1163
  if (projectDir) {
1164
+ const relativePath = fullPath.slice(col.path.length + 1);
1165
+ console.log(`${c.dim}[${event}]${c.reset} ${col.name}/${relativePath}`);
1171
1166
  const result = await s.syncBeadsIssues(projectDir);
1172
1167
  console.log(` beads: +${result.created} ~${result.synced}`);
1173
1168
  }
1174
1169
  return;
1175
1170
  }
1176
1171
 
1172
+ // Quick pattern check: skip files that can't match the collection pattern
1173
+ // before touching the DB. This prevents broad path collections (e.g. ~/Projects)
1174
+ // with narrow patterns (e.g. single filename) from triggering DB access on
1175
+ // every .md change under the tree.
1176
+ const relativePath = fullPath.slice(col.path.length + 1);
1177
+ if (col.pattern && col.pattern !== "**/*.md") {
1178
+ const patterns = col.pattern.includes("{")
1179
+ ? col.pattern.replace(/^\{|\}$/g, "").split(",")
1180
+ : [col.pattern];
1181
+ const couldMatch = patterns.some(p => {
1182
+ // Simple glob check: if pattern has no wildcards, it's a filename match
1183
+ if (!p.includes("*") && !p.includes("?")) return relativePath === p || relativePath.endsWith("/" + p);
1184
+ // If pattern starts with **/, any relative path could match
1185
+ if (p.startsWith("**/")) return true;
1186
+ // If pattern has a directory prefix, check it
1187
+ const patternDir = p.substring(0, p.lastIndexOf("/") + 1);
1188
+ if (patternDir) return relativePath.startsWith(patternDir);
1189
+ return true; // Fallback: let indexCollection handle it
1190
+ });
1191
+ if (!couldMatch) return;
1192
+ }
1193
+
1194
+ console.log(`${c.dim}[${event}]${c.reset} ${col.name}/${relativePath}`);
1195
+
1177
1196
  // Re-index just this collection
1178
1197
  const stats = await indexCollection(s, col.name, col.path, col.pattern);
1179
1198
  if (stats.added > 0 || stats.updated > 0 || stats.removed > 0) {
@@ -1185,9 +1204,43 @@ async function cmdWatch() {
1185
1204
  },
1186
1205
  });
1187
1206
 
1207
+ // Skill vault watcher: watch _clawmem-skills/ content root if configured
1208
+ let skillWatcher: { close: () => void } | null = null;
1209
+ try {
1210
+ const { getVaultPath, getSkillContentRoot } = await import("./config.ts");
1211
+ const { resolveStore } = await import("./store.ts");
1212
+ const skillVaultPath = getVaultPath("skill");
1213
+ const skillRoot = getSkillContentRoot();
1214
+
1215
+ if (skillVaultPath && existsSync(skillRoot)) {
1216
+ const skillStore = resolveStore("skill");
1217
+ console.log(`${c.bold}Watching skill vault content root...${c.reset}`);
1218
+ console.log(` ${c.dim}skill: ${skillRoot} → ${skillVaultPath}${c.reset}`);
1219
+
1220
+ skillWatcher = startWatcher([skillRoot], {
1221
+ debounceMs: 2000,
1222
+ onChanged: async (fullPath, event) => {
1223
+ const relativePath = fullPath.slice(skillRoot.length + 1);
1224
+ console.log(`${c.dim}[${event}]${c.reset} skill/${relativePath}`);
1225
+
1226
+ const stats = await indexCollection(skillStore, "skill-observations", skillRoot, "**/*.md");
1227
+ if (stats.added > 0 || stats.updated > 0 || stats.removed > 0) {
1228
+ console.log(` skill: +${stats.added} ~${stats.updated} -${stats.removed}`);
1229
+ }
1230
+ },
1231
+ onError: (err) => {
1232
+ console.error(`${c.red}Skill watch error: ${err.message}${c.reset}`);
1233
+ },
1234
+ });
1235
+ }
1236
+ } catch {
1237
+ // Skill vault not configured — skip
1238
+ }
1239
+
1188
1240
  // Keep running until Ctrl+C
1189
1241
  process.on("SIGINT", () => {
1190
1242
  watcher.close();
1243
+ skillWatcher?.close();
1191
1244
  closeStore();
1192
1245
  process.exit(0);
1193
1246
  });
@@ -1585,6 +1638,9 @@ async function main() {
1585
1638
  case "doctor":
1586
1639
  await cmdDoctor();
1587
1640
  break;
1641
+ case "path":
1642
+ cmdPath();
1643
+ break;
1588
1644
  case "bootstrap":
1589
1645
  await cmdBootstrap(subArgs);
1590
1646
  break;
@@ -1600,15 +1656,18 @@ async function main() {
1600
1656
  case "surface":
1601
1657
  await cmdSurface(subArgs);
1602
1658
  break;
1659
+ case "lifecycle":
1660
+ await cmdLifecycle(subArgs);
1661
+ break;
1603
1662
  case "reflect":
1604
1663
  await cmdReflect(subArgs);
1605
1664
  break;
1606
- case "path":
1607
- cmdPath();
1608
- break;
1609
1665
  case "consolidate":
1610
1666
  await cmdConsolidate(subArgs);
1611
1667
  break;
1668
+ case "curate":
1669
+ await cmdCurate(subArgs);
1670
+ break;
1612
1671
  case "help":
1613
1672
  case "--help":
1614
1673
  case "-h":
@@ -1623,17 +1682,157 @@ async function main() {
1623
1682
  }
1624
1683
  }
1625
1684
 
1685
/**
 * Dispatch the `clawmem lifecycle <status|sweep|search|restore>` subcommands.
 *
 * - `status`  prints lifecycle counters from the store plus the configured policy.
 * - `sweep`   archives the policy's archive candidates (and purges when the policy
 *             sets `purge_after_days`); with `--dry-run` or `policy.dry_run` it only
 *             lists what would be archived.
 * - `restore` restores archived documents selected by `--query`, `--collection` or `--all`.
 * - `search`  lists archived documents matching a free-text query without restoring them.
 *
 * @param args Arguments following `lifecycle`; `args[0]` selects the subcommand and
 *             the remainder is handed to that subcommand's own argument parsing.
 */
async function cmdLifecycle(args: string[]) {
  const subCmd = args[0];
  const subArgs = args.slice(1);

  switch (subCmd) {
    case "status": {
      const store = getStore();
      const stats = store.getLifecycleStats();
      const { loadVaultConfig } = await import("./config.ts");
      const config = loadVaultConfig();
      const policy = config.lifecycle;

      console.log(`Active: ${stats.active}`);
      console.log(`Archived (auto): ${stats.archived}`);
      console.log(`Forgotten (manual): ${stats.forgotten}`);
      console.log(`Pinned: ${stats.pinned}`);
      console.log(`Snoozed: ${stats.snoozed}`);
      console.log(`Never accessed: ${stats.neverAccessed}`);
      console.log(`Oldest access: ${stats.oldestAccess?.slice(0, 10) || "n/a"}`);
      console.log();
      if (policy) {
        // Render the purge window with the same "d" unit used for the archive
        // window and the type overrides; a missing value means "never".
        const purgeLabel = policy.purge_after_days == null ? "never" : `${policy.purge_after_days}d`;
        console.log(`Policy: archive after ${policy.archive_after_days}d, purge after ${purgeLabel}, dry_run=${policy.dry_run}`);
        if (policy.exempt_collections.length > 0) {
          console.log(`Exempt: ${policy.exempt_collections.join(", ")}`);
        }
        if (Object.keys(policy.type_overrides).length > 0) {
          const overrides = Object.entries(policy.type_overrides)
            .map(([k, v]) => `${k}=${v === null ? "never" : v + "d"}`)
            .join(", ");
          console.log(`Type overrides: ${overrides}`);
        }
      } else {
        console.log("Policy: none configured");
      }
      break;
    }

    case "sweep": {
      const { values } = parseArgs({
        args: subArgs,
        options: { "dry-run": { type: "boolean", default: false } },
        allowPositionals: false,
      });
      const dryRun = values["dry-run"];

      const { loadVaultConfig } = await import("./config.ts");
      const config = loadVaultConfig();
      const policy = config.lifecycle;
      if (!policy) {
        die("No lifecycle policy configured in config.yaml");
        return;
      }

      const store = getStore();
      const candidates = store.getArchiveCandidates(policy);

      // Either the CLI flag or the policy itself can force a report-only run.
      if (dryRun || policy.dry_run) {
        console.log(`Would archive ${candidates.length} document(s):`);
        // Named `doc` (not `c`) so the loop variable does not shadow the
        // module-level colour table `c`.
        for (const doc of candidates) {
          console.log(` - ${doc.collection}/${doc.path} (${doc.content_type}, modified ${doc.modified_at.slice(0, 10)}, accessed ${doc.last_accessed_at?.slice(0, 10) || "never"})`);
        }
        if (candidates.length === 0) console.log(" (none)");
        return;
      }

      const archived = store.archiveDocuments(candidates.map(doc => doc.id));
      let purged = 0;
      if (policy.purge_after_days) {
        purged = store.purgeArchivedDocuments(policy.purge_after_days);
      }
      console.log(`Lifecycle sweep: archived ${archived}, purged ${purged}`);
      break;
    }

    case "restore": {
      const { values } = parseArgs({
        args: subArgs,
        options: {
          query: { type: "string" },
          collection: { type: "string" },
          all: { type: "boolean", default: false },
        },
        allowPositionals: false,
      });

      const store = getStore();

      // Selector precedence: --query, then --collection, then --all.
      if (values.query) {
        const results = store.searchArchived(values.query, 20);

        if (results.length === 0) {
          console.log("No archived documents match that query.");
          return;
        }

        const restored = store.restoreArchivedDocuments({ ids: results.map(r => r.id) });
        console.log(`Restored ${restored}:`);
        for (const r of results) {
          console.log(` - ${r.collection}/${r.path} (archived ${r.archived_at?.slice(0, 10) ?? "unknown"})`);
        }
      } else if (values.collection) {
        const restored = store.restoreArchivedDocuments({ collection: values.collection });
        console.log(`Restored ${restored} documents from collection "${values.collection}"`);
      } else if (values.all) {
        const restored = store.restoreArchivedDocuments({});
        console.log(`Restored ${restored} archived documents`);
      } else {
        die("Usage: clawmem lifecycle restore --query <term> | --collection <name> | --all");
      }
      break;
    }

    case "search": {
      // Every remaining argument is part of the query, so quoting is optional.
      const query = subArgs.join(" ").trim();
      if (!query) {
        die("Usage: clawmem lifecycle search <query>");
        return;
      }

      const store = getStore();
      const results = store.searchArchived(query);

      if (results.length === 0) {
        console.log("No archived documents match that query.");
        return;
      }

      console.log(`Found ${results.length} archived document(s):\n`);
      for (const r of results) {
        console.log(` [${r.score.toFixed(3)}] ${r.collection}/${r.path}`);
        console.log(` ${r.title} (archived ${r.archived_at?.slice(0, 10) ?? "unknown"})`);
      }
      break;
    }

    default:
      die("Usage: clawmem lifecycle <status|sweep|search|restore>");
  }
}
1824
+
1626
1825
  // =============================================================================
1627
- // Cross-Session Reflection
1826
+ // Cross-Session Reflection (E5)
1628
1827
  // =============================================================================
1629
1828
 
1630
1829
  async function cmdReflect(args: string[]) {
1631
- const s = getStore();
1830
+ const store = getStore();
1632
1831
  const days = parseInt(args[0] || "14");
1633
1832
  const cutoff = new Date();
1634
1833
  cutoff.setDate(cutoff.getDate() - days);
1635
1834
 
1636
- const recentDocs = s.getDocumentsByType("decision", 50)
1835
+ const recentDocs = store.getDocumentsByType("decision", 50)
1637
1836
  .filter(d => d.modifiedAt && d.modifiedAt >= cutoff.toISOString());
1638
1837
 
1639
1838
  if (recentDocs.length === 0) {
@@ -1648,15 +1847,15 @@ async function cmdReflect(args: string[]) {
1648
1847
  const stopWords = new Set(["the", "that", "this", "with", "from", "have", "will", "been", "were", "they", "their", "what", "when", "which", "about", "into", "more", "some", "than", "them", "then", "very", "also", "just", "should", "would", "could", "does", "make", "like", "using", "used"]);
1649
1848
 
1650
1849
  for (const d of recentDocs) {
1651
- const doc = s.findDocument(d.path);
1850
+ const doc = store.findDocument(d.path);
1652
1851
  if ("error" in doc) continue;
1653
- const body = s.getDocumentBody(doc) || "";
1852
+ const body = store.getDocumentBody(doc) || "";
1654
1853
  const words = body.toLowerCase()
1655
1854
  .replace(/[^a-z0-9\s-]/g, " ")
1656
1855
  .split(/\s+/)
1657
1856
  .filter(w => w.length > 3 && !stopWords.has(w));
1658
1857
 
1659
- // Ordered bigrams (preserve phrase direction)
1858
+ // M2: Ordered bigrams (preserve phrase direction)
1660
1859
  for (let i = 0; i < words.length - 1; i++) {
1661
1860
  const pair = `${words[i]!} ${words[i + 1]!}`;
1662
1861
  phrases.set(pair, (phrases.get(pair) || 0) + 1);
@@ -1672,6 +1871,7 @@ async function cmdReflect(args: string[]) {
1672
1871
  const patterns = [...phrases.entries()]
1673
1872
  .filter(([, count]) => count >= 3)
1674
1873
  .sort((a, b) => {
1874
+ // Prefer trigrams over bigrams at same count
1675
1875
  const lenDiff = b[0].split(" ").length - a[0].split(" ").length;
1676
1876
  return b[1] - a[1] || lenDiff;
1677
1877
  })
@@ -1687,7 +1887,7 @@ async function cmdReflect(args: string[]) {
1687
1887
  }
1688
1888
 
1689
1889
  // Also report antipatterns
1690
- const antiDocs = s.getDocumentsByType("antipattern", 10)
1890
+ const antiDocs = store.getDocumentsByType("antipattern", 10)
1691
1891
  .filter(d => d.modifiedAt && d.modifiedAt >= cutoff.toISOString());
1692
1892
 
1693
1893
  if (antiDocs.length > 0) {
@@ -1698,7 +1898,7 @@ async function cmdReflect(args: string[]) {
1698
1898
  }
1699
1899
 
1700
1900
  // Co-activation clusters
1701
- const coActs = s.db.prepare(`
1901
+ const coActs = store.db.prepare(`
1702
1902
  SELECT doc_a, doc_b, count FROM co_activations
1703
1903
  WHERE count >= 3
1704
1904
  ORDER BY count DESC
@@ -1718,12 +1918,12 @@ async function cmdReflect(args: string[]) {
1718
1918
  // =============================================================================
1719
1919
 
1720
1920
  async function cmdConsolidate(args: string[]) {
1721
- const s = getStore();
1921
+ const store = getStore();
1722
1922
  const dryRun = args.includes("--dry-run");
1723
1923
  const maxDocs = parseInt(args.find(a => /^\d+$/.test(a)) || "50");
1724
1924
 
1725
1925
  // Find low-confidence documents that might be duplicates
1726
- const candidates = s.db.prepare(`
1926
+ const candidates = store.db.prepare(`
1727
1927
  SELECT id, collection, path, title, hash, confidence, modified_at
1728
1928
  FROM documents
1729
1929
  WHERE active = 1 AND confidence < 0.4
@@ -1742,18 +1942,18 @@ async function cmdConsolidate(args: string[]) {
1742
1942
 
1743
1943
  for (const candidate of candidates) {
1744
1944
  // BM25 search with title as query to find similar docs
1745
- const similar = s.searchFTS(candidate.title, 5);
1746
- const candidateBody = s.getDocumentBody({ filepath: `clawmem://${candidate.collection}/${candidate.path}` } as any) || "";
1945
+ const similar = store.searchFTS(candidate.title, 5);
1946
+ const candidateBody = store.getDocumentBody({ filepath: `clawmem://${candidate.collection}/${candidate.path}` } as any) || "";
1747
1947
 
1748
1948
  const matches = similar.filter(r => {
1749
1949
  if (r.filepath === `clawmem://${candidate.collection}/${candidate.path}`) return false;
1750
1950
  if (r.score < 0.7) return false;
1751
1951
 
1752
- // Require same collection
1952
+ // M1: Require same collection
1753
1953
  const rCollection = r.collectionName;
1754
1954
  if (rCollection !== candidate.collection) return false;
1755
1955
 
1756
- // Require body similarity (Jaccard on word sets)
1956
+ // M1: Require body similarity (Jaccard on word sets)
1757
1957
  const matchBody = r.body || "";
1758
1958
  if (matchBody.length === 0 || candidateBody.length === 0) return false;
1759
1959
  const wordsA = new Set(candidateBody.toLowerCase().split(/\s+/).filter(w => w.length > 3));
@@ -1774,7 +1974,8 @@ async function cmdConsolidate(args: string[]) {
1774
1974
  console.log(` ${c.green}Keep:${c.reset} ${bestMatch.displayPath} (score: ${bestMatch.score.toFixed(3)})`);
1775
1975
 
1776
1976
  if (!dryRun) {
1777
- s.archiveDocuments([candidate.id]);
1977
+ // Archive the lower-confidence duplicate
1978
+ store.archiveDocuments([candidate.id]);
1778
1979
  mergeCount++;
1779
1980
  }
1780
1981
  console.log();
@@ -1783,6 +1984,192 @@ async function cmdConsolidate(args: string[]) {
1783
1984
  console.log(`${dryRun ? "Would consolidate" : "Consolidated"}: ${mergeCount} document(s)`);
1784
1985
  }
1785
1986
 
1987
+ // =============================================================================
1988
+ // Curate — automated maintenance (designed for cron/timer)
1989
+ // =============================================================================
1990
+
1991
/**
 * Shape of the JSON report that `cmdCurate` assembles and writes to
 * `curator-report.json`.
 */
interface CuratorReport {
  /** ISO-8601 timestamp taken when the report object is created. */
  timestamp: string;

  /** Document counters plus an overall infrastructure label. */
  health: {
    active: number;
    archived: number;
    forgotten: number;
    pinned: number;
    snoozed: number;
    neverAccessed: number;
    /** Number of documents the store reports as still needing embedding. */
    embeddingBacklog: number;
    /** "healthy", "error", or an "N issue(s)" summary. */
    infrastructure: string;
  };

  /** Count of documents the lifecycle policy would archive. */
  sweep: {
    candidates: number;
  };

  /** Count of low-confidence documents that look like duplicates. */
  consolidation: {
    candidates: number;
  };

  /** Result of the BM25 probe query. */
  retrieval: {
    bm25Pass: boolean;
    topScore: number;
  };

  /** Collection totals; `orphaned` holds names whose path no longer exists. */
  collections: {
    total: number;
    orphaned: string[];
    neverAccessedPct: number;
  };

  /** Human-readable follow-up items gathered across all phases. */
  actions: string[];
}
2009
+
2010
/**
 * `clawmem curate` — automated maintenance report, designed for cron/timer use.
 *
 * Runs a fixed sequence of checks, prints a summary for each, collects follow-up
 * items in `report.actions`, and writes the whole report as JSON to
 * `<home>/.cache/clawmem/curator-report.json`:
 *
 *   0. health snapshot (lifecycle counters + embedding backlog)
 *   1. infrastructure check (collections whose path no longer exists)
 *   2. lifecycle sweep, count only
 *   3. duplicate detection, count only
 *   4. BM25 retrieval probe
 *   5. never-accessed ratio
 *
 * Every phase has its own try/catch so one failing check does not abort the
 * rest. This function itself never calls archive, purge or restore; phases 2
 * and 3 only count candidates.
 *
 * @param _args Unused; accepted so the command matches the other `cmd*` handlers.
 */
async function cmdCurate(_args: string[]) {
  const s = getStore();
  const report: CuratorReport = {
    timestamp: new Date().toISOString(),
    health: { active: 0, archived: 0, forgotten: 0, pinned: 0, snoozed: 0, neverAccessed: 0, embeddingBacklog: 0, infrastructure: "healthy" },
    sweep: { candidates: 0 },
    consolidation: { candidates: 0 },
    retrieval: { bm25Pass: false, topScore: 0 },
    collections: { total: 0, orphaned: [], neverAccessedPct: 0 },
    actions: [],
  };

  console.log(`${c.bold}ClawMem Curator${c.reset} — ${new Date().toISOString().slice(0, 10)}\n`);

  // Phase 0: Health snapshot
  try {
    const stats = s.getLifecycleStats();
    const status = s.getStatus();
    report.health = {
      active: stats.active,
      archived: stats.archived,
      forgotten: stats.forgotten,
      pinned: stats.pinned,
      snoozed: stats.snoozed,
      neverAccessed: stats.neverAccessed,
      embeddingBacklog: status.needsEmbedding,
      infrastructure: "healthy",
    };
    console.log(` Documents: ${stats.active} active, ${stats.archived} archived, ${stats.forgotten} forgotten`);
    console.log(` Pinned: ${stats.pinned} | Snoozed: ${stats.snoozed} | Never accessed: ${stats.neverAccessed}`);
    console.log(` Embedding backlog: ${status.needsEmbedding}`);
    if (status.needsEmbedding > 0) {
      report.actions.push(`${status.needsEmbedding} documents need embedding`);
    }
  } catch (err) {
    console.log(` ${c.red}Health snapshot failed:${c.reset} ${err}`);
    report.health.infrastructure = "error";
  }

  // Phase 1: Doctor (infrastructure) — a collection is "orphaned" when its
  // configured path does not exist on disk.
  try {
    let issues = 0;
    const collections = collectionsList();
    for (const col of collections) {
      if (!existsSync(col.path)) {
        report.collections.orphaned.push(col.name);
        issues++;
      }
    }
    report.collections.total = collections.length;
    if (issues > 0) {
      report.health.infrastructure = `${issues} issue(s)`;
      report.actions.push(`${issues} orphaned collection(s): ${report.collections.orphaned.join(", ")}`);
    }
    console.log(` Infrastructure: ${issues === 0 ? `${c.green}healthy${c.reset}` : `${c.yellow}${issues} issue(s)${c.reset}`}`);
  } catch (err) {
    console.log(` ${c.red}Doctor failed:${c.reset} ${err}`);
  }

  // Phase 2: Lifecycle sweep (dry-run) — only counts candidates.
  console.log();
  try {
    const { loadVaultConfig } = await import("./config.ts");
    const config = loadVaultConfig();
    if (config.lifecycle) {
      const candidates = s.getArchiveCandidates(config.lifecycle);
      report.sweep.candidates = candidates.length;
      console.log(` Sweep: ${candidates.length} archive candidate(s) [dry-run]`);
      if (candidates.length > 0) {
        report.actions.push(`${candidates.length} documents eligible for archival`);
      }
    } else {
      console.log(` Sweep: no lifecycle policy configured`);
    }
  } catch (err) {
    console.log(` ${c.red}Sweep failed:${c.reset} ${err}`);
  }

  // Phase 3: Consolidation (dry-run) — counts low-confidence documents that
  // have at least one same-collection FTS match with similar body text.
  try {
    const candidates = s.db.prepare(`
      SELECT id, collection, path, title, hash, confidence
      FROM documents WHERE active = 1 AND confidence < 0.4
      ORDER BY confidence ASC LIMIT 50
    `).all() as { id: number; collection: string; path: string; title: string; hash: string; confidence: number }[];

    let dupes = 0;
    for (const candidate of candidates) {
      const similar = s.searchFTS(candidate.title, 5);
      const candidateBody = s.getDocumentBody({ filepath: `clawmem://${candidate.collection}/${candidate.path}` } as any) || "";
      for (const r of similar) {
        // Skip the candidate itself, weak matches, and other collections.
        if (r.filepath === `clawmem://${candidate.collection}/${candidate.path}`) continue;
        if (r.score < 0.7 || r.collectionName !== candidate.collection) continue;
        const matchBody = r.body || "";
        if (!matchBody || !candidateBody) continue;
        // Jaccard similarity over the sets of words longer than 3 characters.
        const wordsA = new Set(candidateBody.toLowerCase().split(/\s+/).filter(w => w.length > 3));
        const wordsB = new Set(matchBody.toLowerCase().split(/\s+/).filter(w => w.length > 3));
        if (wordsA.size === 0 || wordsB.size === 0) continue;
        let intersection = 0;
        for (const w of wordsA) { if (wordsB.has(w)) intersection++; }
        const jaccard = intersection / (wordsA.size + wordsB.size - intersection);
        // One sufficiently similar match is enough to count this candidate once.
        if (jaccard >= 0.4) { dupes++; break; }
      }
    }
    report.consolidation.candidates = dupes;
    console.log(` Consolidation: ${dupes} duplicate candidate(s) [dry-run]`);
    if (dupes > 0) {
      report.actions.push(`${dupes} duplicate documents found — run \`clawmem consolidate\` to review`);
    }
  } catch (err) {
    console.log(` ${c.red}Consolidation check failed:${c.reset} ${err}`);
  }

  // Phase 4: Retrieval probe (BM25) — passes when a fixed query returns at
  // least one result scoring above 0.3.
  try {
    const results = s.searchFTS("architecture decision", 3);
    const topScore = results[0]?.score || 0;
    report.retrieval.bm25Pass = results.length > 0 && topScore > 0.3;
    report.retrieval.topScore = topScore;
    console.log(` Retrieval: ${report.retrieval.bm25Pass ? `${c.green}OK${c.reset}` : `${c.red}DEGRADED${c.reset}`} (BM25 top=${topScore.toFixed(3)})`);
    if (!report.retrieval.bm25Pass) {
      report.actions.push("Retrieval degraded — BM25 probe returned no strong results");
    }
  } catch (err) {
    console.log(` ${c.red}Retrieval probe failed:${c.reset} ${err}`);
    report.actions.push("Retrieval probe failed");
  }

  // Phase 5: Collection hygiene — share of active documents never accessed.
  try {
    const naPct = report.health.active > 0
      ? Math.round((report.health.neverAccessed / report.health.active) * 100)
      : 0;
    report.collections.neverAccessedPct = naPct;
    if (naPct > 30) {
      report.actions.push(`${report.health.neverAccessed} documents never accessed (${naPct}%) — consider review`);
    }
  } catch {
    // non-critical
  }

  // Write report
  try {
    // A literal "~" is not expanded by path resolution, so fall back to the
    // OS-reported home directory when HOME is unset.
    const { homedir } = await import("node:os");
    const reportPath = pathResolve(process.env.HOME || homedir(), ".cache", "clawmem", "curator-report.json");
    mkdirSync(pathResolve(reportPath, ".."), { recursive: true });
    // Await the write so failures land in this catch block and the success
    // line is only printed once the file is actually written.
    await Bun.write(reportPath, JSON.stringify(report, null, 2));
    console.log(`\n Report: ${reportPath}`);
  } catch (err) {
    console.log(` ${c.red}Failed to write report:${c.reset} ${err}`);
  }

  // Summary
  console.log();
  if (report.actions.length === 0) {
    console.log(`${c.green}No actions needed.${c.reset}`);
  } else {
    console.log(`${c.bold}Actions (${report.actions.length}):${c.reset}`);
    for (const a of report.actions) {
      console.log(` ${c.yellow}→${c.reset} ${a}`);
    }
  }
}
2172
+
1786
2173
  function printHelp() {
1787
2174
  console.log(`
1788
2175
  ${c.bold}ClawMem${c.reset} - Hybrid Agent Memory
@@ -1795,7 +2182,6 @@ ${c.bold}Setup:${c.reset}
1795
2182
  clawmem collection remove <name>
1796
2183
  clawmem setup hooks [--remove] Install/remove Claude Code hooks
1797
2184
  clawmem setup mcp [--remove] Register/remove MCP in ~/.claude.json
1798
- clawmem setup curator [--remove] Install/remove curator agent to ~/.claude/agents/
1799
2185
  clawmem setup openclaw [--remove] Show OpenClaw plugin installation steps
1800
2186
  clawmem install-service [--enable] Install systemd watcher service
1801
2187
 
@@ -1822,16 +2208,24 @@ ${c.bold}Hooks:${c.reset}
1822
2208
  clawmem surface --context --stdin IO6a: pre-prompt context injection
1823
2209
  clawmem surface --bootstrap --stdin IO6b: per-session bootstrap injection
1824
2210
 
1825
- ${c.bold}Analysis:${c.reset}
1826
- clawmem reflect [N] Cross-session reflection (last N days, default 14)
1827
- clawmem consolidate [--dry-run] [N] Find and archive duplicate low-confidence docs
2211
+ ${c.bold}Lifecycle:${c.reset}
2212
+ clawmem lifecycle status Show lifecycle stats + policy
2213
+ clawmem lifecycle sweep [--dry-run] Archive stale docs per policy
2214
+ clawmem lifecycle search <query> Search archived docs (FTS, no restore)
2215
+ clawmem lifecycle restore --query Q Restore archived docs by keyword
2216
+ clawmem lifecycle restore --collection N Restore by collection
2217
+ clawmem lifecycle restore --all Restore all archived docs
2218
+
2219
+ ${c.bold}Intelligence:${c.reset}
2220
+ clawmem reflect [days] Cross-session pattern analysis
2221
+ clawmem consolidate [--dry-run] Merge duplicate low-confidence docs
2222
+ clawmem curate Automated maintenance (health, sweep, dedup, hygiene)
1828
2223
 
1829
2224
  ${c.bold}Integration:${c.reset}
1830
2225
  clawmem mcp Start stdio MCP server
1831
2226
  clawmem serve [--port 7438] [--host 127.0.0.1] Start HTTP REST API server
1832
2227
  clawmem update-context Regenerate all directory CLAUDE.md files
1833
2228
  clawmem doctor Full health check
1834
- clawmem path Print database path
1835
2229
 
1836
2230
  ${c.bold}Options:${c.reset}
1837
2231
  -n, --num <N> Number of results