clawmem 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.1.6",
3
+ "version": "0.1.8",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/clawmem.ts CHANGED
@@ -295,9 +295,9 @@ async function cmdEmbed(args: string[]) {
295
295
  const isCloudEmbed = !!process.env.CLAWMEM_EMBED_API_KEY;
296
296
  const CLOUD_BATCH_SIZE = 50;
297
297
  const CLOUD_TPM_LIMIT = parseInt(process.env.CLAWMEM_EMBED_TPM_LIMIT || "100000", 10);
298
- const CLOUD_TPM_SAFETY = 0.85;
298
+ const CLOUD_TPM_SAFETY = 0.85; // use 85% of limit to leave headroom for retries
299
299
  const CHARS_PER_TOKEN = 4;
300
- let lastBatchSentAt = 0;
300
+ let lastBatchSentAt = 0; // global timestamp of last batch send
301
301
 
302
302
  for (let docIdx = 0; docIdx < hashes.length; docIdx++) {
303
303
  const { hash, body, path, title: docTitle, collection } = hashes[docIdx]!;
@@ -903,6 +903,7 @@ async function cmdServe(args: string[]) {
903
903
  console.log(`ClawMem HTTP server listening on http://${host}:${port}`);
904
904
  console.log(`Token auth: ${process.env.CLAWMEM_API_TOKEN ? "enabled" : "disabled (set CLAWMEM_API_TOKEN)"}`);
905
905
  console.log(`Press Ctrl+C to stop.`);
906
+ // Keep alive
906
907
  await new Promise(() => {});
907
908
  }
908
909
 
@@ -962,11 +963,6 @@ async function cmdSetupHooks(args: string[]) {
962
963
  console.log(`${c.green}Removed ClawMem hooks from ${settingsPath}${c.reset}`);
963
964
  } else {
964
965
  // Install clawmem hooks
965
- // Production-validated hook set:
966
- // - session-bootstrap/staleness-check omitted: context-surfacing on first prompt
967
- // handles retrieval more precisely, and postcompact-inject covers post-compaction.
968
- // session-bootstrap adds ~2000 tokens before the user types anything.
969
- // - timeout wrappers prevent hooks from blocking the session on GPU timeouts.
970
966
  const hookConfig: Record<string, string[]> = {
971
967
  UserPromptSubmit: ["context-surfacing"],
972
968
  SessionStart: ["postcompact-inject", "curator-nudge"],
@@ -974,7 +970,6 @@ async function cmdSetupHooks(args: string[]) {
974
970
  Stop: ["decision-extractor", "handoff-generator", "feedback-loop"],
975
971
  };
976
972
 
977
- // Timeout per event type (seconds)
978
973
  const timeouts: Record<string, number> = {
979
974
  UserPromptSubmit: 8,
980
975
  SessionStart: 5,
@@ -1087,6 +1082,7 @@ async function cmdSetupOpenClaw(args: string[]) {
1087
1082
  return;
1088
1083
  }
1089
1084
 
1085
+ // Check that the OpenClaw plugin files exist
1090
1086
  if (!existsSync(pathResolve(pluginDir, "index.ts"))) {
1091
1087
  die(`OpenClaw plugin files not found at ${pluginDir}`);
1092
1088
  }
@@ -1160,20 +1156,43 @@ async function cmdWatch() {
1160
1156
  const col = collections.find(c => fullPath.startsWith(c.path));
1161
1157
  if (!col) return;
1162
1158
 
1163
- const relativePath = fullPath.slice(col.path.length + 1);
1164
- console.log(`${c.dim}[${event}]${c.reset} ${col.name}/${relativePath}`);
1165
-
1166
1159
  // Beads: trigger sync on any change within .beads/ directory
1167
1160
  // Dolt backend writes to .beads/dolt/ — watch for any file change there
1168
1161
  if (fullPath.includes(".beads/")) {
1169
1162
  const projectDir = detectBeadsProject(fullPath.replace(/\/\.beads\/.*$/, ""));
1170
1163
  if (projectDir) {
1164
+ const relativePath = fullPath.slice(col.path.length + 1);
1165
+ console.log(`${c.dim}[${event}]${c.reset} ${col.name}/${relativePath}`);
1171
1166
  const result = await s.syncBeadsIssues(projectDir);
1172
1167
  console.log(` beads: +${result.created} ~${result.synced}`);
1173
1168
  }
1174
1169
  return;
1175
1170
  }
1176
1171
 
1172
+ // Quick pattern check: skip files that can't match the collection pattern
1173
+ // before touching the DB. This prevents broad path collections (e.g. ~/Projects)
1174
+ // with narrow patterns (e.g. single filename) from triggering DB access on
1175
+ // every .md change under the tree.
1176
+ const relativePath = fullPath.slice(col.path.length + 1);
1177
+ if (col.pattern && col.pattern !== "**/*.md") {
1178
+ const patterns = col.pattern.includes("{")
1179
+ ? col.pattern.replace(/^\{|\}$/g, "").split(",")
1180
+ : [col.pattern];
1181
+ const couldMatch = patterns.some(p => {
1182
+ // Simple glob check: if pattern has no wildcards, it's a filename match
1183
+ if (!p.includes("*") && !p.includes("?")) return relativePath === p || relativePath.endsWith("/" + p);
1184
+ // If pattern starts with **/, any relative path could match
1185
+ if (p.startsWith("**/")) return true;
1186
+ // If pattern has a directory prefix, check it
1187
+ const patternDir = p.substring(0, p.lastIndexOf("/") + 1);
1188
+ if (patternDir) return relativePath.startsWith(patternDir);
1189
+ return true; // Fallback: let indexCollection handle it
1190
+ });
1191
+ if (!couldMatch) return;
1192
+ }
1193
+
1194
+ console.log(`${c.dim}[${event}]${c.reset} ${col.name}/${relativePath}`);
1195
+
1177
1196
  // Re-index just this collection
1178
1197
  const stats = await indexCollection(s, col.name, col.path, col.pattern);
1179
1198
  if (stats.added > 0 || stats.updated > 0 || stats.removed > 0) {
@@ -1185,9 +1204,57 @@ async function cmdWatch() {
1185
1204
  },
1186
1205
  });
1187
1206
 
1207
+ // Skill vault watcher: watch _clawmem-skills/ content root if configured
1208
+ let skillWatcher: { close: () => void } | null = null;
1209
+ try {
1210
+ const { getVaultPath, getSkillContentRoot } = await import("./config.ts");
1211
+ const { resolveStore } = await import("./store.ts");
1212
+ const skillVaultPath = getVaultPath("skill");
1213
+ const skillRoot = getSkillContentRoot();
1214
+
1215
+ if (skillVaultPath && existsSync(skillRoot)) {
1216
+ const skillStore = resolveStore("skill");
1217
+ console.log(`${c.bold}Watching skill vault content root...${c.reset}`);
1218
+ console.log(` ${c.dim}skill: ${skillRoot} → ${skillVaultPath}${c.reset}`);
1219
+
1220
+ skillWatcher = startWatcher([skillRoot], {
1221
+ debounceMs: 2000,
1222
+ onChanged: async (fullPath, event) => {
1223
+ const relativePath = fullPath.slice(skillRoot.length + 1);
1224
+ console.log(`${c.dim}[${event}]${c.reset} skill/${relativePath}`);
1225
+
1226
+ const stats = await indexCollection(skillStore, "skill-observations", skillRoot, "**/*.md");
1227
+ if (stats.added > 0 || stats.updated > 0 || stats.removed > 0) {
1228
+ console.log(` skill: +${stats.added} ~${stats.updated} -${stats.removed}`);
1229
+ }
1230
+ },
1231
+ onError: (err) => {
1232
+ console.error(`${c.red}Skill watch error: ${err.message}${c.reset}`);
1233
+ },
1234
+ });
1235
+ }
1236
+ } catch {
1237
+ // Skill vault not configured — skip
1238
+ }
1239
+
1240
+ // Periodic WAL checkpoint: the watcher holds a long-lived DB connection which
1241
+ // prevents SQLite auto-checkpoint from shrinking the WAL file. Without this,
1242
+ // the WAL grows unbounded (observed 77MB+), slowing every concurrent DB access
1243
+ // (hooks, MCP) and eventually causing UserPromptSubmit hook timeouts.
1244
+ const WAL_CHECKPOINT_INTERVAL = 5 * 60 * 1000; // 5 minutes
1245
+ const checkpointTimer = setInterval(() => {
1246
+ try {
1247
+ s.db.exec("PRAGMA wal_checkpoint(PASSIVE)");
1248
+ } catch {
1249
+ // Checkpoint failed (busy) — will retry next interval
1250
+ }
1251
+ }, WAL_CHECKPOINT_INTERVAL);
1252
+
1188
1253
  // Keep running until Ctrl+C
1189
1254
  process.on("SIGINT", () => {
1255
+ clearInterval(checkpointTimer);
1190
1256
  watcher.close();
1257
+ skillWatcher?.close();
1191
1258
  closeStore();
1192
1259
  process.exit(0);
1193
1260
  });
@@ -1585,6 +1652,9 @@ async function main() {
1585
1652
  case "doctor":
1586
1653
  await cmdDoctor();
1587
1654
  break;
1655
+ case "path":
1656
+ cmdPath();
1657
+ break;
1588
1658
  case "bootstrap":
1589
1659
  await cmdBootstrap(subArgs);
1590
1660
  break;
@@ -1600,15 +1670,18 @@ async function main() {
1600
1670
  case "surface":
1601
1671
  await cmdSurface(subArgs);
1602
1672
  break;
1673
+ case "lifecycle":
1674
+ await cmdLifecycle(subArgs);
1675
+ break;
1603
1676
  case "reflect":
1604
1677
  await cmdReflect(subArgs);
1605
1678
  break;
1606
- case "path":
1607
- cmdPath();
1608
- break;
1609
1679
  case "consolidate":
1610
1680
  await cmdConsolidate(subArgs);
1611
1681
  break;
1682
+ case "curate":
1683
+ await cmdCurate(subArgs);
1684
+ break;
1612
1685
  case "help":
1613
1686
  case "--help":
1614
1687
  case "-h":
@@ -1623,17 +1696,157 @@ async function main() {
1623
1696
  }
1624
1697
  }
1625
1698
 
1699
+ async function cmdLifecycle(args: string[]) {
1700
+ const subCmd = args[0];
1701
+ const subArgs = args.slice(1);
1702
+
1703
+ switch (subCmd) {
1704
+ case "status": {
1705
+ const store = getStore();
1706
+ const stats = store.getLifecycleStats();
1707
+ const { loadVaultConfig } = await import("./config.ts");
1708
+ const config = loadVaultConfig();
1709
+ const policy = config.lifecycle;
1710
+
1711
+ console.log(`Active: ${stats.active}`);
1712
+ console.log(`Archived (auto): ${stats.archived}`);
1713
+ console.log(`Forgotten (manual): ${stats.forgotten}`);
1714
+ console.log(`Pinned: ${stats.pinned}`);
1715
+ console.log(`Snoozed: ${stats.snoozed}`);
1716
+ console.log(`Never accessed: ${stats.neverAccessed}`);
1717
+ console.log(`Oldest access: ${stats.oldestAccess?.slice(0, 10) || "n/a"}`);
1718
+ console.log();
1719
+ if (policy) {
1720
+ console.log(`Policy: archive after ${policy.archive_after_days}d, purge after ${policy.purge_after_days ?? "never"}, dry_run=${policy.dry_run}`);
1721
+ if (policy.exempt_collections.length > 0) {
1722
+ console.log(`Exempt: ${policy.exempt_collections.join(", ")}`);
1723
+ }
1724
+ if (Object.keys(policy.type_overrides).length > 0) {
1725
+ const overrides = Object.entries(policy.type_overrides)
1726
+ .map(([k, v]) => `${k}=${v === null ? "never" : v + "d"}`)
1727
+ .join(", ");
1728
+ console.log(`Type overrides: ${overrides}`);
1729
+ }
1730
+ } else {
1731
+ console.log("Policy: none configured");
1732
+ }
1733
+ break;
1734
+ }
1735
+
1736
+ case "sweep": {
1737
+ const { values } = parseArgs({
1738
+ args: subArgs,
1739
+ options: { "dry-run": { type: "boolean", default: false } },
1740
+ allowPositionals: false,
1741
+ });
1742
+ const dryRun = values["dry-run"];
1743
+
1744
+ const { loadVaultConfig } = await import("./config.ts");
1745
+ const config = loadVaultConfig();
1746
+ const policy = config.lifecycle;
1747
+ if (!policy) {
1748
+ die("No lifecycle policy configured in config.yaml");
1749
+ return;
1750
+ }
1751
+
1752
+ const store = getStore();
1753
+ const candidates = store.getArchiveCandidates(policy);
1754
+
1755
+ if (dryRun || policy.dry_run) {
1756
+ console.log(`Would archive ${candidates.length} document(s):`);
1757
+ for (const c of candidates) {
1758
+ console.log(` - ${c.collection}/${c.path} (${c.content_type}, modified ${c.modified_at.slice(0, 10)}, accessed ${c.last_accessed_at?.slice(0, 10) || "never"})`);
1759
+ }
1760
+ if (candidates.length === 0) console.log(" (none)");
1761
+ return;
1762
+ }
1763
+
1764
+ const archived = store.archiveDocuments(candidates.map(c => c.id));
1765
+ let purged = 0;
1766
+ if (policy.purge_after_days) {
1767
+ purged = store.purgeArchivedDocuments(policy.purge_after_days);
1768
+ }
1769
+ console.log(`Lifecycle sweep: archived ${archived}, purged ${purged}`);
1770
+ break;
1771
+ }
1772
+
1773
+ case "restore": {
1774
+ const { values } = parseArgs({
1775
+ args: subArgs,
1776
+ options: {
1777
+ query: { type: "string" },
1778
+ collection: { type: "string" },
1779
+ all: { type: "boolean", default: false },
1780
+ },
1781
+ allowPositionals: false,
1782
+ });
1783
+
1784
+ const store = getStore();
1785
+
1786
+ if (values.query) {
1787
+ const results = store.searchArchived(values.query, 20);
1788
+
1789
+ if (results.length === 0) {
1790
+ console.log("No archived documents match that query.");
1791
+ return;
1792
+ }
1793
+
1794
+ const restored = store.restoreArchivedDocuments({ ids: results.map(r => r.id) });
1795
+ console.log(`Restored ${restored}:`);
1796
+ for (const r of results) {
1797
+ console.log(` - ${r.collection}/${r.path} (archived ${r.archived_at?.slice(0, 10)})`);
1798
+ }
1799
+ } else if (values.collection) {
1800
+ const restored = store.restoreArchivedDocuments({ collection: values.collection });
1801
+ console.log(`Restored ${restored} documents from collection "${values.collection}"`);
1802
+ } else if (values.all) {
1803
+ const restored = store.restoreArchivedDocuments({});
1804
+ console.log(`Restored ${restored} archived documents`);
1805
+ } else {
1806
+ die("Usage: clawmem lifecycle restore --query <term> | --collection <name> | --all");
1807
+ }
1808
+ break;
1809
+ }
1810
+
1811
+ case "search": {
1812
+ const query = subArgs.join(" ").trim();
1813
+ if (!query) {
1814
+ die("Usage: clawmem lifecycle search <query>");
1815
+ return;
1816
+ }
1817
+
1818
+ const store = getStore();
1819
+ const results = store.searchArchived(query);
1820
+
1821
+ if (results.length === 0) {
1822
+ console.log("No archived documents match that query.");
1823
+ return;
1824
+ }
1825
+
1826
+ console.log(`Found ${results.length} archived document(s):\n`);
1827
+ for (const r of results) {
1828
+ console.log(` [${r.score.toFixed(3)}] ${r.collection}/${r.path}`);
1829
+ console.log(` ${r.title} (archived ${r.archived_at?.slice(0, 10)})`);
1830
+ }
1831
+ break;
1832
+ }
1833
+
1834
+ default:
1835
+ die("Usage: clawmem lifecycle <status|sweep|search|restore>");
1836
+ }
1837
+ }
1838
+
1626
1839
  // =============================================================================
1627
- // Cross-Session Reflection
1840
+ // Cross-Session Reflection (E5)
1628
1841
  // =============================================================================
1629
1842
 
1630
1843
  async function cmdReflect(args: string[]) {
1631
- const s = getStore();
1844
+ const store = getStore();
1632
1845
  const days = parseInt(args[0] || "14");
1633
1846
  const cutoff = new Date();
1634
1847
  cutoff.setDate(cutoff.getDate() - days);
1635
1848
 
1636
- const recentDocs = s.getDocumentsByType("decision", 50)
1849
+ const recentDocs = store.getDocumentsByType("decision", 50)
1637
1850
  .filter(d => d.modifiedAt && d.modifiedAt >= cutoff.toISOString());
1638
1851
 
1639
1852
  if (recentDocs.length === 0) {
@@ -1648,15 +1861,15 @@ async function cmdReflect(args: string[]) {
1648
1861
  const stopWords = new Set(["the", "that", "this", "with", "from", "have", "will", "been", "were", "they", "their", "what", "when", "which", "about", "into", "more", "some", "than", "them", "then", "very", "also", "just", "should", "would", "could", "does", "make", "like", "using", "used"]);
1649
1862
 
1650
1863
  for (const d of recentDocs) {
1651
- const doc = s.findDocument(d.path);
1864
+ const doc = store.findDocument(d.path);
1652
1865
  if ("error" in doc) continue;
1653
- const body = s.getDocumentBody(doc) || "";
1866
+ const body = store.getDocumentBody(doc) || "";
1654
1867
  const words = body.toLowerCase()
1655
1868
  .replace(/[^a-z0-9\s-]/g, " ")
1656
1869
  .split(/\s+/)
1657
1870
  .filter(w => w.length > 3 && !stopWords.has(w));
1658
1871
 
1659
- // Ordered bigrams (preserve phrase direction)
1872
+ // M2: Ordered bigrams (preserve phrase direction)
1660
1873
  for (let i = 0; i < words.length - 1; i++) {
1661
1874
  const pair = `${words[i]!} ${words[i + 1]!}`;
1662
1875
  phrases.set(pair, (phrases.get(pair) || 0) + 1);
@@ -1672,6 +1885,7 @@ async function cmdReflect(args: string[]) {
1672
1885
  const patterns = [...phrases.entries()]
1673
1886
  .filter(([, count]) => count >= 3)
1674
1887
  .sort((a, b) => {
1888
+ // Prefer trigrams over bigrams at same count
1675
1889
  const lenDiff = b[0].split(" ").length - a[0].split(" ").length;
1676
1890
  return b[1] - a[1] || lenDiff;
1677
1891
  })
@@ -1687,7 +1901,7 @@ async function cmdReflect(args: string[]) {
1687
1901
  }
1688
1902
 
1689
1903
  // Also report antipatterns
1690
- const antiDocs = s.getDocumentsByType("antipattern", 10)
1904
+ const antiDocs = store.getDocumentsByType("antipattern", 10)
1691
1905
  .filter(d => d.modifiedAt && d.modifiedAt >= cutoff.toISOString());
1692
1906
 
1693
1907
  if (antiDocs.length > 0) {
@@ -1698,7 +1912,7 @@ async function cmdReflect(args: string[]) {
1698
1912
  }
1699
1913
 
1700
1914
  // Co-activation clusters
1701
- const coActs = s.db.prepare(`
1915
+ const coActs = store.db.prepare(`
1702
1916
  SELECT doc_a, doc_b, count FROM co_activations
1703
1917
  WHERE count >= 3
1704
1918
  ORDER BY count DESC
@@ -1718,12 +1932,12 @@ async function cmdReflect(args: string[]) {
1718
1932
  // =============================================================================
1719
1933
 
1720
1934
  async function cmdConsolidate(args: string[]) {
1721
- const s = getStore();
1935
+ const store = getStore();
1722
1936
  const dryRun = args.includes("--dry-run");
1723
1937
  const maxDocs = parseInt(args.find(a => /^\d+$/.test(a)) || "50");
1724
1938
 
1725
1939
  // Find low-confidence documents that might be duplicates
1726
- const candidates = s.db.prepare(`
1940
+ const candidates = store.db.prepare(`
1727
1941
  SELECT id, collection, path, title, hash, confidence, modified_at
1728
1942
  FROM documents
1729
1943
  WHERE active = 1 AND confidence < 0.4
@@ -1742,18 +1956,18 @@ async function cmdConsolidate(args: string[]) {
1742
1956
 
1743
1957
  for (const candidate of candidates) {
1744
1958
  // BM25 search with title as query to find similar docs
1745
- const similar = s.searchFTS(candidate.title, 5);
1746
- const candidateBody = s.getDocumentBody({ filepath: `clawmem://${candidate.collection}/${candidate.path}` } as any) || "";
1959
+ const similar = store.searchFTS(candidate.title, 5);
1960
+ const candidateBody = store.getDocumentBody({ filepath: `clawmem://${candidate.collection}/${candidate.path}` } as any) || "";
1747
1961
 
1748
1962
  const matches = similar.filter(r => {
1749
1963
  if (r.filepath === `clawmem://${candidate.collection}/${candidate.path}`) return false;
1750
1964
  if (r.score < 0.7) return false;
1751
1965
 
1752
- // Require same collection
1966
+ // M1: Require same collection
1753
1967
  const rCollection = r.collectionName;
1754
1968
  if (rCollection !== candidate.collection) return false;
1755
1969
 
1756
- // Require body similarity (Jaccard on word sets)
1970
+ // M1: Require body similarity (Jaccard on word sets)
1757
1971
  const matchBody = r.body || "";
1758
1972
  if (matchBody.length === 0 || candidateBody.length === 0) return false;
1759
1973
  const wordsA = new Set(candidateBody.toLowerCase().split(/\s+/).filter(w => w.length > 3));
@@ -1774,7 +1988,8 @@ async function cmdConsolidate(args: string[]) {
1774
1988
  console.log(` ${c.green}Keep:${c.reset} ${bestMatch.displayPath} (score: ${bestMatch.score.toFixed(3)})`);
1775
1989
 
1776
1990
  if (!dryRun) {
1777
- s.archiveDocuments([candidate.id]);
1991
+ // Archive the lower-confidence duplicate
1992
+ store.archiveDocuments([candidate.id]);
1778
1993
  mergeCount++;
1779
1994
  }
1780
1995
  console.log();
@@ -1783,6 +1998,192 @@ async function cmdConsolidate(args: string[]) {
1783
1998
  console.log(`${dryRun ? "Would consolidate" : "Consolidated"}: ${mergeCount} document(s)`);
1784
1999
  }
1785
2000
 
2001
+ // =============================================================================
2002
+ // Curate — automated maintenance (designed for cron/timer)
2003
+ // =============================================================================
2004
+
2005
+ interface CuratorReport {
2006
+ timestamp: string;
2007
+ health: {
2008
+ active: number;
2009
+ archived: number;
2010
+ forgotten: number;
2011
+ pinned: number;
2012
+ snoozed: number;
2013
+ neverAccessed: number;
2014
+ embeddingBacklog: number;
2015
+ infrastructure: string;
2016
+ };
2017
+ sweep: { candidates: number };
2018
+ consolidation: { candidates: number };
2019
+ retrieval: { bm25Pass: boolean; topScore: number };
2020
+ collections: { total: number; orphaned: string[]; neverAccessedPct: number };
2021
+ actions: string[];
2022
+ }
2023
+
2024
+ async function cmdCurate(_args: string[]) {
2025
+ const s = getStore();
2026
+ const report: CuratorReport = {
2027
+ timestamp: new Date().toISOString(),
2028
+ health: { active: 0, archived: 0, forgotten: 0, pinned: 0, snoozed: 0, neverAccessed: 0, embeddingBacklog: 0, infrastructure: "healthy" },
2029
+ sweep: { candidates: 0 },
2030
+ consolidation: { candidates: 0 },
2031
+ retrieval: { bm25Pass: false, topScore: 0 },
2032
+ collections: { total: 0, orphaned: [], neverAccessedPct: 0 },
2033
+ actions: [],
2034
+ };
2035
+
2036
+ console.log(`${c.bold}ClawMem Curator${c.reset} — ${new Date().toISOString().slice(0, 10)}\n`);
2037
+
2038
+ // Phase 0: Health snapshot
2039
+ try {
2040
+ const stats = s.getLifecycleStats();
2041
+ const status = s.getStatus();
2042
+ report.health = {
2043
+ active: stats.active,
2044
+ archived: stats.archived,
2045
+ forgotten: stats.forgotten,
2046
+ pinned: stats.pinned,
2047
+ snoozed: stats.snoozed,
2048
+ neverAccessed: stats.neverAccessed,
2049
+ embeddingBacklog: status.needsEmbedding,
2050
+ infrastructure: "healthy",
2051
+ };
2052
+ console.log(` Documents: ${stats.active} active, ${stats.archived} archived, ${stats.forgotten} forgotten`);
2053
+ console.log(` Pinned: ${stats.pinned} | Snoozed: ${stats.snoozed} | Never accessed: ${stats.neverAccessed}`);
2054
+ console.log(` Embedding backlog: ${status.needsEmbedding}`);
2055
+ if (status.needsEmbedding > 0) {
2056
+ report.actions.push(`${status.needsEmbedding} documents need embedding`);
2057
+ }
2058
+ } catch (err) {
2059
+ console.log(` ${c.red}Health snapshot failed:${c.reset} ${err}`);
2060
+ report.health.infrastructure = "error";
2061
+ }
2062
+
2063
+ // Phase 1: Doctor (infrastructure)
2064
+ try {
2065
+ let issues = 0;
2066
+ const collections = collectionsList();
2067
+ for (const col of collections) {
2068
+ if (!existsSync(col.path)) {
2069
+ report.collections.orphaned.push(col.name);
2070
+ issues++;
2071
+ }
2072
+ }
2073
+ report.collections.total = collections.length;
2074
+ if (issues > 0) {
2075
+ report.health.infrastructure = `${issues} issue(s)`;
2076
+ report.actions.push(`${issues} orphaned collection(s): ${report.collections.orphaned.join(", ")}`);
2077
+ }
2078
+ console.log(` Infrastructure: ${issues === 0 ? `${c.green}healthy${c.reset}` : `${c.yellow}${issues} issue(s)${c.reset}`}`);
2079
+ } catch (err) {
2080
+ console.log(` ${c.red}Doctor failed:${c.reset} ${err}`);
2081
+ }
2082
+
2083
+ // Phase 2: Lifecycle sweep (dry-run)
2084
+ console.log();
2085
+ try {
2086
+ const { loadVaultConfig } = await import("./config.ts");
2087
+ const config = loadVaultConfig();
2088
+ if (config.lifecycle) {
2089
+ const candidates = s.getArchiveCandidates(config.lifecycle);
2090
+ report.sweep.candidates = candidates.length;
2091
+ console.log(` Sweep: ${candidates.length} archive candidate(s) [dry-run]`);
2092
+ if (candidates.length > 0) {
2093
+ report.actions.push(`${candidates.length} documents eligible for archival`);
2094
+ }
2095
+ } else {
2096
+ console.log(` Sweep: no lifecycle policy configured`);
2097
+ }
2098
+ } catch (err) {
2099
+ console.log(` ${c.red}Sweep failed:${c.reset} ${err}`);
2100
+ }
2101
+
2102
+ // Phase 3: Consolidation (dry-run)
2103
+ try {
2104
+ const candidates = s.db.prepare(`
2105
+ SELECT id, collection, path, title, hash, confidence
2106
+ FROM documents WHERE active = 1 AND confidence < 0.4
2107
+ ORDER BY confidence ASC LIMIT 50
2108
+ `).all() as { id: number; collection: string; path: string; title: string; hash: string; confidence: number }[];
2109
+
2110
+ let dupes = 0;
2111
+ for (const candidate of candidates) {
2112
+ const similar = s.searchFTS(candidate.title, 5);
2113
+ const candidateBody = s.getDocumentBody({ filepath: `clawmem://${candidate.collection}/${candidate.path}` } as any) || "";
2114
+ for (const r of similar) {
2115
+ if (r.filepath === `clawmem://${candidate.collection}/${candidate.path}`) continue;
2116
+ if (r.score < 0.7 || r.collectionName !== candidate.collection) continue;
2117
+ const matchBody = r.body || "";
2118
+ if (!matchBody || !candidateBody) continue;
2119
+ const wordsA = new Set(candidateBody.toLowerCase().split(/\s+/).filter(w => w.length > 3));
2120
+ const wordsB = new Set(matchBody.toLowerCase().split(/\s+/).filter(w => w.length > 3));
2121
+ if (wordsA.size === 0 || wordsB.size === 0) continue;
2122
+ let intersection = 0;
2123
+ for (const w of wordsA) { if (wordsB.has(w)) intersection++; }
2124
+ const jaccard = intersection / (wordsA.size + wordsB.size - intersection);
2125
+ if (jaccard >= 0.4) { dupes++; break; }
2126
+ }
2127
+ }
2128
+ report.consolidation.candidates = dupes;
2129
+ console.log(` Consolidation: ${dupes} duplicate candidate(s) [dry-run]`);
2130
+ if (dupes > 0) {
2131
+ report.actions.push(`${dupes} duplicate documents found — run \`clawmem consolidate\` to review`);
2132
+ }
2133
+ } catch (err) {
2134
+ console.log(` ${c.red}Consolidation check failed:${c.reset} ${err}`);
2135
+ }
2136
+
2137
+ // Phase 4: Retrieval probe (BM25)
2138
+ try {
2139
+ const results = s.searchFTS("architecture decision", 3);
2140
+ const topScore = results[0]?.score || 0;
2141
+ report.retrieval.bm25Pass = results.length > 0 && topScore > 0.3;
2142
+ report.retrieval.topScore = topScore;
2143
+ console.log(` Retrieval: ${report.retrieval.bm25Pass ? `${c.green}OK${c.reset}` : `${c.red}DEGRADED${c.reset}`} (BM25 top=${topScore.toFixed(3)})`);
2144
+ if (!report.retrieval.bm25Pass) {
2145
+ report.actions.push("Retrieval degraded — BM25 probe returned no strong results");
2146
+ }
2147
+ } catch (err) {
2148
+ console.log(` ${c.red}Retrieval probe failed:${c.reset} ${err}`);
2149
+ report.actions.push("Retrieval probe failed");
2150
+ }
2151
+
2152
+ // Phase 5: Collection hygiene
2153
+ try {
2154
+ const naPct = report.health.active > 0
2155
+ ? Math.round((report.health.neverAccessed / report.health.active) * 100)
2156
+ : 0;
2157
+ report.collections.neverAccessedPct = naPct;
2158
+ if (naPct > 30) {
2159
+ report.actions.push(`${report.health.neverAccessed} documents never accessed (${naPct}%) — consider review`);
2160
+ }
2161
+ } catch {
2162
+ // non-critical
2163
+ }
2164
+
2165
+ // Write report
2166
+ const reportPath = pathResolve(process.env.HOME || "~", ".cache", "clawmem", "curator-report.json");
2167
+ try {
2168
+ mkdirSync(pathResolve(reportPath, ".."), { recursive: true });
2169
+ Bun.write(reportPath, JSON.stringify(report, null, 2));
2170
+ console.log(`\n Report: ${reportPath}`);
2171
+ } catch (err) {
2172
+ console.log(` ${c.red}Failed to write report:${c.reset} ${err}`);
2173
+ }
2174
+
2175
+ // Summary
2176
+ console.log();
2177
+ if (report.actions.length === 0) {
2178
+ console.log(`${c.green}No actions needed.${c.reset}`);
2179
+ } else {
2180
+ console.log(`${c.bold}Actions (${report.actions.length}):${c.reset}`);
2181
+ for (const a of report.actions) {
2182
+ console.log(` ${c.yellow}→${c.reset} ${a}`);
2183
+ }
2184
+ }
2185
+ }
2186
+
1786
2187
  function printHelp() {
1787
2188
  console.log(`
1788
2189
  ${c.bold}ClawMem${c.reset} - Hybrid Agent Memory
@@ -1795,7 +2196,6 @@ ${c.bold}Setup:${c.reset}
1795
2196
  clawmem collection remove <name>
1796
2197
  clawmem setup hooks [--remove] Install/remove Claude Code hooks
1797
2198
  clawmem setup mcp [--remove] Register/remove MCP in ~/.claude.json
1798
- clawmem setup curator [--remove] Install/remove curator agent to ~/.claude/agents/
1799
2199
  clawmem setup openclaw [--remove] Show OpenClaw plugin installation steps
1800
2200
  clawmem install-service [--enable] Install systemd watcher service
1801
2201
 
@@ -1822,16 +2222,24 @@ ${c.bold}Hooks:${c.reset}
1822
2222
  clawmem surface --context --stdin IO6a: pre-prompt context injection
1823
2223
  clawmem surface --bootstrap --stdin IO6b: per-session bootstrap injection
1824
2224
 
1825
- ${c.bold}Analysis:${c.reset}
1826
- clawmem reflect [N] Cross-session reflection (last N days, default 14)
1827
- clawmem consolidate [--dry-run] [N] Find and archive duplicate low-confidence docs
2225
+ ${c.bold}Lifecycle:${c.reset}
2226
+ clawmem lifecycle status Show lifecycle stats + policy
2227
+ clawmem lifecycle sweep [--dry-run] Archive stale docs per policy
2228
+ clawmem lifecycle search <query> Search archived docs (FTS, no restore)
2229
+ clawmem lifecycle restore --query Q Restore archived docs by keyword
2230
+ clawmem lifecycle restore --collection N Restore by collection
2231
+ clawmem lifecycle restore --all Restore all archived docs
2232
+
2233
+ ${c.bold}Intelligence:${c.reset}
2234
+ clawmem reflect [days] Cross-session pattern analysis
2235
+ clawmem consolidate [--dry-run] Merge duplicate low-confidence docs
2236
+ clawmem curate Automated maintenance (health, sweep, dedup, hygiene)
1828
2237
 
1829
2238
  ${c.bold}Integration:${c.reset}
1830
2239
  clawmem mcp Start stdio MCP server
1831
2240
  clawmem serve [--port 7438] [--host 127.0.0.1] Start HTTP REST API server
1832
2241
  clawmem update-context Regenerate all directory CLAUDE.md files
1833
2242
  clawmem doctor Full health check
1834
- clawmem path Print database path
1835
2243
 
1836
2244
  ${c.bold}Options:${c.reset}
1837
2245
  -n, --num <N> Number of results
@@ -51,6 +51,33 @@ export async function stalenessCheck(
51
51
  }
52
52
  }
53
53
 
54
+ // Auto-archive if lifecycle policy is configured (runs regardless of stale report results)
55
+ try {
56
+ const { loadVaultConfig } = await import("../config.ts");
57
+ const config = loadVaultConfig();
58
+ if (config.lifecycle) {
59
+ const candidates = store.getArchiveCandidates(config.lifecycle);
60
+ if (candidates.length > 0 && !config.lifecycle.dry_run) {
61
+ const archived = store.archiveDocuments(candidates.map(c => c.id));
62
+ if (archived > 0 && input.sessionId) {
63
+ store.insertUsage({
64
+ sessionId: input.sessionId,
65
+ timestamp: now.toISOString(),
66
+ hookName: "lifecycle-archive",
67
+ injectedPaths: candidates.map(c => `${c.collection}/${c.path}`),
68
+ estimatedTokens: 0,
69
+ wasReferenced: 0,
70
+ });
71
+ }
72
+ }
73
+ if (config.lifecycle.purge_after_days && !config.lifecycle.dry_run) {
74
+ store.purgeArchivedDocuments(config.lifecycle.purge_after_days);
75
+ }
76
+ }
77
+ } catch {
78
+ // Fail-open: lifecycle errors never block the hook
79
+ }
80
+
54
81
  if (allStale.size === 0) return makeEmptyOutput("staleness-check");
55
82
 
56
83
  // Build context within budget
package/src/hooks.ts CHANGED
@@ -84,6 +84,7 @@ const HOOK_EVENT_MAP: Record<string, string | null> = {
84
84
  "feedback-loop": null, // Stop — no hookSpecificOutput
85
85
  "precompact-extract": null, // PreCompact — side-effect only, no context injection
86
86
  "postcompact-inject": "SessionStart", // SessionStart(compact) — injects additionalContext
87
+ "curator-nudge": "SessionStart", // SessionStart — surfaces curator report actions
87
88
  "pretool-inject": null, // PreToolUse — disabled (cannot inject additionalContext; E13 folded into context-surfacing)
88
89
  };
89
90
 
@@ -19,6 +19,36 @@ import { resolveClawMemBin, execHook, parseHookOutput, extractContext } from "./
19
19
  import type { ClawMemConfig } from "./shell.js";
20
20
  import { createTools } from "./tools.js";
21
21
 
22
+ // =============================================================================
23
+ // Prompt Cleaning (strips OpenClaw noise for better retrieval)
24
+ // Pattern extracted from memory-core-plus (MIT, aloong-planet)
25
+ // =============================================================================
26
+
27
+ /**
28
+ * Strip OpenClaw-specific noise from the user prompt before using it as a
29
+ * search query. Gateway prompts contain metadata, system events, timestamps,
30
+ * and previously injected context that degrade embedding/BM25 quality.
31
+ */
32
+ function cleanPromptForSearch(prompt: string): string {
33
+ let cleaned = prompt;
34
+ // Strip previously injected vault-context (avoid re-searching our own output)
35
+ cleaned = cleaned.replace(/<vault-context>[\s\S]*?<\/vault-context>/g, "");
36
+ cleaned = cleaned.replace(/<vault-routing>[\s\S]*?<\/vault-routing>/g, "");
37
+ cleaned = cleaned.replace(/<vault-session>[\s\S]*?<\/vault-session>/g, "");
38
+ // Strip OpenClaw sender metadata block
39
+ cleaned = cleaned.replace(/Sender\s*\(untrusted metadata\)\s*:\s*```json\n[\s\S]*?```/g, "");
40
+ cleaned = cleaned.replace(/Sender\s*\(untrusted metadata\)\s*:\s*\{[\s\S]*?\}\s*/g, "");
41
+ // Strip OpenClaw runtime context blocks
42
+ cleaned = cleaned.replace(/OpenClaw runtime context \(internal\):[\s\S]*?(?=\n\n|\n?$)/g, "");
43
+ // Strip "System: ..." single-line event entries
44
+ cleaned = cleaned.replace(/^System:.*$/gm, "");
45
+ // Strip timestamp prefixes e.g. "[Sat 2026-03-14 16:19 GMT+8] "
46
+ cleaned = cleaned.replace(/^\[.*?GMT[+-]\d+\]\s*/gm, "");
47
+ // Collapse excessive whitespace
48
+ cleaned = cleaned.replace(/\n{3,}/g, "\n\n").trim();
49
+ return cleaned || prompt;
50
+ }
51
+
22
52
  // =============================================================================
23
53
  // Plugin Definition
24
54
  // =============================================================================
@@ -101,9 +131,11 @@ const clawmemPlugin = {
101
131
  }
102
132
 
103
133
  // Every turn: run context-surfacing for prompt-aware retrieval
134
+ // Clean the prompt to remove OpenClaw noise before search
135
+ const searchPrompt = cleanPromptForSearch(event.prompt);
104
136
  const surfacingResult = await execHook(cfg, "context-surfacing", {
105
137
  session_id: sessionId,
106
- prompt: event.prompt,
138
+ prompt: searchPrompt,
107
139
  });
108
140
 
109
141
  if (surfacingResult.exitCode === 0) {