@lh8ppl/claude-memory-kit 0.2.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +16 -10
  2. package/bin/cmk-capture-prompt.mjs +21 -1
  3. package/package.json +2 -1
  4. package/src/audit-log.mjs +1 -0
  5. package/src/auto-drain.mjs +17 -1
  6. package/src/auto-extract.mjs +72 -16
  7. package/src/auto-persona.mjs +86 -1
  8. package/src/capture-prompt.mjs +34 -1
  9. package/src/capture-turn.mjs +64 -6
  10. package/src/config-core.mjs +161 -0
  11. package/src/conflict-queue.mjs +20 -3
  12. package/src/content-hash.mjs +30 -0
  13. package/src/doctor.mjs +62 -3
  14. package/src/forget.mjs +13 -0
  15. package/src/frontmatter.mjs +4 -1
  16. package/src/import-anthropic-memory.mjs +25 -1
  17. package/src/import-claude-md.mjs +333 -0
  18. package/src/index-db.mjs +39 -0
  19. package/src/index-rebuild.mjs +48 -4
  20. package/src/index.mjs +10 -0
  21. package/src/inject-context.mjs +179 -7
  22. package/src/install.mjs +180 -1
  23. package/src/mcp-server.mjs +63 -8
  24. package/src/memory-health.mjs +229 -0
  25. package/src/memory-write.mjs +32 -10
  26. package/src/merge-facts.mjs +12 -0
  27. package/src/native-binding.mjs +142 -0
  28. package/src/poison-guard.mjs +55 -0
  29. package/src/provenance.mjs +4 -0
  30. package/src/remember-core.mjs +53 -8
  31. package/src/repair.mjs +20 -3
  32. package/src/result-shapes.mjs +1 -1
  33. package/src/scratchpad.mjs +5 -3
  34. package/src/search.mjs +96 -9
  35. package/src/semantic-backend.mjs +599 -0
  36. package/src/settings-hooks.mjs +4 -1
  37. package/src/subcommands.mjs +359 -42
  38. package/src/transcript-index.mjs +165 -0
  39. package/src/turn-tools.mjs +179 -0
  40. package/src/write-fact.mjs +34 -3
  41. package/template/.claude/skills/memory-search/SKILL.md +86 -0
  42. package/template/.gitattributes.fragment +16 -0
  43. package/template/CLAUDE.md.template +3 -1
package/src/repair.mjs CHANGED
@@ -159,12 +159,25 @@ function repairLocks({ projectRoot, userDir, staleLockMs, now, ts }) {
159
159
  * @param {Function} [opts.reindexer] test-injected reindex function; defaults to import('./index-rebuild.mjs').reindexFull
160
160
  */
161
161
  async function repairIndex({ projectRoot, userDir, reindexer }) {
162
+ // Production reindexFull requires a `db` (it calls db.exec) — repairIndex
163
+ // must open + own + close it, exactly like runReindex does. The earlier
164
+ // code called reindexFull({projectRoot,userDir}) with NO db, so
165
+ // `cmk repair --index`/`--all` threw "undefined (reading 'exec')" since
166
+ // Task 49 (cut-gate v0.3.1 finding — every test mocked the reindexer, so
167
+ // the real call-shape was never exercised). An injected reindexer (tests)
168
+ // takes whatever args it wants; we only open a db for the real one.
162
169
  let reindexFn = reindexer;
170
+ let db = null;
163
171
  if (!reindexFn) {
164
- const mod = await import('./index-rebuild.mjs');
165
- reindexFn = mod.reindexFull;
172
+ const [{ reindexFull }, { openIndexDb }] = await Promise.all([
173
+ import('./index-rebuild.mjs'),
174
+ import('./index-db.mjs'),
175
+ ]);
176
+ reindexFn = reindexFull;
177
+ db = openIndexDb({ projectRoot });
166
178
  }
167
179
  if (typeof reindexFn !== 'function') {
180
+ if (db) db.close();
168
181
  return {
169
182
  kind: 'index',
170
183
  changed: false,
@@ -172,7 +185,9 @@ async function repairIndex({ projectRoot, userDir, reindexer }) {
172
185
  };
173
186
  }
174
187
  try {
175
- const r = await reindexFn({ projectRoot, userDir });
188
+ const r = db
189
+ ? await reindexFn({ projectRoot, userDir, db })
190
+ : await reindexFn({ projectRoot, userDir });
176
191
  return {
177
192
  kind: 'index',
178
193
  changed: true,
@@ -184,6 +199,8 @@ async function repairIndex({ projectRoot, userDir, reindexer }) {
184
199
  changed: false,
185
200
  error: `reindex failed: ${err?.message ?? err}`,
186
201
  };
202
+ } finally {
203
+ if (db) db.close();
187
204
  }
188
205
  }
189
206
 
package/src/result-shapes.mjs CHANGED
@@ -104,7 +104,7 @@ export const ERROR_CATEGORIES = Object.freeze({
104
104
  POISON_GUARD: 'poison_guard',
105
105
 
106
106
  // `cmk search` requested --mode=semantic or --mode=hybrid but the
107
- // Layer-5b semantic backend is not yet shipped (Task 30, design
107
+ // Layer-5b semantic backend's optional embedder is not installed (Task 30/65, design
108
108
  // §9.3). Pairs with `process.exitCode = 2` in subcommands.mjs per
109
109
  // tasks.md 30.2's explicit "exit 2 when unavailable" contract.
110
110
  // NO silent fallback to keyword — the user asked for semantic,
package/src/scratchpad.mjs CHANGED
@@ -180,7 +180,9 @@ function findSectionRange(lines, sectionTitle) {
180
180
  }
181
181
 
182
182
  function insertIntoSection(text, sectionTitle, bullet) {
183
- const lines = text.split('\n');
183
+ // Task 139 (D-126): CRLF-tolerant read; the join below re-emits \n,
184
+ // so a CRLF-converted scratchpad self-heals on the next write.
185
+ const lines = text.split(/\r?\n/);
184
186
  const range = findSectionRange(lines, sectionTitle);
185
187
  if (!range) return null;
186
188
  // Insert before the next `## ` heading; skip trailing blank lines so the
@@ -208,7 +210,7 @@ function insertIntoSection(text, sectionTitle, bullet) {
208
210
  export function ensureSectionExists(scratchpadPath, sectionTitle) {
209
211
  if (!existsSync(scratchpadPath)) return { created: false, error: 'no-file' };
210
212
  const text = readFileSync(scratchpadPath, 'utf8');
211
- if (findSectionRange(text.split('\n'), sectionTitle)) return { created: false };
213
+ if (findSectionRange(text.split(/\r?\n/), sectionTitle)) return { created: false }; // Task 139: CRLF-tolerant
212
214
  const body = text.trimEnd(); // drop trailing whitespace/blank lines (no `\s+$` regex — trips ReDoS heuristics)
213
215
  // No leading blank lines for an empty/whitespace-only file (the scaffolded
214
216
  // scratchpads are never empty, but keep the output clean if one ever is).
@@ -220,7 +222,7 @@ export function ensureSectionExists(scratchpadPath, sectionTitle) {
220
222
  const EVICTED_ID_RE = /^- \(([PUL]-[A-Za-z0-9]+)\)/;
221
223
 
222
224
  function consolidate(text, { nowDate }) {
223
- const lines = text.split('\n');
225
+ const lines = text.split(/\r?\n/); // Task 139: CRLF-tolerant
224
226
  const removeIdx = new Set();
225
227
  const evicted = [];
226
228
  const staleCutoff = new Date(nowDate.getTime() - STALE_AFTER_DAYS * 24 * 60 * 60 * 1000);
package/src/search.mjs CHANGED
@@ -11,7 +11,7 @@
11
11
  // ~100ms for 10k bullets. Always available — the keyword
12
12
  // backend ships in v0.1.0 with no extra install.
13
13
  //
14
- // semantic the Layer-5b semantic backend (not yet shipped the embedded
14
+ // semantic the Layer-5b semantic backend (Task 65: sqlite-vec + local ONNX embedder; the embedded
15
15
  // vector backend is a future release; the DI seam below is the
16
16
  // drop-in point). Until then this mode errors with
17
17
  // ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE when the caller
@@ -54,6 +54,15 @@ export const SEARCH_MODES = Object.freeze({
54
54
  export const DEFAULT_LIMIT = 20;
55
55
  const MAX_LIMIT = 1000;
56
56
 
57
+ // Task 104.2 (D-117) — search scopes. 'facts' = the curated observation
58
+ // index (L1, the default). 'transcripts' = the SEPARATE raw-transcript
59
+ // chunk index (the L3 last-resort tier) — reached ONLY when explicitly
60
+ // asked, so raw history never pollutes curated results.
61
+ export const SEARCH_SCOPES = Object.freeze({
62
+ FACTS: 'facts',
63
+ TRANSCRIPTS: 'transcripts',
64
+ });
65
+
57
66
  const TRUST_ORDINAL = Object.freeze({
58
67
  low: 1,
59
68
  medium: 2,
@@ -107,7 +116,24 @@ function validateInput(opts) {
107
116
  errors.push(`limit: must be a positive integer ≤ ${MAX_LIMIT}`);
108
117
  }
109
118
  }
110
- return { errors, mode };
119
+ const scope = opts.scope ?? SEARCH_SCOPES.FACTS;
120
+ if (scope !== SEARCH_SCOPES.FACTS && scope !== SEARCH_SCOPES.TRANSCRIPTS) {
121
+ errors.push(`scope: must be one of facts/transcripts (got ${JSON.stringify(scope)})`);
122
+ }
123
+ if (scope === SEARCH_SCOPES.TRANSCRIPTS) {
124
+ // Chunks carry no tier/trust/created_at — rejecting these is more honest
125
+ // than silently ignoring them (the explicit-vs-configured asymmetry rule).
126
+ for (const [key, label] of [
127
+ ['tier', 'tier'],
128
+ ['minTrust', 'minTrust'],
129
+ ['since', 'since'],
130
+ ]) {
131
+ if (opts[key] !== undefined) {
132
+ errors.push(`${label}: not supported under the transcripts scope (raw chunks carry no ${label})`);
133
+ }
134
+ }
135
+ }
136
+ return { errors, mode, scope };
111
137
  }
112
138
 
113
139
  // --- Keyword (FTS5 BM25) backend --------------------------------------
@@ -211,6 +237,60 @@ function runKeywordSearch(db, opts) {
211
237
  }));
212
238
  }
213
239
 
240
+ // --- Transcript-scope keyword backend (Task 104.2, the L3 raw tier) ----
241
+
242
+ const TRANSCRIPT_KEYWORD_SQL = `
243
+ SELECT
244
+ t.source_file AS source_file,
245
+ t.source_line AS source_line,
246
+ t.heading AS heading,
247
+ transcript_chunks_fts.rank AS score,
248
+ snippet(transcript_chunks_fts, 0, '<b>', '</b>', '...', 16) AS snippet
249
+ FROM transcript_chunks_fts
250
+ JOIN transcript_chunks t ON t.rowid = transcript_chunks_fts.rowid
251
+ WHERE transcript_chunks_fts MATCH @query
252
+ ORDER BY transcript_chunks_fts.rank
253
+ LIMIT @limit
254
+ `;
255
+
256
+ // Synthetic, readable id for a raw chunk (chunks are locations, not curated
257
+ // facts — no [PUL]-XXXXXXXX identity). Also the RRF fusion key in hybrid
258
+ // mode and the drill-back handle the memory-search skill surfaces.
259
+ function transcriptHitId(row) {
260
+ return `T:${row.source_file}:${row.source_line}`;
261
+ }
262
+
263
+ function runTranscriptKeywordSearch(db, opts) {
264
+ let rows;
265
+ try {
266
+ rows = db
267
+ .prepare(TRANSCRIPT_KEYWORD_SQL)
268
+ .all({ query: opts.query, limit: opts.limit ?? DEFAULT_LIMIT });
269
+ } catch (err) {
270
+ if (err?.code === 'SQLITE_ERROR' || /fts5:|no such column:/i.test(err?.message ?? '')) {
271
+ throw new FTS5ParseError(err, opts.query);
272
+ }
273
+ throw err;
274
+ }
275
+ return rows.map((r) => ({
276
+ id: transcriptHitId(r),
277
+ // Raw turns contain newlines (dialogue + Tools blocks) — flatten so the
278
+ // one-line-per-hit output contract holds across scopes.
279
+ snippet: flattenSnippet(r.snippet),
280
+ source_file: r.source_file,
281
+ source_line: r.source_line,
282
+ heading: r.heading,
283
+ score: r.score,
284
+ }));
285
+ }
286
+
287
+ const TRANSCRIPT_SNIPPET_MAX = 240;
288
+
289
+ function flattenSnippet(s) {
290
+ const flat = String(s ?? '').replace(/\s+/g, ' ').trim();
291
+ return flat.length > TRANSCRIPT_SNIPPET_MAX ? flat.slice(0, TRANSCRIPT_SNIPPET_MAX) + '…' : flat;
292
+ }
293
+
214
294
  // --- Reciprocal-rank fusion (hybrid mode) -----------------------------
215
295
 
216
296
  /**
@@ -255,10 +335,15 @@ export function reciprocalRankFusion({
255
335
  // --- Public boundary --------------------------------------------------
256
336
 
257
337
  export function search(opts = {}) {
258
- const { errors, mode } = validateInput(opts);
338
+ const { errors, mode, scope } = validateInput(opts);
259
339
  if (errors.length > 0) {
260
340
  return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
261
341
  }
342
+ // Scope dispatch (Task 104.2): the transcripts scope swaps the keyword
343
+ // backend; semantic/hybrid use the caller-prepared backend exactly like
344
+ // the facts scope (prepareSemanticBackend({scope}) embeds the right table).
345
+ const keywordBackend =
346
+ scope === SEARCH_SCOPES.TRANSCRIPTS ? runTranscriptKeywordSearch : runKeywordSearch;
262
347
 
263
348
  // Semantic + hybrid require an injected backend. Production v0.1.0
264
349
  // passes undefined → error with the not-yet-shipped hint. A future
@@ -268,7 +353,8 @@ export function search(opts = {}) {
268
353
  return errorResult({
269
354
  category: ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE,
270
355
  errors: [
271
- 'the Layer-5b semantic backend is not yet shipped — semantic/hybrid search will land in a future release. ' +
356
+ 'no semantic backend provided — semantic/hybrid need the embedded Layer-5b backend prepared by the caller ' +
357
+ '(the CLI/MCP do this automatically when the optional @huggingface/transformers embedder is installed). ' +
272
358
  'Use --mode=keyword for the always-available FTS5 search.',
273
359
  ],
274
360
  });
@@ -278,15 +364,16 @@ export function search(opts = {}) {
278
364
  let results;
279
365
  try {
280
366
  if (mode === SEARCH_MODES.KEYWORD) {
281
- results = runKeywordSearch(opts.db, opts);
367
+ results = keywordBackend(opts.db, opts);
282
368
  } else if (mode === SEARCH_MODES.SEMANTIC) {
283
369
  // The semantic backend is an injected callable returning the same
284
- // shape as runKeywordSearch (array of {id, snippet, source_file,
285
- // source_line, tier, trust, score}).
370
+ // shape as the scope's keyword backend (facts: {id, snippet,
371
+ // source_file, source_line, tier, trust, score}; transcripts: the
372
+ // synthetic-T:-id shape without tier/trust).
286
373
  results = opts.semanticBackend(opts);
287
374
  } else {
288
375
  // hybrid: run both backends + fuse.
289
- const keywordResults = runKeywordSearch(opts.db, opts);
376
+ const keywordResults = keywordBackend(opts.db, opts);
290
377
  const semanticResults = opts.semanticBackend(opts);
291
378
  const fused = reciprocalRankFusion({
292
379
  keywordResults,
@@ -308,5 +395,5 @@ export function search(opts = {}) {
308
395
  throw err;
309
396
  }
310
397
 
311
- return { action: 'found', mode, results };
398
+ return { action: 'found', mode, scope, results };
312
399
  }