@lh8ppl/claude-memory-kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/bin/cmk-compress-lazy.mjs +59 -0
  2. package/bin/cmk-daily-distill.mjs +67 -0
  3. package/bin/cmk-weekly-curate.mjs +56 -0
  4. package/bin/cmk.mjs +12 -0
  5. package/package.json +50 -0
  6. package/src/audit-log.mjs +103 -0
  7. package/src/auto-extract.mjs +742 -0
  8. package/src/capture-prompt.mjs +61 -0
  9. package/src/capture-turn.mjs +273 -0
  10. package/src/claude-md.mjs +212 -0
  11. package/src/compress-session.mjs +349 -0
  12. package/src/compressor.mjs +376 -0
  13. package/src/conflict-queue.mjs +796 -0
  14. package/src/cooldown.mjs +61 -0
  15. package/src/daily-distill.mjs +252 -0
  16. package/src/doctor.mjs +528 -0
  17. package/src/forget.mjs +335 -0
  18. package/src/frontmatter.mjs +73 -0
  19. package/src/import-anthropic-memory.mjs +266 -0
  20. package/src/index-db.mjs +154 -0
  21. package/src/index-rebuild.mjs +597 -0
  22. package/src/index.mjs +90 -0
  23. package/src/inject-context.mjs +484 -0
  24. package/src/install.mjs +327 -0
  25. package/src/lazy-compress.mjs +326 -0
  26. package/src/lock-discipline.mjs +166 -0
  27. package/src/mcp-server.mjs +498 -0
  28. package/src/memory-write.mjs +565 -0
  29. package/src/merge-facts.mjs +213 -0
  30. package/src/observe-edit.mjs +87 -0
  31. package/src/platform-commands.mjs +138 -0
  32. package/src/poison-guard.mjs +245 -0
  33. package/src/privacy.mjs +21 -0
  34. package/src/provenance.mjs +217 -0
  35. package/src/register-crons.mjs +354 -0
  36. package/src/reindex.mjs +134 -0
  37. package/src/repair.mjs +316 -0
  38. package/src/result-shapes.mjs +155 -0
  39. package/src/review-queue.mjs +345 -0
  40. package/src/roll.mjs +115 -0
  41. package/src/scratchpad.mjs +335 -0
  42. package/src/search.mjs +311 -0
  43. package/src/subcommands.mjs +1252 -0
  44. package/src/tier-paths.mjs +74 -0
  45. package/src/transcripts.mjs +234 -0
  46. package/src/trust.mjs +226 -0
  47. package/src/weekly-curate.mjs +454 -0
  48. package/src/write-fact.mjs +205 -0
  49. package/template/.claude/hooks/pre-tool-memory.js +78 -0
  50. package/template/.claude/hooks/transcript-capture.js +69 -0
  51. package/template/.claude/settings.json +27 -0
  52. package/template/.claude/skills/memory-write/SKILL.md +117 -0
  53. package/template/.gitignore.fragment +12 -0
  54. package/template/CLAUDE.md.template +49 -0
  55. package/template/docs/journey/journey-log.md.template +292 -0
  56. package/template/local/machine-paths.md.template +37 -0
  57. package/template/local/overrides.md.template +36 -0
  58. package/template/project/.index/.gitkeep +0 -0
  59. package/template/project/MEMORY.md.template +47 -0
  60. package/template/project/SOUL.md.template +35 -0
  61. package/template/project/memory/INDEX.md.template +47 -0
  62. package/template/project/memory/archive/superseded/.gitkeep +0 -0
  63. package/template/project/memory/archive/tombstones/.gitkeep +0 -0
  64. package/template/project/queues/.gitkeep +0 -0
  65. package/template/project/sessions/.gitkeep +0 -0
  66. package/template/project/transcripts/.gitkeep +0 -0
  67. package/template/support/cron-jobs/daily-memory-distill.md +15 -0
  68. package/template/support/cron-jobs/nightly-memsearch-index.md +17 -0
  69. package/template/support/cron-jobs/weekly-memory-curator.md +15 -0
  70. package/template/support/milvus-deploy/README.md +57 -0
  71. package/template/support/milvus-deploy/docker-compose.yml +66 -0
  72. package/template/support/scripts/auto-extract-memory.sh +102 -0
  73. package/template/support/scripts/memsearch-index-with-flush.sh +59 -0
  74. package/template/support/scripts/refresh-distill-timestamp.py +35 -0
  75. package/template/support/scripts/register-crons.py +242 -0
  76. package/template/support/scripts/run-daily-distill.sh +67 -0
  77. package/template/support/scripts/run-weekly-curate.sh +58 -0
  78. package/template/user/HABITS.md.template +18 -0
  79. package/template/user/LESSONS.md.template +18 -0
  80. package/template/user/USER.md.template +18 -0
  81. package/template/user/fragments/INDEX.md.template +23 -0
@@ -0,0 +1,335 @@
1
+ // Bounded scratchpad writer (Task 12, T-010). First Layer 3 module.
2
+ // First real consumer of the shared modules established at PR-2.
3
+ //
4
+ // Public boundary: appendScratchpadBullet(opts) → result.
5
+ // See design §2.1 + §4 + tasks.md 12.1-12.5.
6
+ //
7
+ // Uses shared modules per CLAUDE.md "Shared modules" rule:
8
+ // tier-paths.mjs — VALID_TIERS, SCRATCHPADS_BY_TIER, DEFAULT_SCRATCHPAD_CAPS,
9
+ // resolveTierRoot, resolveScratchpadPath
10
+ // audit-log.mjs — appendAuditEntry, nowIso, REASON_CODES
11
+ // result-shapes.mjs — ERROR_CATEGORIES, errorResult
12
+ // @lh8ppl/cmk-canonicalize — generateId (citation IDs derived from the bullet text)
13
+ //
14
+ // Frontmatter (HTML-comment provenance below the bullet) is formatted by
15
+ // the shared `writeBullet({ id, text, provenance })` primitive imported
16
+ // from provenance.mjs (Task 13: provenance frontmatter writer + reader).
17
+ // This module decides where the bullet goes (file, section, cap);
18
+ // provenance.mjs decides what the bullet and its comment look like on disk.
19
+
20
+ import { existsSync, readFileSync, writeFileSync } from 'node:fs';
21
+ import { generateId } from '@lh8ppl/cmk-canonicalize';
22
+ import {
23
+ VALID_TIERS,
24
+ SCRATCHPADS_BY_TIER,
25
+ DEFAULT_SCRATCHPAD_CAPS,
26
+ resolveTierRoot,
27
+ resolveScratchpadPath,
28
+ } from './tier-paths.mjs';
29
+ import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
30
+ import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
31
+ import { writeBullet, parseBulletProvenance } from './provenance.mjs';
32
+
33
// Trust levels accepted in `provenance.trust`.
const VALID_TRUST = new Set(['high', 'medium', 'low']);

// Write sources accepted in `provenance.write`.
const VALID_WRITE_SOURCES = new Set(
  ['user-explicit', 'auto-extract', 'compressor', 'manual-edit', 'imported'],
);

// Per Task 13.2 / provenance.mjs: the six comment fields a caller must
// supply. `id` is absent on purpose — appendScratchpadBullet takes it from
// `opts.id` (or generates one) rather than from the caller's provenance.
const REQUIRED_PROVENANCE_FIELDS = ['source', 'source_line', 'sha1', 'write', 'trust', 'at'];

// Consolidation is attempted once the candidate file would exceed this
// fraction of the scratchpad's cap.
const CONSOLIDATION_TRIGGER_RATIO = 0.95;

// Age, in days, past which consolidate() may drop a non-high-trust bullet.
const STALE_AFTER_DAYS = 14;
54
+
55
/**
 * Check the options handed to appendScratchpadBullet.
 *
 * Fields are checked in a fixed order (tier, scratchpad, section, text,
 * provenance) and every problem found is reported, not just the first.
 *
 * @param {object} opts - Caller options.
 * @returns {string[]} One message per problem; empty when the options are valid.
 */
function validateOptions(opts) {
  const { tier, scratchpad, section, text, provenance } = opts;
  const problems = [];
  const tierIsKnown = Boolean(tier) && VALID_TIERS.has(tier);

  if (!tier) {
    problems.push("tier: required, one of 'U', 'P', 'L'");
  } else if (!tierIsKnown) {
    problems.push(`tier: must be 'U', 'P', or 'L' (got ${JSON.stringify(tier)})`);
  }

  if (!scratchpad) {
    problems.push('scratchpad: required, one of the documented scratchpad filenames');
  } else if (tierIsKnown) {
    // The scratchpad can only be cross-checked once the tier itself is valid.
    const allowed = SCRATCHPADS_BY_TIER[tier];
    if (!allowed.has(scratchpad)) {
      problems.push(
        `scratchpad: ${scratchpad} is not valid for tier ${tier} (allowed: ${[...allowed].join(', ')})`,
      );
    }
  }

  const sectionOk = typeof section === 'string' && section !== '';
  if (!sectionOk) {
    problems.push('section: required, non-empty string (must match a `## <section>` heading in the file)');
  }

  const textOk = typeof text === 'string' && text.trim() !== '';
  if (!textOk) {
    problems.push('text: required, non-empty string');
  }

  if (!provenance || typeof provenance !== 'object') {
    problems.push('provenance: required object with source/source_line/sha1/write/trust/at');
    return problems;
  }

  const isBlank = (value) => value === undefined || value === null || value === '';
  REQUIRED_PROVENANCE_FIELDS
    .filter((field) => isBlank(provenance[field]))
    .forEach((field) => problems.push(`provenance.${field}: required, non-empty`));

  // Enum checks only fire for present values; absence was reported above.
  const { trust, write } = provenance;
  if (trust && !VALID_TRUST.has(trust)) {
    problems.push(`provenance.trust: must be one of high/medium/low (got ${JSON.stringify(trust)})`);
  }
  if (write && !VALID_WRITE_SOURCES.has(write)) {
    problems.push(
      `provenance.write: must be one of user-explicit/auto-extract/compressor/manual-edit/imported (got ${JSON.stringify(write)})`,
    );
  }

  return problems;
}
103
+
104
+ // Bullet formatting is delegated to provenance.mjs's writeBullet (Task 13).
105
+ // scratchpad.mjs is responsible for "where the bullet goes" (which file,
106
+ // which section, cap enforcement); provenance.mjs is responsible for
107
+ // "what the bullet+comment look like on disk".
108
/**
 * Render a bullet plus its provenance comment via provenance.mjs's writeBullet.
 *
 * @param {{id: string, text: string, provenance: object}} parts
 * @returns {*} The `lines` value reported by writeBullet.
 * @throws {Error} When writeBullet reports any action other than 'formatted'.
 */
function formatBullet({ id, text, provenance }) {
  const { action, errors, lines } = writeBullet({ id, text, provenance });
  if (action === 'formatted') {
    return lines;
  }
  // Inputs were validated upstream, so this is a defensive guard only.
  const detail = errors?.join('; ') ?? 'unknown';
  throw new Error(`scratchpad.formatBullet: writeBullet returned ${action}: ${detail}`);
}
118
+
119
/**
 * Best-effort JSON file reader.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value, or null when the file is absent,
 *   unreadable, or not valid JSON.
 */
function readJsonIfExists(path) {
  let parsed = null;
  if (existsSync(path)) {
    try {
      parsed = JSON.parse(readFileSync(path, 'utf8'));
    } catch {
      // Deliberate: an unreadable or malformed file is treated as "no settings".
      parsed = null;
    }
  }
  return parsed;
}
127
+
128
/**
 * Resolve the size cap for a scratchpad.
 *
 * Lookup order:
 *   1. `settings` (test-injected) — short-circuits all file reads.
 *   2. Project-tier settings.json, for tiers 'P' and 'L' only (they share a
 *      project root).
 *   3. User-tier settings.json.
 *   4. DEFAULT_SCRATCHPAD_CAPS[scratchpad].
 *
 * A configured `scratchpads[<name>].max_chars` is honoured only when it is a
 * real number. The injected-settings path previously returned any non-nullish
 * value; a non-numeric cap makes every later size comparison false and thereby
 * disables cap enforcement, so it now falls back to the default exactly like
 * the file-based paths do.
 *
 * @param {object} args
 * @param {string} args.tier - 'U', 'P' or 'L'.
 * @param {string} args.scratchpad - Scratchpad filename used as the settings key.
 * @param {string} [args.projectRoot] - Forwarded to resolveScratchpadPath.
 * @param {string} [args.userDir] - Forwarded to resolveScratchpadPath.
 * @param {object} [args.settings] - Injected settings object (tests).
 * @returns {number|undefined} The cap, or whatever DEFAULT_SCRATCHPAD_CAPS
 *   holds for this scratchpad when nothing is configured.
 */
function resolveCap({ tier, scratchpad, projectRoot, userDir, settings }) {
  // Pull a usable numeric cap out of a settings object, else undefined.
  const capFrom = (config) => {
    const value = config?.scratchpads?.[scratchpad]?.max_chars;
    return typeof value === 'number' && !Number.isNaN(value) ? value : undefined;
  };

  // Test-injected settings short-circuit.
  if (settings) {
    return capFrom(settings) ?? DEFAULT_SCRATCHPAD_CAPS[scratchpad];
  }

  // Project tier wins (only meaningful for tiers P + L which share a project root).
  if (tier === 'P' || tier === 'L') {
    const projectCap = capFrom(
      readJsonIfExists(
        resolveScratchpadPath({ tier: 'P', scratchpad: 'settings.json', projectRoot }),
      ),
    );
    if (projectCap !== undefined) return projectCap;
  }

  // User tier fallback.
  const userCap = capFrom(
    readJsonIfExists(
      resolveScratchpadPath({ tier: 'U', scratchpad: 'settings.json', userDir }),
    ),
  );
  if (userCap !== undefined) return userCap;

  // Hardcoded default.
  return DEFAULT_SCRATCHPAD_CAPS[scratchpad];
}
161
+
162
/**
 * Locate a `## <sectionTitle>` section within an array of lines.
 *
 * The opening heading is matched after trimming the line; the section ends
 * at the next line that starts with `## ` at column 0 (so `###` sub-headings
 * stay inside the section), or at the end of the array.
 *
 * @param {string[]} lines
 * @param {string} sectionTitle
 * @returns {{startIdx: number, endIdx: number}|null} Heading index and
 *   exclusive end index, or null when the heading is absent.
 */
function findSectionRange(lines, sectionTitle) {
  const heading = `## ${sectionTitle}`;

  let startIdx = -1;
  for (let i = 0; i < lines.length; i += 1) {
    if (lines[i].trim() === heading) {
      startIdx = i;
      break;
    }
  }
  if (startIdx < 0) {
    return null;
  }

  let endIdx = lines.length;
  for (let j = startIdx + 1; j < lines.length; j += 1) {
    if (/^##\s/.test(lines[j])) {
      endIdx = j;
      break;
    }
  }
  return { startIdx, endIdx };
}
173
+
174
/**
 * Insert a (possibly multi-line) bullet at the end of a section's content.
 *
 * The bullet is placed after the section's last non-blank line; any blank
 * lines that already trail the section stay after the inserted bullet.
 *
 * @param {string} text - Whole file content.
 * @param {string} sectionTitle - Title of the `## ` heading to append under.
 * @param {string} bullet - Bullet text; may contain `\n`.
 * @returns {string|null} The new file content, or null when the section is absent.
 */
function insertIntoSection(text, sectionTitle, bullet) {
  const lines = text.split('\n');
  const range = findSectionRange(lines, sectionTitle);
  if (!range) {
    return null;
  }

  // Walk back from the next heading over blank lines, never past the heading
  // of this section itself.
  const firstContentIdx = range.startIdx + 1;
  let insertAt = range.endIdx;
  for (; insertAt > firstContentIdx; insertAt -= 1) {
    if (lines[insertAt - 1].trim() !== '') break;
  }

  return [
    ...lines.slice(0, insertAt),
    ...bullet.split('\n'),
    ...lines.slice(insertAt),
  ].join('\n');
}
190
+
191
/**
 * Drop stale, non-high-trust bullets from scratchpad content.
 *
 * A bullet is a line starting with `- (` that is immediately followed by a
 * single-line HTML comment. The comment is parsed with parseBulletProvenance;
 * the pair is removed when its `trust` is not 'high' and its `at` timestamp is
 * older than STALE_AFTER_DAYS days before `nowDate`. Pairs whose provenance
 * cannot be parsed, or whose `at` is not a valid date, are kept.
 *
 * If `nowDate` is not a valid Date the content is returned untouched. Without
 * that guard the cutoff would be NaN, no bullet would ever compare as "recent
 * enough", and every non-high-trust bullet would be deleted.
 *
 * @param {string} text - Whole file content.
 * @param {{nowDate: Date}} options - Reference "now" for the staleness cutoff.
 * @returns {{text: string, bulletsRemoved: number}}
 */
function consolidate(text, { nowDate }) {
  const nowMs = nowDate instanceof Date ? nowDate.getTime() : Number.NaN;
  if (Number.isNaN(nowMs)) {
    // No trustworthy clock → nothing can be proven stale.
    return { text, bulletsRemoved: 0 };
  }
  const staleCutoffMs = nowMs - STALE_AFTER_DAYS * 24 * 60 * 60 * 1000;

  const lines = text.split('\n');
  const removeIdx = new Set();
  let bulletsRemoved = 0;

  // Stop one short of the end: a bullet needs a following comment line.
  for (let i = 0; i < lines.length - 1; i++) {
    if (removeIdx.has(i)) continue;
    const bulletLine = lines[i];
    const commentLine = lines[i + 1];
    if (!bulletLine.startsWith('- (')) continue;
    if (!commentLine || !/^\s*<!--.*-->\s*$/.test(commentLine)) continue;

    const prov = parseBulletProvenance(commentLine);
    if (!prov || !prov.at || !prov.trust) continue;
    if (prov.trust === 'high') continue; // Preserve high-trust regardless of age.

    const atMs = new Date(prov.at).getTime();
    if (Number.isNaN(atMs)) continue;
    if (atMs >= staleCutoffMs) continue; // Younger than the cutoff → keep.

    removeIdx.add(i);
    removeIdx.add(i + 1);
    bulletsRemoved++;
  }

  if (removeIdx.size === 0) {
    return { text, bulletsRemoved: 0 };
  }
  const out = lines.filter((_, i) => !removeIdx.has(i)).join('\n');
  return { text: out, bulletsRemoved };
}
223
+
224
/**
 * Append one bullet (with provenance comment) to a section of a scratchpad
 * file, enforcing the scratchpad's size cap.
 *
 * Flow:
 *   1. Validate options and the optional `now` timestamp.
 *   2. Build the candidate content with the bullet inserted into `section`.
 *   3. If the candidate exceeds CONSOLIDATION_TRIGGER_RATIO of the cap,
 *      consolidate the EXISTING content and re-insert the bullet. Pruning
 *      before inserting guarantees the bullet being written can never be
 *      dropped by its own append (previously a new bullet with an old
 *      `provenance.at` and non-high trust was pruned immediately while the
 *      call still reported `appended`).
 *   4. If the result still exceeds the cap, return a cap-exceeded error and
 *      leave the file untouched.
 *   5. Otherwise write the file and append an audit entry.
 *
 * Sizes are measured in UTF-8 bytes.
 *
 * @param {object} [opts]
 * @param {string} opts.tier - 'U', 'P' or 'L'.
 * @param {string} opts.scratchpad - Scratchpad filename valid for the tier.
 * @param {string} opts.section - Title of the `## ` heading to append under.
 * @param {string} opts.text - Bullet text.
 * @param {object} opts.provenance - source/source_line/sha1/write/trust/at.
 * @param {string} [opts.id] - Explicit bullet id; generated from tier + text when omitted.
 * @param {string} [opts.projectRoot] - Forwarded to the tier-path resolvers.
 * @param {string} [opts.userDir] - Forwarded to the tier-path resolvers.
 * @param {string|number|Date} [opts.now] - Clock override; must be parseable by `new Date()`.
 * @param {object} [opts.settings] - Injected settings, forwarded to resolveCap.
 * @returns {object} On success `{action: 'appended', id, path, cap, bytes,
 *   consolidationRan, bulletsConsolidated}`; otherwise the value produced by
 *   errorResult for the schema / not-found / cap-exceeded case.
 */
export function appendScratchpadBullet(opts = {}) {
  const errors = validateOptions(opts);
  if (errors.length > 0) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors,
    });
  }

  const {
    tier,
    scratchpad,
    section,
    text,
    provenance,
    projectRoot,
    userDir,
    now,
    settings,
  } = opts;

  // Resolve the clock once so consolidation and the audit entry agree, and
  // refuse an unparsable override before anything touches the file.
  const ts = now ?? nowIso();
  const nowDate = new Date(ts);
  if (Number.isNaN(nowDate.getTime())) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors: [`now: must be a parseable date/time (got ${JSON.stringify(now)})`],
    });
  }

  const tierRoot = resolveTierRoot({ tier, projectRoot, userDir });
  const path = resolveScratchpadPath({ tier, scratchpad, projectRoot, userDir });

  if (!existsSync(path)) {
    return errorResult({
      category: ERROR_CATEGORIES.NOT_FOUND,
      errors: [
        `scratchpad file does not exist at ${path} — run \`cmk install\` (project tier) or \`cmk init-user-tier\` (user tier) first`,
      ],
      path,
    });
  }

  const original = readFileSync(path, 'utf8');
  const id = opts.id ?? generateId(tier, text);
  const cap = resolveCap({ tier, scratchpad, projectRoot, userDir, settings });
  const bullet = formatBullet({ id, text, provenance });

  // 1. Build candidate file content (bullet inserted into section)
  const candidate = insertIntoSection(original, section, bullet);
  if (candidate === null) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors: [
        `section "${section}" not found in ${scratchpad} (expected a "## ${section}" heading)`,
      ],
      path,
    });
  }

  // 2. Cap check: would the write push to >95%? If yes, consolidate the
  //    existing content and insert the new bullet into the pruned result.
  let consolidationRan = false;
  let bulletsConsolidated = 0;
  let finalContent = candidate;

  if (Buffer.byteLength(candidate, 'utf8') > cap * CONSOLIDATION_TRIGGER_RATIO) {
    consolidationRan = true;
    const pruned = consolidate(original, { nowDate });
    if (pruned.bulletsRemoved > 0) {
      const rebuilt = insertIntoSection(pruned.text, section, bullet);
      // Consolidation only removes bullet/comment pairs, so the heading
      // should still be present; keep the un-pruned candidate if it is not.
      if (rebuilt !== null) {
        finalContent = rebuilt;
        bulletsConsolidated = pruned.bulletsRemoved;
      }
    }
  }

  // 3. Post-consolidation cap check
  const finalBytes = Buffer.byteLength(finalContent, 'utf8');
  if (finalBytes > cap) {
    // File untouched. The original on-disk content is preserved verbatim.
    return errorResult({
      category: ERROR_CATEGORIES.CAP_EXCEEDED,
      errors: [
        `scratchpad cap exceeded: ${finalBytes} bytes would exceed cap of ${cap} bytes for ${scratchpad} (consolidator dropped ${bulletsConsolidated} bullet(s), still over). No silent truncation; resolve by raising the cap in settings.json or manually distilling.`,
      ],
      path,
      cap,
      bytes: finalBytes,
      consolidationRan,
      bulletsConsolidated,
    });
  }

  // 4. Write + audit
  writeFileSync(path, finalContent, 'utf8');
  appendAuditEntry(tierRoot, {
    ts,
    action: 'appended',
    tier,
    id,
    reasonCode: REASON_CODES.SCRATCHPAD_APPEND,
    paths: { after: path },
    extra: {
      scratchpad,
      section,
      cap,
      bytes: finalBytes,
      consolidationRan,
      bulletsConsolidated,
    },
  });

  return {
    action: 'appended',
    id,
    path,
    cap,
    bytes: finalBytes,
    consolidationRan,
    bulletsConsolidated,
  };
}
package/src/search.mjs ADDED
@@ -0,0 +1,311 @@
1
+ // `cmk search` query engine (Task 30, T-026).
2
+ //
3
+ // Composes on top of:
4
+ // - index-db.mjs (Task 28) — observations + observations_fts schema
5
+ // - index-rebuild.mjs (Task 29) — populates the index
6
+ // - result-shapes.mjs — ERROR_CATEGORIES, errorResult
7
+ //
8
+ // Three search modes per design §9.3:
9
+ //
10
+ // keyword FTS5 BM25 over the body / heading_path / write_source columns.
11
+ // ~100ms for 10k bullets. Always available — the keyword
12
+ // backend ships in v0.1.0 with no extra install.
13
+ //
14
+ // semantic memsearch + Milvus (Layer 5b — optional install). The kit
15
+ // does NOT ship memsearch in v0.1.0; this mode errors with
16
+ // ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE when the caller
17
+ // requests it without injecting a semantic backend. NO silent
18
+ // fallback to keyword — design §9.3's explicit "exit 2 when
19
+ // not installed" contract.
20
+ //
21
+ // hybrid Reciprocal-rank fusion of keyword + semantic, 0.5/0.5
22
+ // weight per design §9.3. Requires the semantic backend.
23
+ // Errors the same way when semantic is unavailable.
24
+ //
25
+ // Filter flags (per tasks.md 30.4):
26
+ // minTrust: 'low' | 'medium' | 'high' — uses ordinal compare
27
+ // tier: 'U' | 'P' | 'L' — exact match
28
+ // since: ISO 8601 string — `created_at >= since`
29
+ // limit: positive integer — default 20
30
+ // includeTombstoned: boolean — default false
31
+ // (default WHERE excludes rows with deleted_at IS NOT NULL)
32
+ //
33
+ // Public boundary:
34
+ // search({db, query, mode?, minTrust?, tier?, since?, limit?,
35
+ // includeTombstoned?, semanticBackend?})
36
+ // → { action: 'found', mode, results: [{id, snippet, source_file,
37
+ // source_line, trust, score}] }
38
+ // → errorResult({category, errors}) on semantic-unavailable / schema-error
39
+ //
40
+ // `semanticBackend` is a dependency-injection hook for testing the
41
+ // hybrid + semantic paths. Production callers (the `cmk search` CLI in
42
+ // subcommands.mjs) pass undefined; v0.1.x lands the real backend.
43
+
44
+ import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
45
+ import { VALID_TIERS } from './tier-paths.mjs';
46
+
47
// The three search modes accepted by search().
export const SEARCH_MODES = Object.freeze({
  KEYWORD: 'keyword',
  SEMANTIC: 'semantic',
  HYBRID: 'hybrid',
});

// Result count used when the caller gives no `limit`.
export const DEFAULT_LIMIT = 20;

// Largest `limit` that input validation accepts.
const MAX_LIMIT = 1000;

// Numeric rank of each trust level, used for `minTrust` comparisons.
const TRUST_ORDINAL = Object.freeze({ low: 1, medium: 2, high: 3 });

// Reciprocal-rank fusion constant per design §9.3 (k=60 is the standard RRF
// default from the IR literature; smaller k weights the top results more
// heavily).
const RRF_K = 60;
66
+
67
+ // --- Validation -------------------------------------------------------
68
+
69
/**
 * Validate the options passed to search() and resolve the effective mode.
 *
 * `minTrust` is checked with an own-property test. A plain
 * `TRUST_ORDINAL[value]` lookup also finds inherited keys, so values such as
 * 'toString' or 'constructor' used to pass validation and then reach the SQL
 * layer as a non-numeric bind parameter.
 *
 * @param {object} opts - Options as given to search().
 * @returns {{errors: string[], mode: string}} All problems found (empty when
 *   valid) and the mode, defaulting to keyword when `opts.mode` is nullish.
 */
function validateInput(opts) {
  const errors = [];

  if (!opts.db || typeof opts.db.prepare !== 'function') {
    errors.push('db: required, better-sqlite3 Database instance');
  }

  if (typeof opts.query !== 'string' || opts.query.trim().length === 0) {
    errors.push('query: required, non-empty string');
  }

  const mode = opts.mode ?? SEARCH_MODES.KEYWORD;
  if (
    mode !== SEARCH_MODES.KEYWORD &&
    mode !== SEARCH_MODES.SEMANTIC &&
    mode !== SEARCH_MODES.HYBRID
  ) {
    errors.push(`mode: must be one of keyword/semantic/hybrid (got ${JSON.stringify(mode)})`);
  }

  if (
    opts.minTrust !== undefined &&
    !Object.prototype.hasOwnProperty.call(TRUST_ORDINAL, opts.minTrust)
  ) {
    errors.push(`minTrust: must be one of low/medium/high (got ${JSON.stringify(opts.minTrust)})`);
  }

  if (opts.tier !== undefined && !VALID_TIERS.has(opts.tier)) {
    errors.push(`tier: must be one of U/P/L (got ${JSON.stringify(opts.tier)})`);
  }

  if (opts.since !== undefined && !Number.isFinite(Date.parse(opts.since))) {
    errors.push(`since: must be an ISO 8601 date string (got ${JSON.stringify(opts.since)})`);
  }

  if (opts.limit !== undefined) {
    const limitOk =
      Number.isInteger(opts.limit) && opts.limit > 0 && opts.limit <= MAX_LIMIT;
    if (!limitOk) {
      errors.push(`limit: must be a positive integer ≤ ${MAX_LIMIT}`);
    }
  }

  return { errors, mode };
}
111
+
112
+ // --- Keyword (FTS5 BM25) backend --------------------------------------
113
+
114
// Base FTS5 query: joins the full-text table back to `observations` by rowid
// and selects the columns runKeywordSearch maps into result objects. Filter
// clauses, ordering and LIMIT are appended by buildKeywordSql.
const KEYWORD_BASE_SQL = `
  SELECT
    o.id AS id,
    o.body AS body,
    o.heading_path AS heading_path,
    o.source_file AS source_file,
    o.source_line AS source_line,
    o.tier AS tier,
    o.trust AS trust,
    o.created_at AS created_at,
    o.deleted_at AS deleted_at,
    observations_fts.rank AS score,
    snippet(observations_fts, 0, '<b>', '</b>', '...', 16) AS snippet
  FROM observations_fts
  JOIN observations o ON o.rowid = observations_fts.rowid
  WHERE observations_fts MATCH @query
`;

/**
 * Assemble the keyword-search SQL and its named parameters.
 *
 * Every user-supplied value travels as a bound parameter; only fixed clause
 * text is concatenated into the statement.
 *
 * @param {object} opts - Validated search options (query, tier?, minTrust?,
 *   since?, includeTombstoned?, limit?).
 * @returns {{sql: string, params: object}}
 */
function buildKeywordSql(opts) {
  const { query, tier, minTrust, since, includeTombstoned, limit } = opts;
  const params = { query };
  const filters = [];

  if (tier !== undefined) {
    filters.push('o.tier = @tier');
    params.tier = tier;
  }

  if (minTrust !== undefined) {
    // SQLite has no enum-ordinal builtin; CASE WHEN translates the string
    // trust to its numeric rank, then compares.
    filters.push(`
      CASE o.trust
        WHEN 'high' THEN 3
        WHEN 'medium' THEN 2
        WHEN 'low' THEN 1
        ELSE 0
      END >= @min_trust_ord
    `);
    params.min_trust_ord = TRUST_ORDINAL[minTrust];
  }

  if (since !== undefined) {
    filters.push('o.created_at >= @since_ms');
    params.since_ms = Date.parse(since);
  }

  // Tombstoned rows are hidden unless the caller opts in.
  if (!includeTombstoned) {
    filters.push('o.deleted_at IS NULL');
  }

  const extraWhere = filters.length > 0 ? ' AND ' + filters.join(' AND ') : '';
  params.limit = limit ?? DEFAULT_LIMIT;

  return {
    sql: KEYWORD_BASE_SQL + extraWhere + ' ORDER BY observations_fts.rank LIMIT @limit',
    params,
  };
}
165
+
166
+ // FTS5 parse errors aren't validation errors — they're query-syntax
167
+ // errors thrown by SQLite when the user's query violates FTS5 grammar
168
+ // (e.g., `"user-explicit"` parses as `user AND NOT explicit` because
169
+ // `-` is the NOT operator; `"AND"` / `"OR"` are reserved; `"foo:bar"`
170
+ // treats `foo` as a column name and crashes if no such column exists).
171
+ // The kit's `cmk search "user-explicit"` is a realistic user query —
172
+ // the kit's own `write_source` enum value uses that exact string —
173
+ // so the error must surface as a clean schema-error result, NOT as an
174
+ // uncaught SqliteError stack trace. Surfaced by the Task 30 code-review
175
+ // as Important finding I1.
176
/**
 * Typed wrapper for SQLite errors raised while running an FTS5 query, so
 * search() can turn them into a schema-error result instead of letting a raw
 * SqliteError escape.
 *
 * The underlying error is exposed both as `originalError` (read by search())
 * and as the standard `cause`, so generic error tooling can follow the chain.
 */
class FTS5ParseError extends Error {
  /**
   * @param {Error} originalError - The error thrown by the database layer.
   * @param {string} query - The user's query text, quoted into the message.
   */
  constructor(originalError, query) {
    super(
      `FTS5 parse error on query ${JSON.stringify(query)}: ${originalError.message}`,
      { cause: originalError },
    );
    this.name = 'FTS5ParseError';
    this.originalError = originalError;
  }
}
183
+
184
/**
 * Run the keyword query and map database rows to result objects.
 *
 * @param {object} db - Object exposing `prepare(sql).all(params)`.
 * @param {object} opts - Validated search options, forwarded to buildKeywordSql.
 * @returns {Array<{id, snippet, source_file, source_line, tier, trust, score}>}
 *   Rows in the order the database returned them; `snippet` falls back to the
 *   row body when the database produced none.
 * @throws {FTS5ParseError} For errors whose code is 'SQLITE_ERROR' or whose
 *   message mentions `fts5:` / `no such column:`.
 * @throws Any other database error, rethrown unchanged.
 */
function runKeywordSearch(db, opts) {
  const { sql, params } = buildKeywordSql(opts);

  let rows;
  try {
    rows = db.prepare(sql).all(params);
  } catch (err) {
    // Grammar violations in the user's query surface as SQLite errors;
    // re-type them so search() can answer with a user-friendly hint.
    const looksLikeQueryError =
      err?.code === 'SQLITE_ERROR' ||
      /fts5:|no such column:/i.test(err?.message ?? '');
    throw looksLikeQueryError ? new FTS5ParseError(err, opts.query) : err;
  }

  return rows.map(
    ({ id, snippet, body, source_file, source_line, tier, trust, score }) => ({
      id,
      snippet: snippet ?? body,
      source_file,
      source_line,
      tier,
      trust,
      score,
    }),
  );
}
212
+
213
+ // --- Reciprocal-rank fusion (hybrid mode) -----------------------------
214
+
215
/**
 * Reciprocal-rank fusion of two ranked result lists into one. Design
 * §9.3 specifies 0.5/0.5 weight; the standard RRF formula is
 *   fused_score(d) = sum over backends b of: weight_b / (k + rank_b(d))
 * where rank starts at 1 for the top hit. Documents missing from one
 * backend contribute 0 from that backend.
 *
 * A list that is omitted or null is treated as empty, matching the
 * "contributes 0" rule, instead of raising a TypeError.
 *
 * Exported for direct unit-test in isolation (the production search()
 * call composes this with the keyword + semantic backends).
 *
 * @param {object} [args]
 * @param {Array<{id: *}>} [args.keywordResults] - Keyword hits, best first.
 * @param {Array<{id: *}>} [args.semanticResults] - Semantic hits, best first.
 * @param {number} [args.keywordWeight=0.5]
 * @param {number} [args.semanticWeight=0.5]
 * @param {number} [args.k=RRF_K] - RRF damping constant.
 * @returns {Array<object>} One entry per distinct id, sorted by fused score
 *   descending. Each entry is a copy of the first-seen result for that id
 *   (keyword list first) with `score` replaced by the fused score.
 */
export function reciprocalRankFusion({
  keywordResults,
  semanticResults,
  keywordWeight = 0.5,
  semanticWeight = 0.5,
  k = RRF_K,
} = {}) {
  const scores = new Map(); // id → fused score
  const byId = new Map(); // id → result object (first-seen wins for snippet/source)

  // Add one backend's contribution; position i in the list is rank i + 1.
  const accumulate = (results, weight) => {
    (results ?? []).forEach((result, i) => {
      const inc = weight / (k + i + 1);
      scores.set(result.id, (scores.get(result.id) ?? 0) + inc);
      if (!byId.has(result.id)) byId.set(result.id, result);
    });
  };

  accumulate(keywordResults, keywordWeight);
  accumulate(semanticResults, semanticWeight);

  return [...scores.entries()]
    .map(([id, score]) => ({ ...byId.get(id), score }))
    .sort((a, b) => b.score - a.score);
}
253
+
254
+ // --- Public boundary --------------------------------------------------
255
+
256
/**
 * Public entry point for `cmk search`.
 *
 * - keyword: runs the FTS5 query via runKeywordSearch.
 * - semantic: calls the injected `opts.semanticBackend(opts)`.
 * - hybrid: runs both and fuses them with reciprocalRankFusion.
 *
 * `limit` (default DEFAULT_LIMIT) now bounds the result list in every mode;
 * semantic mode previously returned whatever the backend produced. The
 * backend must synchronously return an array — anything else (for example a
 * Promise from an async backend) raises a TypeError rather than being handed
 * back as `results`.
 *
 * @param {object} [opts] - {db, query, mode?, minTrust?, tier?, since?,
 *   limit?, includeTombstoned?, semanticBackend?}
 * @returns {object} `{action: 'found', mode, results}` on success; otherwise
 *   the value produced by errorResult for invalid input, an unavailable
 *   semantic backend, or an FTS5 query-syntax error.
 * @throws {TypeError} When the semantic backend does not return an array.
 * @throws Any non-FTS5 error raised by a backend, rethrown unchanged.
 */
export function search(opts = {}) {
  const { errors, mode } = validateInput(opts);
  if (errors.length > 0) {
    return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
  }

  // Semantic + hybrid require an injected backend. Production v0.1.0
  // passes undefined → error with the install-memsearch hint. No silent
  // fallback to keyword.
  const needsSemantic =
    mode === SEARCH_MODES.SEMANTIC || mode === SEARCH_MODES.HYBRID;
  if (needsSemantic && typeof opts.semanticBackend !== 'function') {
    return errorResult({
      category: ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE,
      errors: [
        'memsearch not installed — install via the Layer 5b install path. ' +
          'Use --mode=keyword for the always-available FTS5 search.',
      ],
    });
  }

  const limit = opts.limit ?? DEFAULT_LIMIT;

  // The semantic backend is an injected callable expected to return the same
  // shape as runKeywordSearch (array of {id, snippet, source_file,
  // source_line, tier, trust, score}).
  const runSemantic = () => {
    const rows = opts.semanticBackend(opts);
    if (!Array.isArray(rows)) {
      throw new TypeError('semanticBackend must synchronously return an array of results');
    }
    return rows;
  };

  let results;
  try {
    if (mode === SEARCH_MODES.KEYWORD) {
      results = runKeywordSearch(opts.db, opts);
    } else if (mode === SEARCH_MODES.SEMANTIC) {
      results = runSemantic().slice(0, limit);
    } else {
      // hybrid: run both backends + fuse.
      const keywordResults = runKeywordSearch(opts.db, opts);
      const semanticResults = runSemantic();
      results = reciprocalRankFusion({ keywordResults, semanticResults }).slice(0, limit);
    }
  } catch (err) {
    if (err instanceof FTS5ParseError) {
      return errorResult({
        category: ERROR_CATEGORIES.SCHEMA,
        errors: [
          `query: FTS5 parse error — ${err.originalError.message}. ` +
            'Try wrapping the query in double quotes for phrase mode ' +
            '(e.g., `cmk search \'"user-explicit"\'`).',
        ],
      });
    }
    throw err;
  }

  return { action: 'found', mode, results };
}