@lh8ppl/claude-memory-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cmk-compress-lazy.mjs +59 -0
- package/bin/cmk-daily-distill.mjs +67 -0
- package/bin/cmk-weekly-curate.mjs +56 -0
- package/bin/cmk.mjs +12 -0
- package/package.json +50 -0
- package/src/audit-log.mjs +103 -0
- package/src/auto-extract.mjs +742 -0
- package/src/capture-prompt.mjs +61 -0
- package/src/capture-turn.mjs +273 -0
- package/src/claude-md.mjs +212 -0
- package/src/compress-session.mjs +349 -0
- package/src/compressor.mjs +376 -0
- package/src/conflict-queue.mjs +796 -0
- package/src/cooldown.mjs +61 -0
- package/src/daily-distill.mjs +252 -0
- package/src/doctor.mjs +528 -0
- package/src/forget.mjs +335 -0
- package/src/frontmatter.mjs +73 -0
- package/src/import-anthropic-memory.mjs +266 -0
- package/src/index-db.mjs +154 -0
- package/src/index-rebuild.mjs +597 -0
- package/src/index.mjs +90 -0
- package/src/inject-context.mjs +484 -0
- package/src/install.mjs +327 -0
- package/src/lazy-compress.mjs +326 -0
- package/src/lock-discipline.mjs +166 -0
- package/src/mcp-server.mjs +498 -0
- package/src/memory-write.mjs +565 -0
- package/src/merge-facts.mjs +213 -0
- package/src/observe-edit.mjs +87 -0
- package/src/platform-commands.mjs +138 -0
- package/src/poison-guard.mjs +245 -0
- package/src/privacy.mjs +21 -0
- package/src/provenance.mjs +217 -0
- package/src/register-crons.mjs +354 -0
- package/src/reindex.mjs +134 -0
- package/src/repair.mjs +316 -0
- package/src/result-shapes.mjs +155 -0
- package/src/review-queue.mjs +345 -0
- package/src/roll.mjs +115 -0
- package/src/scratchpad.mjs +335 -0
- package/src/search.mjs +311 -0
- package/src/subcommands.mjs +1252 -0
- package/src/tier-paths.mjs +74 -0
- package/src/transcripts.mjs +234 -0
- package/src/trust.mjs +226 -0
- package/src/weekly-curate.mjs +454 -0
- package/src/write-fact.mjs +205 -0
- package/template/.claude/hooks/pre-tool-memory.js +78 -0
- package/template/.claude/hooks/transcript-capture.js +69 -0
- package/template/.claude/settings.json +27 -0
- package/template/.claude/skills/memory-write/SKILL.md +117 -0
- package/template/.gitignore.fragment +12 -0
- package/template/CLAUDE.md.template +49 -0
- package/template/docs/journey/journey-log.md.template +292 -0
- package/template/local/machine-paths.md.template +37 -0
- package/template/local/overrides.md.template +36 -0
- package/template/project/.index/.gitkeep +0 -0
- package/template/project/MEMORY.md.template +47 -0
- package/template/project/SOUL.md.template +35 -0
- package/template/project/memory/INDEX.md.template +47 -0
- package/template/project/memory/archive/superseded/.gitkeep +0 -0
- package/template/project/memory/archive/tombstones/.gitkeep +0 -0
- package/template/project/queues/.gitkeep +0 -0
- package/template/project/sessions/.gitkeep +0 -0
- package/template/project/transcripts/.gitkeep +0 -0
- package/template/support/cron-jobs/daily-memory-distill.md +15 -0
- package/template/support/cron-jobs/nightly-memsearch-index.md +17 -0
- package/template/support/cron-jobs/weekly-memory-curator.md +15 -0
- package/template/support/milvus-deploy/README.md +57 -0
- package/template/support/milvus-deploy/docker-compose.yml +66 -0
- package/template/support/scripts/auto-extract-memory.sh +102 -0
- package/template/support/scripts/memsearch-index-with-flush.sh +59 -0
- package/template/support/scripts/refresh-distill-timestamp.py +35 -0
- package/template/support/scripts/register-crons.py +242 -0
- package/template/support/scripts/run-daily-distill.sh +67 -0
- package/template/support/scripts/run-weekly-curate.sh +58 -0
- package/template/user/HABITS.md.template +18 -0
- package/template/user/LESSONS.md.template +18 -0
- package/template/user/USER.md.template +18 -0
- package/template/user/fragments/INDEX.md.template +23 -0
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
// Bounded scratchpad writer (Task 12, T-010). First Layer 3 module.
|
|
2
|
+
// First real consumer of the shared modules established at PR-2.
|
|
3
|
+
//
|
|
4
|
+
// Public boundary: appendScratchpadBullet(opts) → result.
|
|
5
|
+
// See design §2.1 + §4 + tasks.md 12.1-12.5.
|
|
6
|
+
//
|
|
7
|
+
// Uses shared modules per CLAUDE.md "Shared modules" rule:
|
|
8
|
+
// tier-paths.mjs — VALID_TIERS, SCRATCHPADS_BY_TIER, DEFAULT_SCRATCHPAD_CAPS,
|
|
9
|
+
// resolveTierRoot, resolveScratchpadPath
|
|
10
|
+
// audit-log.mjs — appendAuditEntry, nowIso, REASON_CODES
|
|
11
|
+
// result-shapes.mjs — ERROR_CATEGORIES, errorResult
|
|
12
|
+
// @lh8ppl/cmk-canonicalize — generateId (citation IDs derived from the bullet text)
|
|
13
|
+
//
|
|
14
|
+
// Frontmatter (HTML-comment provenance below the bullet) is formatted by
// provenance.mjs's `writeBullet(...)` primitive (Task 13), which this module
// calls through formatBullet(). This module decides where the bullet goes
// (file, section, cap enforcement); provenance.mjs owns the on-disk format.
|
|
19
|
+
|
|
20
|
+
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
21
|
+
import { generateId } from '@lh8ppl/cmk-canonicalize';
|
|
22
|
+
import {
|
|
23
|
+
VALID_TIERS,
|
|
24
|
+
SCRATCHPADS_BY_TIER,
|
|
25
|
+
DEFAULT_SCRATCHPAD_CAPS,
|
|
26
|
+
resolveTierRoot,
|
|
27
|
+
resolveScratchpadPath,
|
|
28
|
+
} from './tier-paths.mjs';
|
|
29
|
+
import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
|
|
30
|
+
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
31
|
+
import { writeBullet, parseBulletProvenance } from './provenance.mjs';
|
|
32
|
+
|
|
33
|
+
// Values accepted for `provenance.trust`.
const VALID_TRUST = new Set(['high', 'medium', 'low']);

// Values accepted for `provenance.write`.
const VALID_WRITE_SOURCES = new Set(
  ['user-explicit', 'auto-extract', 'compressor', 'manual-edit', 'imported'],
);

// Per Task 13.2 / provenance.mjs: 6 comment fields. `id` comes from the
// bullet line and is added by appendScratchpadBullet, not from caller.
const REQUIRED_PROVENANCE_FIELDS = ['source', 'source_line', 'sha1', 'write', 'trust', 'at'];

// A candidate file larger than cap × this ratio triggers consolidation.
const CONSOLIDATION_TRIGGER_RATIO = 0.95;

// Age (in days) after which consolidation treats a bullet as stale.
const STALE_AFTER_DAYS = 14;
|
|
54
|
+
|
|
55
|
+
/**
 * Validate the options passed to appendScratchpadBullet.
 *
 * Collects every problem instead of stopping at the first one, so the
 * caller can report them all at once.
 *
 * @param {object} opts - raw options (tier, scratchpad, section, text, provenance).
 * @returns {string[]} human-readable error messages; empty when valid.
 */
function validateOptions(opts) {
  const { tier, scratchpad, section, text, provenance } = opts;
  const problems = [];
  const tierIsKnown = Boolean(tier) && VALID_TIERS.has(tier);

  if (!tier) {
    problems.push("tier: required, one of 'U', 'P', 'L'");
  } else if (!tierIsKnown) {
    problems.push(`tier: must be 'U', 'P', or 'L' (got ${JSON.stringify(tier)})`);
  }

  if (!scratchpad) {
    problems.push('scratchpad: required, one of the documented scratchpad filenames');
  } else if (tierIsKnown) {
    // The scratchpad name can only be checked against a recognised tier.
    const permitted = SCRATCHPADS_BY_TIER[tier];
    if (!permitted.has(scratchpad)) {
      problems.push(
        `scratchpad: ${scratchpad} is not valid for tier ${tier} (allowed: ${[...permitted].join(', ')})`,
      );
    }
  }

  const sectionOk = typeof section === 'string' && section !== '';
  if (!sectionOk) {
    problems.push('section: required, non-empty string (must match a `## <section>` heading in the file)');
  }

  const textOk = typeof text === 'string' && text.trim() !== '';
  if (!textOk) {
    problems.push('text: required, non-empty string');
  }

  if (!provenance || typeof provenance !== 'object') {
    problems.push('provenance: required object with source/source_line/sha1/write/trust/at');
    return problems;
  }

  for (const field of REQUIRED_PROVENANCE_FIELDS) {
    const value = provenance[field];
    if (value == null || value === '') {
      problems.push(`provenance.${field}: required, non-empty`);
    }
  }

  const { trust, write } = provenance;
  if (trust && !VALID_TRUST.has(trust)) {
    problems.push(`provenance.trust: must be one of high/medium/low (got ${JSON.stringify(trust)})`);
  }
  if (write && !VALID_WRITE_SOURCES.has(write)) {
    problems.push(
      `provenance.write: must be one of user-explicit/auto-extract/compressor/manual-edit/imported (got ${JSON.stringify(write)})`,
    );
  }

  return problems;
}
|
|
103
|
+
|
|
104
|
+
// Bullet formatting is delegated to provenance.mjs's writeBullet (Task 13).
|
|
105
|
+
// scratchpad.mjs is responsible for "where the bullet goes" (which file,
|
|
106
|
+
// which section, cap enforcement); provenance.mjs is responsible for
|
|
107
|
+
// "what the bullet+comment look like on disk".
|
|
108
|
+
/**
 * Format a bullet and its provenance comment by delegating to writeBullet.
 *
 * @param {{id: string, text: string, provenance: object}} bulletSpec
 * @returns the `lines` value produced by writeBullet.
 * @throws {Error} when writeBullet reports any action other than 'formatted'.
 */
function formatBullet({ id, text, provenance }) {
  const formatted = writeBullet({ id, text, provenance });
  if (formatted.action === 'formatted') {
    return formatted.lines;
  }
  // Inputs were validated upstream, so this path is purely defensive.
  const detail = formatted.errors?.join('; ') ?? 'unknown';
  throw new Error(
    `scratchpad.formatBullet: writeBullet returned ${formatted.action}: ${detail}`,
  );
}
|
|
118
|
+
|
|
119
|
+
/**
 * Best-effort JSON file reader.
 *
 * @param {string} path - file to read.
 * @returns the parsed JSON value, or null when the file does not exist or
 *   cannot be read/parsed.
 */
function readJsonIfExists(path) {
  let parsed = null;
  if (existsSync(path)) {
    try {
      parsed = JSON.parse(readFileSync(path, 'utf8'));
    } catch {
      // Unreadable or malformed settings are treated the same as absent ones.
      parsed = null;
    }
  }
  return parsed;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Resolve the size cap for a scratchpad file.
 *
 * Precedence:
 *   1. injected `settings` (test hook) — short-circuits, no disk reads;
 *   2. project-tier settings.json (tiers P and L, which share a project root);
 *   3. user-tier settings.json;
 *   4. DEFAULT_SCRATCHPAD_CAPS[scratchpad].
 *
 * A configured `scratchpads[<name>].max_chars` is honoured only when it is a
 * real number (not NaN). Anything else falls through to the next level, so a
 * malformed value can never silently disable cap enforcement downstream.
 *
 * @param {object} args
 * @param {string} args.tier - 'U', 'P' or 'L'.
 * @param {string} args.scratchpad - scratchpad filename used as settings key.
 * @param {string} [args.projectRoot] - forwarded to resolveScratchpadPath.
 * @param {string} [args.userDir] - forwarded to resolveScratchpadPath.
 * @param {object} [args.settings] - injected settings object.
 * @returns the resolved cap.
 */
function resolveCap({ tier, scratchpad, projectRoot, userDir, settings }) {
  // Pull a usable cap out of a settings object, or undefined if there is none.
  const configuredCap = (source) => {
    const value = source?.scratchpads?.[scratchpad]?.max_chars;
    return typeof value === 'number' && !Number.isNaN(value) ? value : undefined;
  };

  // Test-injected settings short-circuit.
  if (settings) {
    return configuredCap(settings) ?? DEFAULT_SCRATCHPAD_CAPS[scratchpad];
  }

  // Project tier wins (only meaningful for tiers P + L which share a project root).
  if (tier === 'P' || tier === 'L') {
    const projectCap = configuredCap(
      readJsonIfExists(
        resolveScratchpadPath({ tier: 'P', scratchpad: 'settings.json', projectRoot }),
      ),
    );
    if (projectCap !== undefined) return projectCap;
  }

  // User tier fallback.
  const userCap = configuredCap(
    readJsonIfExists(
      resolveScratchpadPath({ tier: 'U', scratchpad: 'settings.json', userDir }),
    ),
  );
  if (userCap !== undefined) return userCap;

  // Hardcoded default.
  return DEFAULT_SCRATCHPAD_CAPS[scratchpad];
}
|
|
161
|
+
|
|
162
|
+
/**
 * Locate a `## <sectionTitle>` section within an array of file lines.
 *
 * The heading is matched after trimming the line. The section ends at the
 * next line that starts with `##` followed by whitespace, or at end of file.
 *
 * @param {string[]} lines - file content split into lines.
 * @param {string} sectionTitle - heading text without the `## ` prefix.
 * @returns {{startIdx: number, endIdx: number} | null} heading index and
 *   exclusive end index, or null when the heading is absent.
 */
function findSectionRange(lines, sectionTitle) {
  const heading = `## ${sectionTitle}`;

  let startIdx = -1;
  for (let i = 0; i < lines.length; i++) {
    if (lines[i].trim() === heading) {
      startIdx = i;
      break;
    }
  }
  if (startIdx < 0) return null;

  let endIdx = lines.length;
  for (let j = startIdx + 1; j < lines.length; j++) {
    if (/^##\s/.test(lines[j])) {
      endIdx = j;
      break;
    }
  }
  return { startIdx, endIdx };
}
|
|
173
|
+
|
|
174
|
+
/**
 * Insert a formatted bullet at the end of a section's content.
 *
 * The insertion point is just before any blank lines that trail the section,
 * so those blank lines stay between the new bullet and the next heading.
 *
 * @param {string} text - full file content.
 * @param {string} sectionTitle - heading text without the `## ` prefix.
 * @param {string} bullet - bullet text, possibly spanning several lines.
 * @returns {string | null} updated file content, or null when the section
 *   heading is not present.
 */
function insertIntoSection(text, sectionTitle, bullet) {
  const fileLines = text.split('\n');
  const section = findSectionRange(fileLines, sectionTitle);
  if (section === null) return null;

  // Walk back over trailing blank lines, but never above the heading itself.
  let insertAt = section.endIdx;
  for (; insertAt > section.startIdx + 1; insertAt--) {
    if (fileLines[insertAt - 1].trim() !== '') break;
  }

  return [
    ...fileLines.slice(0, insertAt),
    ...bullet.split('\n'),
    ...fileLines.slice(insertAt),
  ].join('\n');
}
|
|
190
|
+
|
|
191
|
+
/**
 * Drop stale, non-high-trust bullets from scratchpad content.
 *
 * A bullet is a line starting with `- (` immediately followed by a
 * single-line HTML comment. The comment is parsed with
 * parseBulletProvenance; the pair is removed only when the provenance has
 * both `at` and `trust`, trust is not 'high', `at` parses as a date, and
 * `at` is older than STALE_AFTER_DAYS before `nowDate`.
 *
 * If `nowDate` is an Invalid Date nothing is removed. Without this guard the
 * cutoff would be NaN, every `at >= cutoff` comparison would be false, and
 * every eligible bullet would be deleted regardless of age.
 *
 * @param {string} text - full file content.
 * @param {{nowDate: Date}} options - reference "now" for the staleness cutoff.
 * @returns {{text: string, bulletsRemoved: number}} possibly-shortened content
 *   and the number of bullet/comment pairs removed.
 */
function consolidate(text, { nowDate }) {
  if (Number.isNaN(nowDate.getTime())) {
    return { text, bulletsRemoved: 0 };
  }

  const lines = text.split('\n');
  const removeIdx = new Set();
  const staleCutoff = new Date(nowDate.getTime() - STALE_AFTER_DAYS * 24 * 60 * 60 * 1000);
  let bulletsRemoved = 0;

  for (let i = 0; i < lines.length - 1; i++) {
    if (removeIdx.has(i)) continue;
    const bulletLine = lines[i];
    const commentLine = lines[i + 1];
    if (!bulletLine.startsWith('- (')) continue;
    if (!commentLine || !/^\s*<!--.*-->\s*$/.test(commentLine)) continue;

    const prov = parseBulletProvenance(commentLine);
    if (!prov || !prov.at || !prov.trust) continue;
    if (prov.trust === 'high') continue; // Preserve high-trust regardless of age.

    const at = new Date(prov.at);
    if (Number.isNaN(at.getTime())) continue; // Unparseable timestamp → keep.
    if (at >= staleCutoff) continue; // <14d → keep

    // Remove the bullet together with its provenance comment.
    removeIdx.add(i);
    removeIdx.add(i + 1);
    bulletsRemoved++;
  }

  if (removeIdx.size === 0) {
    return { text, bulletsRemoved: 0 };
  }
  const out = lines.filter((_, i) => !removeIdx.has(i)).join('\n');
  return { text: out, bulletsRemoved };
}
|
|
223
|
+
|
|
224
|
+
/**
 * Append a provenance-tagged bullet to a section of a bounded scratchpad file.
 *
 * Flow: validate options → resolve paths → insert the bullet into the
 * section → if the candidate exceeds CONSOLIDATION_TRIGGER_RATIO of the cap,
 * run consolidate() → refuse the write if still over the cap → write the
 * file and append an audit entry.
 *
 * @param {object} [opts]
 * @param {string} opts.tier - 'U', 'P' or 'L'.
 * @param {string} opts.scratchpad - scratchpad filename valid for the tier.
 * @param {string} opts.section - heading text of the target `## ` section.
 * @param {string} opts.text - bullet text.
 * @param {object} opts.provenance - source/source_line/sha1/write/trust/at.
 * @param {string} [opts.id] - citation id; derived from tier + text if omitted.
 * @param {string} [opts.projectRoot] - forwarded to the tier-path resolvers.
 * @param {string} [opts.userDir] - forwarded to the tier-path resolvers.
 * @param {*} [opts.now] - clock override; must be parseable by `new Date()`.
 * @param {object} [opts.settings] - injected settings used for cap resolution.
 * @returns on success `{action: 'appended', id, path, cap, bytes,
 *   consolidationRan, bulletsConsolidated}`; otherwise an errorResult with
 *   category SCHEMA, NOT_FOUND or CAP_EXCEEDED. The file is not modified on
 *   any error path.
 */
export function appendScratchpadBullet(opts = {}) {
  const errors = validateOptions(opts);
  if (errors.length > 0) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors,
    });
  }

  const {
    tier,
    scratchpad,
    section,
    text,
    provenance,
    projectRoot,
    userDir,
    now,
    settings,
  } = opts;

  // Resolve the clock once so consolidation and the audit entry agree, and
  // reject an unparseable override up front: an Invalid Date would otherwise
  // reach the consolidator and the audit log.
  const ts = now ?? nowIso();
  const nowDate = new Date(ts);
  if (Number.isNaN(nowDate.getTime())) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors: [`now: must be a parseable timestamp (got ${JSON.stringify(ts)})`],
    });
  }

  const tierRoot = resolveTierRoot({ tier, projectRoot, userDir });
  const path = resolveScratchpadPath({ tier, scratchpad, projectRoot, userDir });

  if (!existsSync(path)) {
    return errorResult({
      category: ERROR_CATEGORIES.NOT_FOUND,
      errors: [
        `scratchpad file does not exist at ${path} — run \`cmk install\` (project tier) or \`cmk init-user-tier\` (user tier) first`,
      ],
      path,
    });
  }

  const original = readFileSync(path, 'utf8');
  const id = opts.id ?? generateId(tier, text);
  const cap = resolveCap({ tier, scratchpad, projectRoot, userDir, settings });
  const bullet = formatBullet({ id, text, provenance });

  // 1. Build candidate file content (bullet inserted into section)
  const candidate = insertIntoSection(original, section, bullet);
  if (candidate === null) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors: [
        `section "${section}" not found in ${scratchpad} (expected a "## ${section}" heading)`,
      ],
      path,
    });
  }

  // 2. Cap check: would the write push to >95%? If yes, consolidate.
  // NOTE(review): consolidation runs on the candidate, so a new bullet whose
  // own provenance.at is already past the stale window (and whose trust is
  // not 'high') can itself be dropped while this function still reports
  // 'appended' — confirm whether that is intended.
  let consolidationRan = false;
  let bulletsConsolidated = 0;
  let finalContent = candidate;
  const candidateBytes = Buffer.byteLength(candidate, 'utf8');

  if (candidateBytes > cap * CONSOLIDATION_TRIGGER_RATIO) {
    consolidationRan = true;
    const consolidated = consolidate(candidate, { nowDate });
    bulletsConsolidated = consolidated.bulletsRemoved;
    finalContent = consolidated.text;
  }

  // 3. Post-consolidation cap check
  const finalBytes = Buffer.byteLength(finalContent, 'utf8');
  if (finalBytes > cap) {
    // File untouched. The original on-disk content is preserved verbatim.
    return errorResult({
      category: ERROR_CATEGORIES.CAP_EXCEEDED,
      errors: [
        `scratchpad cap exceeded: ${finalBytes} bytes would exceed cap of ${cap} bytes for ${scratchpad} (consolidator dropped ${bulletsConsolidated} bullet(s), still over). No silent truncation; resolve by raising the cap in settings.json or manually distilling.`,
      ],
      path,
      cap,
      bytes: finalBytes,
      consolidationRan,
      bulletsConsolidated,
    });
  }

  // 4. Write + audit
  writeFileSync(path, finalContent, 'utf8');
  appendAuditEntry(tierRoot, {
    ts,
    action: 'appended',
    tier,
    id,
    reasonCode: REASON_CODES.SCRATCHPAD_APPEND,
    paths: { after: path },
    extra: {
      scratchpad,
      section,
      cap,
      bytes: finalBytes,
      consolidationRan,
      bulletsConsolidated,
    },
  });

  return {
    action: 'appended',
    id,
    path,
    cap,
    bytes: finalBytes,
    consolidationRan,
    bulletsConsolidated,
  };
}
|
package/src/search.mjs
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
// `cmk search` query engine (Task 30, T-026).
|
|
2
|
+
//
|
|
3
|
+
// Composes on top of:
|
|
4
|
+
// - index-db.mjs (Task 28) — observations + observations_fts schema
|
|
5
|
+
// - index-rebuild.mjs (Task 29) — populates the index
|
|
6
|
+
// - result-shapes.mjs — ERROR_CATEGORIES, errorResult
|
|
7
|
+
//
|
|
8
|
+
// Three search modes per design §9.3:
|
|
9
|
+
//
|
|
10
|
+
// keyword FTS5 BM25 over the body / heading_path / write_source columns.
|
|
11
|
+
// ~100ms for 10k bullets. Always available — the keyword
|
|
12
|
+
// backend ships in v0.1.0 with no extra install.
|
|
13
|
+
//
|
|
14
|
+
// semantic memsearch + Milvus (Layer 5b — optional install). The kit
|
|
15
|
+
// does NOT ship memsearch in v0.1.0; this mode errors with
|
|
16
|
+
// ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE when the caller
|
|
17
|
+
// requests it without injecting a semantic backend. NO silent
|
|
18
|
+
// fallback to keyword — design §9.3's explicit "exit 2 when
|
|
19
|
+
// not installed" contract.
|
|
20
|
+
//
|
|
21
|
+
// hybrid Reciprocal-rank fusion of keyword + semantic, 0.5/0.5
|
|
22
|
+
// weight per design §9.3. Requires the semantic backend.
|
|
23
|
+
// Errors the same way when semantic is unavailable.
|
|
24
|
+
//
|
|
25
|
+
// Filter flags (per tasks.md 30.4):
|
|
26
|
+
// minTrust: 'low' | 'medium' | 'high' — uses ordinal compare
|
|
27
|
+
// tier: 'U' | 'P' | 'L' — exact match
|
|
28
|
+
// since: ISO 8601 string — `created_at >= since`
|
|
29
|
+
// limit: positive integer — default 20
|
|
30
|
+
// includeTombstoned: boolean — default false
|
|
31
|
+
// (default WHERE excludes rows with deleted_at IS NOT NULL)
|
|
32
|
+
//
|
|
33
|
+
// Public boundary:
|
|
34
|
+
// search({db, query, mode?, minTrust?, tier?, since?, limit?,
|
|
35
|
+
// includeTombstoned?, semanticBackend?})
|
|
36
|
+
// → { action: 'found', mode, results: [{id, snippet, source_file,
|
|
37
|
+
// source_line, trust, score}] }
|
|
38
|
+
// → errorResult({category, errors}) on semantic-unavailable / schema-error
|
|
39
|
+
//
|
|
40
|
+
// `semanticBackend` is a dependency-injection hook for testing the
|
|
41
|
+
// hybrid + semantic paths. Production callers (the `cmk search` CLI in
|
|
42
|
+
// subcommands.mjs) pass undefined; v0.1.x lands the real backend.
|
|
43
|
+
|
|
44
|
+
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
45
|
+
import { VALID_TIERS } from './tier-paths.mjs';
|
|
46
|
+
|
|
47
|
+
// The three supported search modes.
export const SEARCH_MODES = Object.freeze({ KEYWORD: 'keyword', SEMANTIC: 'semantic', HYBRID: 'hybrid' });

// Result-count default and the upper bound accepted for `limit`.
export const DEFAULT_LIMIT = 20;
const MAX_LIMIT = 1000;

// Numeric rank of each trust level, used for ordinal `minTrust` comparison.
const TRUST_ORDINAL = Object.freeze({ low: 1, medium: 2, high: 3 });

// Reciprocal-rank fusion constant per design §9.3 (k=60 is the
// standard RRF default from the IR literature; smaller k weights the
// top results more heavily).
const RRF_K = 60;
|
|
66
|
+
|
|
67
|
+
// --- Validation -------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
/**
 * Validate the options passed to search().
 *
 * @param {object} opts - raw search options.
 * @returns {{errors: string[], mode: string}} every validation problem found
 *   (empty when valid) plus the effective mode (`opts.mode`, defaulting to
 *   keyword).
 */
function validateInput(opts) {
  const errors = [];
  if (!opts.db || typeof opts.db.prepare !== 'function') {
    errors.push('db: required, better-sqlite3 Database instance');
  }
  if (typeof opts.query !== 'string' || opts.query.trim().length === 0) {
    errors.push('query: required, non-empty string');
  }
  const mode = opts.mode ?? SEARCH_MODES.KEYWORD;
  if (
    mode !== SEARCH_MODES.KEYWORD &&
    mode !== SEARCH_MODES.SEMANTIC &&
    mode !== SEARCH_MODES.HYBRID
  ) {
    errors.push(`mode: must be one of keyword/semantic/hybrid (got ${JSON.stringify(mode)})`);
  }
  // Require a numeric ordinal rather than mere truthiness: a plain lookup
  // also finds inherited Object.prototype members ('constructor', 'toString',
  // …), which would pass a truthiness test and later be bound as the SQL
  // min-trust parameter.
  if (opts.minTrust !== undefined && typeof TRUST_ORDINAL[opts.minTrust] !== 'number') {
    errors.push(`minTrust: must be one of low/medium/high (got ${JSON.stringify(opts.minTrust)})`);
  }
  if (opts.tier !== undefined && !VALID_TIERS.has(opts.tier)) {
    errors.push(`tier: must be one of U/P/L (got ${JSON.stringify(opts.tier)})`);
  }
  if (opts.since !== undefined) {
    const t = Date.parse(opts.since);
    if (!Number.isFinite(t)) {
      errors.push(`since: must be an ISO 8601 date string (got ${JSON.stringify(opts.since)})`);
    }
  }
  if (opts.limit !== undefined) {
    if (
      !Number.isInteger(opts.limit) ||
      opts.limit <= 0 ||
      opts.limit > MAX_LIMIT
    ) {
      errors.push(`limit: must be a positive integer ≤ ${MAX_LIMIT}`);
    }
  }
  return { errors, mode };
}
|
|
111
|
+
|
|
112
|
+
// --- Keyword (FTS5 BM25) backend --------------------------------------
|
|
113
|
+
|
|
114
|
+
const KEYWORD_BASE_SQL = `
|
|
115
|
+
SELECT
|
|
116
|
+
o.id AS id,
|
|
117
|
+
o.body AS body,
|
|
118
|
+
o.heading_path AS heading_path,
|
|
119
|
+
o.source_file AS source_file,
|
|
120
|
+
o.source_line AS source_line,
|
|
121
|
+
o.tier AS tier,
|
|
122
|
+
o.trust AS trust,
|
|
123
|
+
o.created_at AS created_at,
|
|
124
|
+
o.deleted_at AS deleted_at,
|
|
125
|
+
observations_fts.rank AS score,
|
|
126
|
+
snippet(observations_fts, 0, '<b>', '</b>', '...', 16) AS snippet
|
|
127
|
+
FROM observations_fts
|
|
128
|
+
JOIN observations o ON o.rowid = observations_fts.rowid
|
|
129
|
+
WHERE observations_fts MATCH @query
|
|
130
|
+
`;
|
|
131
|
+
|
|
132
|
+
/**
 * Build the keyword-search SQL statement and its named parameters.
 *
 * Starts from KEYWORD_BASE_SQL and appends one `AND` filter per supplied
 * option (tier, minTrust, since), excludes tombstoned rows unless
 * `includeTombstoned` is truthy, then orders by FTS rank and applies the
 * limit (DEFAULT_LIMIT when none is given).
 *
 * @param {object} opts - validated search options.
 * @returns {{sql: string, params: object}} statement text and bound values.
 */
function buildKeywordSql(opts) {
  const { query, tier, minTrust, since, includeTombstoned, limit } = opts;
  const params = { query };
  const filters = [];

  if (tier !== undefined) {
    filters.push('o.tier = @tier');
    params.tier = tier;
  }

  if (minTrust !== undefined) {
    // SQLite has no enum-ordinal builtin; CASE WHEN translates the
    // string trust to its numeric rank, then compares.
    filters.push(`
CASE o.trust
WHEN 'high' THEN 3
WHEN 'medium' THEN 2
WHEN 'low' THEN 1
ELSE 0
END >= @min_trust_ord
`);
    params.min_trust_ord = TRUST_ORDINAL[minTrust];
  }

  if (since !== undefined) {
    filters.push('o.created_at >= @since_ms');
    params.since_ms = Date.parse(since);
  }

  if (!includeTombstoned) {
    filters.push('o.deleted_at IS NULL');
  }

  params.limit = limit ?? DEFAULT_LIMIT;

  const sql = [
    KEYWORD_BASE_SQL,
    ...filters.map((filter) => ' AND ' + filter),
    ' ORDER BY observations_fts.rank LIMIT @limit',
  ].join('');

  return { sql, params };
}
|
|
165
|
+
|
|
166
|
+
// FTS5 parse errors aren't validation errors — they're query-syntax
|
|
167
|
+
// errors thrown by SQLite when the user's query violates FTS5 grammar
|
|
168
|
+
// (e.g., `"user-explicit"` parses as `user AND NOT explicit` because
|
|
169
|
+
// `-` is the NOT operator; `"AND"` / `"OR"` are reserved; `"foo:bar"`
|
|
170
|
+
// treats `foo` as a column name and crashes if no such column exists).
|
|
171
|
+
// The kit's `cmk search "user-explicit"` is a realistic user query —
|
|
172
|
+
// the kit's own `write_source` enum value uses that exact string —
|
|
173
|
+
// so the error must surface as a clean schema-error result, NOT as an
|
|
174
|
+
// uncaught SqliteError stack trace. Surfaced by the Task 30 code-review
|
|
175
|
+
// as Important finding I1.
|
|
176
|
+
/**
 * Typed wrapper for a SQLite error raised while running an FTS5 query.
 * Keeps the underlying error on `originalError` so callers can surface its
 * message.
 */
class FTS5ParseError extends Error {
  /**
   * @param {Error} sqliteError - the error thrown by the database call.
   * @param {string} ftsQuery - the user query that triggered it.
   */
  constructor(sqliteError, ftsQuery) {
    const quotedQuery = JSON.stringify(ftsQuery);
    super(`FTS5 parse error on query ${quotedQuery}: ${sqliteError.message}`);
    this.name = 'FTS5ParseError';
    this.originalError = sqliteError;
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Run the keyword query and map rows to the public result shape.
 *
 * @param {object} db - database handle exposing `prepare(sql).all(params)`.
 * @param {object} opts - validated search options.
 * @returns {Array<object>} results with id, snippet (falling back to the row
 *   body), source_file, source_line, tier, trust and score.
 * @throws {FTS5ParseError} when the database error has code 'SQLITE_ERROR'
 *   or its message matches an FTS5 / unknown-column pattern.
 * @throws any other database error unchanged.
 */
function runKeywordSearch(db, opts) {
  const { sql, params } = buildKeywordSql(opts);

  let rows;
  try {
    rows = db.prepare(sql).all(params);
  } catch (sqliteErr) {
    // Translate grammar violations into the typed class so search() can
    // return a schema-error result with a user-friendly hint.
    // NOTE(review): 'SQLITE_ERROR' looks like a generic code, so unrelated
    // SQL failures may be reported as parse errors too — confirm.
    const looksLikeFts5 =
      sqliteErr?.code === 'SQLITE_ERROR' ||
      /fts5:|no such column:/i.test(sqliteErr?.message ?? '');
    throw looksLikeFts5 ? new FTS5ParseError(sqliteErr, opts.query) : sqliteErr;
  }

  const toResult = ({ id, snippet, body, source_file, source_line, tier, trust, score }) => ({
    id,
    snippet: snippet ?? body,
    source_file,
    source_line,
    tier,
    trust,
    score,
  });
  return rows.map((row) => toResult(row));
}
|
|
212
|
+
|
|
213
|
+
// --- Reciprocal-rank fusion (hybrid mode) -----------------------------
|
|
214
|
+
|
|
215
|
+
/**
 * Merge two ranked result lists with reciprocal-rank fusion.
 *
 * Each list contributes `weight / (k + rank)` to a document's fused score,
 * with rank 1 for the first entry; a document absent from a list gets
 * nothing from it. Design §9.3 specifies the 0.5/0.5 default weights.
 * When an id appears more than once, the first-seen result object supplies
 * the non-score fields.
 *
 * Exported so it can be unit-tested in isolation; search() composes it with
 * the keyword and semantic backends.
 *
 * @param {object} args
 * @param {Array<{id: *}>} args.keywordResults - ranked keyword hits.
 * @param {Array<{id: *}>} args.semanticResults - ranked semantic hits.
 * @param {number} [args.keywordWeight=0.5]
 * @param {number} [args.semanticWeight=0.5]
 * @param {number} [args.k=RRF_K] - RRF damping constant.
 * @returns {Array<object>} fused results sorted by descending `score`.
 */
export function reciprocalRankFusion({
  keywordResults,
  semanticResults,
  keywordWeight = 0.5,
  semanticWeight = 0.5,
  k = RRF_K,
}) {
  const fusedScoreById = new Map(); // id → fused score
  const firstSeenById = new Map(); // id → result object (first-seen wins for snippet/source)

  // Fold one ranked list into the running totals.
  const accumulate = (ranked, weight) => {
    ranked.forEach((hit, position) => {
      const contribution = weight / (k + (position + 1));
      fusedScoreById.set(hit.id, (fusedScoreById.get(hit.id) ?? 0) + contribution);
      if (!firstSeenById.has(hit.id)) firstSeenById.set(hit.id, hit);
    });
  };

  accumulate(keywordResults, keywordWeight);
  accumulate(semanticResults, semanticWeight);

  const fused = [];
  for (const [id, score] of fusedScoreById) {
    fused.push({ ...firstSeenById.get(id), score });
  }
  fused.sort((left, right) => right.score - left.score);
  return fused;
}
|
|
253
|
+
|
|
254
|
+
// --- Public boundary --------------------------------------------------
|
|
255
|
+
|
|
256
|
+
/**
 * Public search entry point.
 *
 * Validates options, then dispatches on mode: keyword runs the FTS5 query;
 * semantic calls the injected `semanticBackend`; hybrid runs both and fuses
 * them with reciprocalRankFusion, truncated to the limit.
 *
 * @param {object} [opts] - db, query, mode, minTrust, tier, since, limit,
 *   includeTombstoned, semanticBackend.
 * @returns `{action: 'found', mode, results}` on success; an errorResult with
 *   category SCHEMA for invalid options or an FTS5 parse error, or
 *   SEMANTIC_UNAVAILABLE when semantic/hybrid is requested without a backend
 *   function.
 * @throws any non-FTS5 error raised by a backend.
 */
export function search(opts = {}) {
  const { errors, mode } = validateInput(opts);
  if (errors.length > 0) {
    return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
  }

  // Semantic + hybrid need an injected backend; there is deliberately no
  // silent fallback to keyword mode.
  const needsBackend = mode === SEARCH_MODES.SEMANTIC || mode === SEARCH_MODES.HYBRID;
  if (needsBackend && typeof opts.semanticBackend !== 'function') {
    return errorResult({
      category: ERROR_CATEGORIES.SEMANTIC_UNAVAILABLE,
      errors: [
        'memsearch not installed — install via the Layer 5b install path. ' +
          'Use --mode=keyword for the always-available FTS5 search.',
      ],
    });
  }

  let results;
  try {
    switch (mode) {
      case SEARCH_MODES.KEYWORD:
        results = runKeywordSearch(opts.db, opts);
        break;
      case SEARCH_MODES.SEMANTIC:
        // The backend is expected to return the same shape as
        // runKeywordSearch.
        results = opts.semanticBackend(opts);
        break;
      default: {
        // hybrid: run both backends + fuse.
        const keywordResults = runKeywordSearch(opts.db, opts);
        const semanticResults = opts.semanticBackend(opts);
        const fused = reciprocalRankFusion({ keywordResults, semanticResults });
        results = fused.slice(0, opts.limit ?? DEFAULT_LIMIT);
      }
    }
  } catch (err) {
    if (!(err instanceof FTS5ParseError)) {
      throw err;
    }
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors: [
        `query: FTS5 parse error — ${err.originalError.message}. ` +
          'Try wrapping the query in double quotes for phrase mode ' +
          '(e.g., `cmk search \'"user-explicit"\'`).',
      ],
    });
  }

  return { action: 'found', mode, results };
}
|