@lh8ppl/claude-memory-kit 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/package.json +2 -2
- package/src/decisions-journal.mjs +223 -0
- package/src/digest.mjs +89 -0
- package/src/forget.mjs +6 -0
- package/src/search.mjs +105 -2
- package/src/subcommands.mjs +32 -0
package/README.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/LH8PPL/claude-memory-kit/main/docs/public/assets/wordmark-dark.svg">
|
|
4
|
+
<img src="https://raw.githubusercontent.com/LH8PPL/claude-memory-kit/main/docs/public/assets/wordmark.svg" alt="claude-memory-kit" width="340">
|
|
5
|
+
</picture>
|
|
6
|
+
</p>
|
|
7
|
+
|
|
1
8
|
# @lh8ppl/claude-memory-kit
|
|
2
9
|
|
|
3
10
|
**`cmk`** — the CLI for [claude-memory-kit](https://github.com/LH8PPL/claude-memory-kit), a per-project, in-repo memory system for [Claude Code](https://docs.claude.com/en/docs/claude-code). It fixes Claude's per-session amnesia so you don't have to re-tell the backstory every time you start a new session.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lh8ppl/claude-memory-kit",
|
|
3
|
-
"version": "0.3.
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"description": "cmk — the CLI for claude-memory-kit. Per-project, in-repo memory system for Claude Code.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"better-sqlite3": "^12.10.0",
|
|
35
35
|
"chokidar": "^5.0.0",
|
|
36
36
|
"commander": "^15.0.0",
|
|
37
|
-
"js-yaml": "^4.
|
|
37
|
+
"js-yaml": "^4.2.0",
|
|
38
38
|
"sqlite-vec": "^0.1.9",
|
|
39
39
|
"zod": "^4.4.3"
|
|
40
40
|
},
|
package/src/decisions-journal.mjs
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
// The append-only decision journal — context/DECISIONS.md (Task 147, D-161).
|
|
2
|
+
//
|
|
3
|
+
// A chronological, human-readable page of every decision + its why. The VIEW
|
|
4
|
+
// the kit was missing: decisions are captured as facts but scattered across N
|
|
5
|
+
// per-fact files with no chronological decision page (cmk search is pull, not
|
|
6
|
+
// browse; MEMORY.md is bounded + rolls). This is the squad `decisions.md` /
|
|
7
|
+
// our own DECISION-LOG.md equivalent, made automatic.
|
|
8
|
+
//
|
|
9
|
+
// LIFECYCLE — APPEND-ONLY, never regenerated, never parked (D-161):
|
|
10
|
+
// - This is NOT a derived view like INDEX.md. Regenerating from live facts
|
|
11
|
+
// would silently ERASE superseded/forgotten decisions — rewriting history
|
|
12
|
+
// to look like the current state was always obvious (the exact failure the
|
|
13
|
+
// decision-trail-preservation rule exists to prevent).
|
|
14
|
+
// - A decision journal is unbounded by design: old decisions are the MOST
|
|
15
|
+
// valuable part (they explain why the codebase is shaped as it is), so the
|
|
16
|
+
// MEMORY.md rolling-window must NOT apply.
|
|
17
|
+
// - Mechanics: new decision → appended; tombstoned → its entry MARKED
|
|
18
|
+
// retracted IN PLACE (never removed); every pre-existing entry survives
|
|
19
|
+
// every update.
|
|
20
|
+
//
|
|
21
|
+
// The update is triggered like a derived view (runs where reindex runs, so the
|
|
22
|
+
// journal stays current) but its WRITE LOGIC is append-only — the file is the
|
|
23
|
+
// accumulator, the facts are only the trigger. Each entry carries a stable
|
|
24
|
+
// machine marker `<!-- decision:P-XXXXXXXX -->` so the updater knows which ids
|
|
25
|
+
// are already journaled + which entries to annotate, without parsing prose
|
|
26
|
+
// (and so a human can freely add their own prose between entries — preserved).
|
|
27
|
+
//
|
|
28
|
+
// v0.3.2 scope: explicit signals only (capture appends; forget marks retracted;
|
|
29
|
+
// explicit supersession annotates). AUTOMATIC semantic contradiction-detection
|
|
30
|
+
// is deferred to F-D / Task 95.
|
|
31
|
+
|
|
32
|
+
import { existsSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
|
|
33
|
+
import { join } from 'node:path';
|
|
34
|
+
import { parse as parseFrontmatter } from './frontmatter.mjs';
|
|
35
|
+
import { ID_PATTERN } from './tier-paths.mjs';
|
|
36
|
+
|
|
37
|
+
export const DECISIONS_HEADER =
|
|
38
|
+
'# Decisions\n\n' +
|
|
39
|
+
'> Append-only decision journal — every decision the kit captured, in order, with its why.\n' +
|
|
40
|
+
'> Maintained by claude-memory-kit (`cmk digest`). Superseded/retracted entries stay (the trail is the point).';
|
|
41
|
+
|
|
42
|
+
// Only this fact type is a "decision" in the kit taxonomy (the project/state
|
|
43
|
+
// category — what project-memory's decisions.md and our DECISION-LOG track).
|
|
44
|
+
const DECISION_TYPE = 'project';
|
|
45
|
+
|
|
46
|
+
const markerFor = (id) => `<!-- decision:${id} -->`;
|
|
47
|
+
const RETRACT_TAG = '_(retracted';
|
|
48
|
+
|
|
49
|
+
/**
 * The yyyy-mm-dd part of a timestamp, for display in the journal.
 *
 * - string starting with `yyyy-mm-dd` → that 10-character prefix
 * - any other string                  → returned unchanged (stays visible)
 * - valid Date instance               → its UTC yyyy-mm-dd
 * - anything else (null, undefined, numbers, invalid Date) → 'unknown-date'
 *
 * NOTE(review): the Date branch exists because a YAML frontmatter parser may
 * hand back a Date for an unquoted timestamp — confirm against frontmatter.mjs.
 *
 * @param {unknown} iso timestamp value
 * @returns {string}
 */
function dateOnly(iso) {
  if (iso instanceof Date) {
    // An invalid Date would make toISOString() throw a RangeError.
    return Number.isNaN(iso.getTime()) ? 'unknown-date' : iso.toISOString().slice(0, 10);
  }
  if (typeof iso !== 'string') return 'unknown-date';
  const m = iso.match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : iso;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Render ONE journal entry for a decision fact. The machine marker lets the
 * updater dedup + annotate; the human-readable lines are Title / date / Why / id.
 *
 * The title is forced onto a single line: updateDecisionsJournal inserts the
 * retraction note right after the `### ` heading line, so a title containing
 * line breaks would otherwise get the note spliced into the middle of it.
 * A missing title falls back to the fact id instead of rendering "undefined".
 *
 * @param {{id:string,title:string,createdAt?:string,why?:string|null}} f
 * @returns {string} the entry block (no trailing newline)
 */
export function buildDecisionEntry(f) {
  const date = dateOnly(f.createdAt);
  // Split on line breaks, trim each piece, re-join with single spaces.
  const title = String(f.title ?? f.id)
    .split(/[\r\n]+/)
    .map((part) => part.trim())
    .filter((part) => part !== '')
    .join(' ');
  const lines = [
    markerFor(f.id),
    `### ${title}`,
    `**When:** ${date} · **Fact:** \`${f.id}\``,
  ];
  // The Why line is optional: only emitted when there is non-blank text.
  const why = f.why ? String(f.why).trim() : '';
  if (why) {
    lines.push(`**Why:** ${why}`);
  }
  return lines.join('\n');
}
|
|
75
|
+
|
|
76
|
+
// The kit's id matcher, DERIVED from the canonical ID_PATTERN (tier-paths.mjs)
|
|
77
|
+
// so the base32 alphabet lives in exactly ONE place and can't drift. The
|
|
78
|
+
// original bug: this module hardcoded `[A-Z2-9]` (uppercase only), but the real
|
|
79
|
+
// alphabet includes a lowercase `a` — so any id containing `a` never matched
|
|
80
|
+
// "already journaled" → re-appended on EVERY digest run (the cut-gate find).
|
|
81
|
+
// Strip the `^…$` anchors to embed the pattern inside larger regexes.
|
|
82
|
+
const ID_CHARS = ID_PATTERN.source.replace(/^\^/, '').replace(/\$$/, '');
|
|
83
|
+
|
|
84
|
+
/**
 * Collect every fact id that already has a `<!-- decision:ID -->` machine
 * marker somewhere in the journal text.
 *
 * @param {string} content journal text
 * @returns {Set<string>} the ids found
 */
function journaledIds(content) {
  const markerPattern = new RegExp(`<!-- decision:(${ID_CHARS}) -->`, 'g');
  const found = new Set();
  for (const hit of String(content).matchAll(markerPattern)) {
    found.add(hit[1]);
  }
  return found;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Append-only journal update (D-161). Pure: content in → content out.
 *
 * Two passes over the journal text:
 *  1. every fact of the decision type whose marker is not yet present gets an
 *     entry appended at the end of the text;
 *  2. every tombstoned id that has an entry gets a `_(retracted yyyy-mm-dd)_`
 *     line inserted directly under its heading, unless one is already there.
 * Existing text is never removed or reordered.
 *
 * @param {object} a
 * @param {string} a.existingContent  current DECISIONS.md ('' if absent)
 * @param {Array}  a.facts            live decision-class facts ({id,type,title,createdAt,why})
 * @param {Set<string>} a.tombstonedIds ids whose fact has been forgotten
 * @param {string} a.now              ISO timestamp for retraction stamps
 * @returns {string} the new DECISIONS.md content (always ends with a newline)
 */
export function updateDecisionsJournal({ existingContent = '', facts = [], tombstonedIds = new Set(), now }) {
  // A blank/absent journal starts from the standard header.
  let content = existingContent.trim() === '' ? DECISIONS_HEADER + '\n' : existingContent;
  const already = journaledIds(content);

  // 1) Append entries for decision-class facts not yet journaled.
  const newEntries = [];
  for (const f of facts) {
    if (f.type !== DECISION_TYPE) continue; // only decisions
    if (already.has(f.id)) continue; // already journaled — never duplicate
    newEntries.push(buildDecisionEntry(f));
    already.add(f.id); // guards against the same id appearing twice in `facts`
  }
  if (newEntries.length > 0) {
    if (!content.endsWith('\n')) content += '\n';
    content += '\n' + newEntries.join('\n\n') + '\n';
  }

  // 2) Mark retracted (in place) any journaled entry whose fact is now
  //    tombstoned and not already marked. Never removes the entry.
  const stamp = dateOnly(now);
  for (const id of tombstonedIds) {
    const marker = markerFor(id);
    const idx = content.indexOf(marker);
    if (idx === -1) continue; // not journaled — nothing to retract
    // Bound the search to THIS entry's span — up to the next decision marker
    // (or end-of-file). Prevents a malformed/hand-edited entry with no heading
    // from attaching the retraction note to the NEXT entry's heading.
    const nextMarker = content.indexOf('<!-- decision:', idx + marker.length);
    const spanEnd = nextMarker === -1 ? content.length : nextMarker;
    // Find this entry's heading line (the `### …` after the marker, within span).
    const headingStart = content.indexOf('### ', idx);
    if (headingStart === -1 || headingStart >= spanEnd) continue;
    let headingEnd = content.indexOf('\n', headingStart);
    if (headingEnd === -1) {
      // The heading is the very last line and the text lacks a trailing
      // newline (hand-edited file). Terminate the line so the retraction is
      // applied in THIS run rather than silently deferred to the next one.
      content += '\n';
      headingEnd = content.length - 1;
    }
    // Already retracted? (the note sits right after the heading)
    const afterHeading = content.slice(headingEnd + 1, headingEnd + 1 + RETRACT_TAG.length);
    if (afterHeading === RETRACT_TAG) continue;
    const note = `${RETRACT_TAG} ${stamp})_`;
    content = content.slice(0, headingEnd + 1) + note + '\n' + content.slice(headingEnd + 1);
  }

  if (!content.endsWith('\n')) content += '\n';
  return content;
}
|
|
147
|
+
|
|
148
|
+
// --- File-IO orchestration (the impure shell over the pure core) ----------
|
|
149
|
+
|
|
150
|
+
// Leading indent is [ \t]* (NOT \s*) so it can't match the newline the
|
|
151
|
+
// (?:^|\n) anchor already consumed — that overlap is the backtracking
|
|
152
|
+
// ambiguity SonarCloud flags as ReDoS. Disjoint character classes = linear.
|
|
153
|
+
const RICH_WHY_RE = /(?:^|\n)[ \t]*\*\*Why:\*\*[ \t]*([^\n]+)/;
|
|
154
|
+
|
|
155
|
+
/**
 * Load the decision-class facts (frontmatter `type` equal to DECISION_TYPE)
 * stored as markdown files under `<projectRoot>/context/memory`.
 *
 * Skipped: non-`.md` files, INDEX.md, files without an id, files of another
 * type, files with a truthy `deleted_at`, and any file whose read/parse throws.
 *
 * @param {string} projectRoot
 * @returns {Array<{id:string,type:string,title:string,createdAt:string|null,why:string|null}>}
 *   facts sorted by the string form of `createdAt`, ascending
 */
function readProjectDecisionFacts(projectRoot) {
  const memoryDir = join(projectRoot, 'context', 'memory');
  if (!existsSync(memoryDir)) return [];

  const decisions = [];
  const candidates = readdirSync(memoryDir).filter(
    (fileName) => fileName.endsWith('.md') && fileName !== 'INDEX.md',
  );
  for (const fileName of candidates) {
    try {
      const parsed = parseFrontmatter(readFileSync(join(memoryDir, fileName), 'utf8'));
      const { frontmatter: meta, body } = parsed;
      const isLiveDecision = Boolean(meta?.id) && meta.type === DECISION_TYPE && !meta.deleted_at;
      if (isLiveDecision) {
        // First `**Why:** …` line of the body, if any.
        const whyHit = String(body ?? '').match(RICH_WHY_RE);
        decisions.push({
          id: meta.id,
          type: meta.type,
          title: meta.title ?? meta.id,
          createdAt: meta.created_at ?? null,
          why: whyHit ? whyHit[1].trim() : null,
        });
      }
    } catch {
      // unparseable file — reindex/HC-4 own that class; the journal skips it
    }
  }

  // Oldest first, so appended entries read like a timeline.
  const byCreation = (a, b) => String(a.createdAt).localeCompare(String(b.createdAt));
  decisions.sort(byCreation);
  return decisions;
}
|
|
182
|
+
|
|
183
|
+
/**
 * ids of forgotten facts, read from the tombstone archive
 * (`<projectRoot>/context/memory/archive/tombstones/<id>.md`).
 *
 * Only file names of the exact form `<id>.md` count; anything else in the
 * directory is ignored. A missing directory yields an empty set.
 *
 * @param {string} projectRoot
 * @returns {Set<string>}
 */
function readTombstonedIds(projectRoot) {
  const ids = new Set();
  const dir = join(projectRoot, 'context', 'memory', 'archive', 'tombstones');
  if (!existsSync(dir)) return ids;
  // Loop-invariant: build the matcher once, not once per directory entry.
  // No `g` flag, so exec() carries no lastIndex state between names.
  const tombstoneName = new RegExp(`^(${ID_CHARS})\\.md$`);
  for (const name of readdirSync(dir)) {
    const m = tombstoneName.exec(name);
    if (m) ids.add(m[1]);
  }
  return ids;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Read → append-only update → write context/DECISIONS.md.
 *
 * Idempotent: when the updated text equals what is already on disk, nothing
 * is written. Best-effort: never throws into the caller (a journal failure
 * must not break a capture/read path) — any error is returned as `error`.
 *
 * `appended` is the growth of the file in UTF-8 BYTES (callers print it as
 * bytes). The journal contains non-ASCII characters, so a string `.length`
 * difference would under-report it.
 *
 * @param {{projectRoot:string, now?:string}} [a] `now` defaults to the current time (ISO)
 * @returns {{written:boolean, path:string, appended:number}|{written:false,error:string}}
 */
export function syncDecisionsJournal({ projectRoot, now } = {}) {
  try {
    const path = join(projectRoot, 'context', 'DECISIONS.md');
    const existingContent = existsSync(path) ? readFileSync(path, 'utf8') : '';
    const facts = readProjectDecisionFacts(projectRoot);
    const tombstonedIds = readTombstonedIds(projectRoot);
    const next = updateDecisionsJournal({
      existingContent,
      facts,
      tombstonedIds,
      now: now ?? new Date().toISOString(),
    });
    if (next === existingContent) {
      return { written: false, path, appended: 0 };
    }
    writeFileSync(path, next, 'utf8');
    const appended = Buffer.byteLength(next, 'utf8') - Buffer.byteLength(existingContent, 'utf8');
    return { written: true, path, appended };
  } catch (err) {
    return { written: false, error: err?.message ?? String(err) };
  }
}
|
package/src/digest.mjs
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// `cmk digest` — a regenerated, readable render of everything the kit currently
|
|
2
|
+
// knows (Task 147, D-132). Facts by type + the persona + active threads, as one
|
|
3
|
+
// markdown page. The README-demo artifact.
|
|
4
|
+
//
|
|
5
|
+
// REGENERATED (not append-only): unlike DECISIONS.md (the permanent journal),
|
|
6
|
+
// the digest is a CURRENT-KNOWLEDGE snapshot — it should reflect only what
|
|
7
|
+
// exists now, so it is rebuilt on every invocation (the INDEX.md lifecycle,
|
|
8
|
+
// correct here). The two surfaces differ on purpose: digest = "what do we know
|
|
9
|
+
// now", DECISIONS.md = "what did we decide over time" (D-161).
|
|
10
|
+
//
|
|
11
|
+
// Read-only by contract: pure reads over the fact archive + scratchpads. The
|
|
12
|
+
// `cmk digest` command also triggers the DECISIONS.md journal sync (the one
|
|
13
|
+
// mutation, delegated to the append-only writer).
|
|
14
|
+
|
|
15
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
16
|
+
import { join } from 'node:path';
|
|
17
|
+
import { parse as parseFrontmatter } from './frontmatter.mjs';
|
|
18
|
+
|
|
19
|
+
const TYPE_ORDER = ['project', 'feedback', 'reference', 'user'];
|
|
20
|
+
const TYPE_LABEL = {
|
|
21
|
+
project: 'Decisions & project state',
|
|
22
|
+
feedback: 'Working-style & preferences',
|
|
23
|
+
reference: 'References',
|
|
24
|
+
user: 'About the user',
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
/**
 * Load every live fact stored as a markdown file under
 * `<projectRoot>/context/memory`.
 *
 * Skipped: non-`.md` files, INDEX.md, files without an id, files with a
 * truthy `deleted_at`, and any file whose read/parse throws.
 *
 * @param {string} projectRoot
 * @returns {Array<{id:string,type:string,title:string,trust:string,createdAt:string|null}>}
 */
function readFacts(projectRoot) {
  const memoryDir = join(projectRoot, 'context', 'memory');
  if (!existsSync(memoryDir)) return [];

  const collected = [];
  const candidates = readdirSync(memoryDir).filter(
    (fileName) => fileName.endsWith('.md') && fileName !== 'INDEX.md',
  );
  for (const fileName of candidates) {
    try {
      const { frontmatter: meta } = parseFrontmatter(readFileSync(join(memoryDir, fileName), 'utf8'));
      const isLive = Boolean(meta?.id) && !meta.deleted_at;
      if (isLive) {
        collected.push({
          id: meta.id,
          type: meta.type ?? 'unknown',
          title: meta.title ?? meta.id,
          trust: meta.trust ?? 'unknown',
          createdAt: meta.created_at ?? null,
        });
      }
    } catch {
      // unparseable — reindex/HC-4 own that class
    }
  }
  return collected;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Render the digest markdown for a list of facts (pure — exported for testing).
 *
 * Layout: a dated title, then either an "empty" notice or a fact count
 * followed by one `##` section per fact type. Types listed in TYPE_ORDER come
 * first in that order; any other type follows in first-seen order. Inside a
 * section, facts are sorted by the string form of `createdAt`.
 *
 * @param {Array} facts
 * @param {{now?:string}} [opts] `now` is an ISO timestamp; defaults to the current time
 * @returns {string} the markdown page
 */
export function buildDigest(facts, { now } = {}) {
  const isoNow = now ?? new Date().toISOString();
  const header = `# Memory digest — ${isoNow.slice(0, 10)}`;
  if (facts.length === 0) {
    return [header, '', '_Memory is empty — capture starts as you work._', ''].join('\n');
  }

  // Group by type; a Map keeps first-seen order for types outside TYPE_ORDER.
  const grouped = new Map();
  facts.forEach((fact) => {
    const bucket = grouped.get(fact.type);
    if (bucket) {
      bucket.push(fact);
    } else {
      grouped.set(fact.type, [fact]);
    }
  });

  const knownTypes = TYPE_ORDER.filter((type) => grouped.has(type));
  const extraTypes = [...grouped.keys()].filter((type) => !TYPE_ORDER.includes(type));
  const byCreation = (a, b) => String(a.createdAt).localeCompare(String(b.createdAt));

  const out = [header, '', `${facts.length} fact(s) in project memory.`, ''];
  for (const type of [...knownTypes, ...extraTypes]) {
    // Sort a copy so the caller's fact order is left untouched.
    const members = [...grouped.get(type)].sort(byCreation);
    out.push(`## ${TYPE_LABEL[type] ?? type} (${members.length})`, '');
    for (const { title, id, trust, createdAt } of members) {
      const day = String(createdAt ?? '').slice(0, 10) || '—';
      out.push(`- **${title}** · \`${id}\` · ${trust} · ${day}`);
    }
    out.push('');
  }
  return out.join('\n');
}
|
|
84
|
+
|
|
85
|
+
/**
 * Render the digest page for a project: load its facts from disk, then hand
 * them to buildDigest. Writes nothing.
 *
 * @param {{projectRoot:string, now?:string}} [a]
 * @returns {string} the digest markdown
 */
export function digest({ projectRoot, now } = {}) {
  return buildDigest(readFacts(projectRoot), { now });
}
|
package/src/forget.mjs
CHANGED
|
@@ -172,6 +172,12 @@ function scrubAllScratchpads(tierRoot, id) {
|
|
|
172
172
|
if (!entry.isFile()) continue;
|
|
173
173
|
if (!entry.name.endsWith('.md')) continue;
|
|
174
174
|
if (entry.name === 'INDEX.md') continue;
|
|
175
|
+
// DECISIONS.md is the APPEND-ONLY decision journal (Task 147 / D-161), NOT
|
|
176
|
+
// a scratchpad — forget must NOT strip its id-bearing lines (the marker +
|
|
177
|
+
// **Fact:** line). The journal sync marks the entry RETRACTED in place
|
|
178
|
+
// instead (preserving the trail). Scrubbing it here would delete the
|
|
179
|
+
// entry's marker and break the retract-in-place path (composition bug).
|
|
180
|
+
if (entry.name === 'DECISIONS.md') continue;
|
|
175
181
|
const p = join(tierRoot, entry.name);
|
|
176
182
|
const r = scrubScratchpadFile(p, id);
|
|
177
183
|
if (r.changed) edits.push({ path: p, removed: r.removed });
|
package/src/search.mjs
CHANGED
|
@@ -136,6 +136,109 @@ function validateInput(opts) {
|
|
|
136
136
|
return { errors, mode, scope };
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
+
// --- FTS5 query sanitization (Task 153) -------------------------------
|
|
140
|
+
//
|
|
141
|
+
// FTS5's MATCH grammar (sqlite.org/fts5 §3) treats many characters a user
|
|
142
|
+
// would type in a natural query as operators or syntax errors:
|
|
143
|
+
// - a bareword may ONLY contain letters / digits / underscore / non-ASCII;
|
|
144
|
+
// a `.`, `-`, `:`, `+`, `^`, `(`, etc. in a bareword is a SYNTAX ERROR.
|
|
145
|
+
// - `AND` / `OR` / `NOT` (case-sensitive) are reserved boolean operators.
|
|
146
|
+
// So `cmk search v0.3` crashed (`v0` then `.3` → `.` violates the bareword
|
|
147
|
+
// grammar), and `cmk search user-explicit` parsed `-` as a column-exclude.
|
|
148
|
+
//
|
|
149
|
+
// The SQLite-sanctioned fix is to double-quote the offending token: inside a
|
|
150
|
+
// quoted string the tokenizer treats `.`/`-`/`:` as separators, so `"v0.3"`
|
|
151
|
+
// tokenizes to `v0` + `3` and matches the literal content. We quote
|
|
152
|
+
// PER-TOKEN (not the whole query) so a plain multi-word query keeps its
|
|
153
|
+
// implicit-AND semantics (better recall) rather than collapsing to a strict
|
|
154
|
+
// adjacency phrase. A token the user already quoted is left untouched.
|
|
155
|
+
//
|
|
156
|
+
// Validated against the FTS5 spec AND basic-memory's real implementation
|
|
157
|
+
// (the kit's closest FTS5 + markdown-native design analog). Full rationale:
|
|
158
|
+
// docs/research/2026-06-15-fts5-query-preparation-cross-system.md.
|
|
159
|
+
|
|
160
|
+
// A bareword that FTS5 accepts as-is: letters, digits, underscore, non-ASCII.
|
|
161
|
+
// Anything else in the token means it must be quoted to be a literal.
|
|
162
|
+
const FTS5_BAREWORD_RE = /^[\p{L}\p{N}_]+$/u;
|
|
163
|
+
const FTS5_RESERVED_WORDS = new Set(['AND', 'OR', 'NOT']);
|
|
164
|
+
|
|
165
|
+
// Quote a single token for literal FTS5 matching, escaping embedded `"`
// SQL-style (double it) per the spec. Used only when the token isn't a safe
// bareword. `replaceAll` with a string pattern replaces every occurrence
// literally — no regex needed for a fixed character.
function quoteFtsToken(token) {
  return `"${token.replaceAll('"', '""')}"`;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Turn a raw user query into a MATCH string FTS5 will accept.
 *
 * The query is split into tokens and each token is handled on its own, so a
 * plain multi-word query keeps implicit-AND between its words:
 *  - a token the user already wrapped in `"…"` is kept verbatim (explicit
 *    phrase search);
 *  - a safe bareword that is not AND / OR / NOT is kept verbatim;
 *  - everything else is double-quoted so it is matched as a literal.
 *
 * Exported for isolated unit testing (like reciprocalRankFusion).
 *
 * @param {string} raw the user's query
 * @returns {string} an FTS5-safe MATCH expression ('' for empty or non-string input)
 */
export function prepareFtsQuery(raw) {
  const query = typeof raw === 'string' ? raw.trim() : '';
  if (query === '') return '';

  const isUserPhrase = (token) => token.length >= 2 && token.startsWith('"') && token.endsWith('"');
  const isPlainWord = (token) => FTS5_BAREWORD_RE.test(token) && !FTS5_RESERVED_WORDS.has(token);

  const parts = [];
  for (const token of tokenizeQuery(query)) {
    const keepAsTyped = isUserPhrase(token) || isPlainWord(token);
    parts.push(keepAsTyped ? token : quoteFtsToken(token));
  }
  return parts.join(' ');
}
|
|
207
|
+
|
|
208
|
+
// Split a query into tokens, keeping a double-quoted span (which may contain
// spaces, e.g. `"thin routes"`) as ONE token. A naive whitespace split would
// tear `"thin routes"` into `"thin` + `routes"` and corrupt the quoting.
//
// One linear regex does the whole scan (instead of testing a regex against
// every single character):
//   "[^"]*"?   a `"` at a token boundary opens a phrase span that runs up to
//              and including the closing quote — or to end-of-string when the
//              quote is unbalanced;
//   \S+        otherwise a run of non-space characters; a `"` that appears
//              MID-run (e.g. `he"llo`) is part of the token, not a delimiter.
// The two alternatives start on different characters, so there is no
// backtracking ambiguity. A non-string input yields no tokens.
function tokenizeQuery(query) {
  if (typeof query !== 'string') return [];
  return query.match(/"[^"]*"?|\S+/g) ?? [];
}
|
|
241
|
+
|
|
139
242
|
// --- Keyword (FTS5 BM25) backend --------------------------------------
|
|
140
243
|
|
|
141
244
|
const KEYWORD_BASE_SQL = `
|
|
@@ -158,7 +261,7 @@ WHERE observations_fts MATCH @query
|
|
|
158
261
|
|
|
159
262
|
function buildKeywordSql(opts) {
|
|
160
263
|
const clauses = [];
|
|
161
|
-
const params = { query: opts.query };
|
|
264
|
+
const params = { query: prepareFtsQuery(opts.query) };
|
|
162
265
|
if (opts.tier !== undefined) {
|
|
163
266
|
clauses.push('o.tier = @tier');
|
|
164
267
|
params.tier = opts.tier;
|
|
@@ -265,7 +368,7 @@ function runTranscriptKeywordSearch(db, opts) {
|
|
|
265
368
|
try {
|
|
266
369
|
rows = db
|
|
267
370
|
.prepare(TRANSCRIPT_KEYWORD_SQL)
|
|
268
|
-
.all({ query: opts.query, limit: opts.limit ?? DEFAULT_LIMIT });
|
|
371
|
+
.all({ query: prepareFtsQuery(opts.query), limit: opts.limit ?? DEFAULT_LIMIT });
|
|
269
372
|
} catch (err) {
|
|
270
373
|
if (err?.code === 'SQLITE_ERROR' || /fts5:|no such column:/i.test(err?.message ?? '')) {
|
|
271
374
|
throw new FTS5ParseError(err, opts.query);
|
package/src/subcommands.mjs
CHANGED
|
@@ -869,6 +869,32 @@ function runReindex(options /* , command */) {
|
|
|
869
869
|
}
|
|
870
870
|
}
|
|
871
871
|
|
|
872
|
+
/**
 * `cmk digest` (Task 147) — sync the append-only context/DECISIONS.md journal
 * (D-161) for the project in the current working directory, print the
 * regenerated digest to stdout, then report what happened to the journal.
 *
 * The journal sync is best-effort: a failure is reported on stderr after the
 * digest has been printed and does not prevent the digest from rendering.
 * `options` is accepted for the subcommand action signature and is not read.
 */
async function runDigestCli(options) {
  const projectRoot = resolvePath(process.cwd());
  const { digest: renderDigest } = await import('./digest.mjs');
  const { syncDecisionsJournal: syncJournal } = await import('./decisions-journal.mjs');

  // Sync first, print the digest second, report the sync outcome last.
  const journal = syncJournal({ projectRoot });
  console.log(renderDigest({ projectRoot }));

  if (journal.written) {
    console.log(`\ncontext/DECISIONS.md updated (+${journal.appended} bytes) — the append-only decision journal.`);
    return;
  }
  if (journal.error) {
    console.error(`\n(decision journal not updated: ${journal.error})`);
    return;
  }
  console.log('\ncontext/DECISIONS.md is up to date.');
}
|
|
897
|
+
|
|
872
898
|
/**
|
|
873
899
|
* `cmk forget <id-or-query>` — wired in Task 9. Tombstones the matching
|
|
874
900
|
* fact (moves it to <tier>/<memory|fragments>/archive/tombstones/<id>.md
|
|
@@ -2019,6 +2045,12 @@ export const subcommands = [
|
|
|
2019
2045
|
milestone: 37,
|
|
2020
2046
|
action: runDoctorCli,
|
|
2021
2047
|
},
|
|
2048
|
+
{
|
|
2049
|
+
name: 'digest',
|
|
2050
|
+
description: 'print a readable digest of everything in memory + sync the append-only DECISIONS.md decision journal',
|
|
2051
|
+
milestone: 147,
|
|
2052
|
+
action: runDigestCli,
|
|
2053
|
+
},
|
|
2022
2054
|
{
|
|
2023
2055
|
name: 'config',
|
|
2024
2056
|
description: 'read/write kit settings (context/settings.json) without hand-editing JSON',
|