@lh8ppl/claude-memory-kit 0.2.4 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -10
- package/bin/cmk-capture-prompt.mjs +21 -1
- package/package.json +2 -1
- package/src/audit-log.mjs +1 -0
- package/src/auto-drain.mjs +17 -1
- package/src/auto-extract.mjs +72 -16
- package/src/auto-persona.mjs +86 -1
- package/src/capture-prompt.mjs +34 -1
- package/src/capture-turn.mjs +64 -6
- package/src/config-core.mjs +161 -0
- package/src/conflict-queue.mjs +20 -3
- package/src/content-hash.mjs +30 -0
- package/src/doctor.mjs +62 -3
- package/src/forget.mjs +13 -0
- package/src/frontmatter.mjs +4 -1
- package/src/import-anthropic-memory.mjs +25 -1
- package/src/import-claude-md.mjs +333 -0
- package/src/index-db.mjs +39 -0
- package/src/index-rebuild.mjs +48 -4
- package/src/index.mjs +10 -0
- package/src/inject-context.mjs +179 -7
- package/src/install.mjs +180 -1
- package/src/mcp-server.mjs +63 -8
- package/src/memory-health.mjs +229 -0
- package/src/memory-write.mjs +32 -10
- package/src/merge-facts.mjs +12 -0
- package/src/native-binding.mjs +142 -0
- package/src/poison-guard.mjs +55 -0
- package/src/provenance.mjs +4 -0
- package/src/remember-core.mjs +53 -8
- package/src/repair.mjs +20 -3
- package/src/result-shapes.mjs +1 -1
- package/src/scratchpad.mjs +5 -3
- package/src/search.mjs +96 -9
- package/src/semantic-backend.mjs +599 -0
- package/src/settings-hooks.mjs +4 -1
- package/src/subcommands.mjs +359 -42
- package/src/transcript-index.mjs +165 -0
- package/src/turn-tools.mjs +179 -0
- package/src/write-fact.mjs +34 -3
- package/template/.claude/skills/memory-search/SKILL.md +86 -0
- package/template/.gitattributes.fragment +16 -0
- package/template/CLAUDE.md.template +3 -1
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
// `cmk import-claude-md` (Task 142, D-130).
|
|
2
|
+
//
|
|
3
|
+
// Public boundary:
|
|
4
|
+
// async importClaudeMd({projectRoot, file?, now?, dryRun?, acceptAll?, writeFactImpl?})
|
|
5
|
+
// → {action, mode?, reason?, proposals, accepted, skipped, rejected, errors, sourcePath, duration_ms}
|
|
6
|
+
//
|
|
7
|
+
// Onboards a project from the rules file the user already owns (CLAUDE.md,
|
|
8
|
+
// .cursorrules, AGENTS.md, any markdown/plain rules file): parses it into
|
|
9
|
+
// TYPED fact candidates and writes each through writeFact() — the kit's one
|
|
10
|
+
// safe write path. That composition (not re-implementation) is the point:
|
|
11
|
+
// writeFact already gives Poison_Guard screening, home-path sanitization,
|
|
12
|
+
// content-addressed dedup, INDEX reindex, and create-audit. The D-125 bug
|
|
13
|
+
// (import-anthropic hand-rolling its provenance comment and breaking the next
|
|
14
|
+
// reindex) is the precedent this design avoids.
|
|
15
|
+
//
|
|
16
|
+
// Differences from `cmk import-anthropic-memory` (the structural template):
|
|
17
|
+
// - target is the GRANULAR fact archive (context/memory/), not MEMORY.md
|
|
18
|
+
// bullets — rules-file content is durable and typed, not scratchpad;
|
|
19
|
+
// - fact `type` is inferred from the nearest markdown heading
|
|
20
|
+
// (user / feedback / reference, default project);
|
|
21
|
+
// - candidates inside the kit's own managed CLAUDE.md block and inside
|
|
22
|
+
// code fences are never proposed (boilerplate / shell examples).
|
|
23
|
+
//
|
|
24
|
+
// Explicit user action only. Never automatic. `--dry-run` previews; apply
|
|
25
|
+
// requires explicit `--yes` (same confirmation contract as the precedent).
|
|
26
|
+
|
|
27
|
+
import { existsSync, readFileSync, readdirSync } from 'node:fs';
|
|
28
|
+
import { isAbsolute, join } from 'node:path';
|
|
29
|
+
import { canonicalize, generateId } from '@lh8ppl/cmk-canonicalize';
|
|
30
|
+
import { hashContent } from './content-hash.mjs';
|
|
31
|
+
import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
|
|
32
|
+
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
33
|
+
import { writeFact } from './write-fact.mjs';
|
|
34
|
+
import { slugifyFact } from './rich-fact.mjs';
|
|
35
|
+
import { sanitizeHomePaths } from './sanitize.mjs';
|
|
36
|
+
import { parse as parseFrontmatter } from './frontmatter.mjs';
|
|
37
|
+
|
|
38
|
+
const DEFAULT_FILE = 'CLAUDE.md';
|
|
39
|
+
const IMPORT_SOURCE = 'claude-md';
|
|
40
|
+
// Below this length a line is noise ("go", "etc."), not a rule.
|
|
41
|
+
const MIN_CANDIDATE_CHARS = 8;
|
|
42
|
+
|
|
43
|
+
const MANAGED_BLOCK_START = /<!--\s*claude-memory-kit:start\b/;
|
|
44
|
+
const MANAGED_BLOCK_END = /<!--\s*claude-memory-kit:end\s*-->/;
|
|
45
|
+
// Linear-time by construction (S5852, the D-128 class): every adjacent
|
|
46
|
+
// pair is disjoint — `[ \t]+` can never donate characters to the `\S` that
|
|
47
|
+
// starts the capture — so the regex engine has no backtracking ambiguity.
|
|
48
|
+
// Captures keep trailing whitespace; every consumer already calls .trim().
|
|
49
|
+
const HEADING = /^(#{1,6})[ \t]+(\S.*)$/;
|
|
50
|
+
const LIST_ITEM = /^[ \t]*(?:[-*+]|\d+[.)])[ \t]+(\S.*)$/;
|
|
51
|
+
const CODE_FENCE = /^\s*(```|~~~)/;
|
|
52
|
+
|
|
53
|
+
/**
 * Infer the kit fact type from the heading a candidate sits under.
 *
 * Heuristic by design — `--dry-run` shows the inferred type so the user can
 * inspect before applying. Order matters: user-profile phrasing is checked
 * first, then reference-style headings, then the broad rule/style class.
 *
 * The reference and rule keyword groups are both anchored at a word start
 * (`\b`), so a keyword buried inside a longer word does not trigger:
 * "Preferences" is not a "reference", "Preview" is not a "review", and
 * "Networking" is not "working". Suffixes still match ("Rules",
 * "Conventions", "Workflows", "Anti-patterns").
 *
 * @param {string|null} heading - nearest heading text; null/empty when the
 *   candidate sits above the first heading.
 * @returns {'user'|'feedback'|'project'|'reference'} `project` is the default
 *   when no keyword matches.
 */
export function inferFactType(heading) {
  if (!heading) return 'project';
  const h = String(heading).toLowerCase();
  if (/prefer|about (me|the user)|profile|persona|communicat/.test(h)) return 'user';
  if (/\b(link|reference|resource|url|bookmark)/.test(h)) return 'reference';
  if (
    /\b(rule|discipline|workflow|convention|anti-pattern|style|verification|review|testing|engineering|working)/.test(h)
  ) {
    return 'feedback';
  }
  return 'project';
}
|
|
73
|
+
|
|
74
|
+
/**
 * Parse a rules file into typed fact candidates.
 *
 * Primary shape: markdown list items (-, *, +, 1.) with the nearest heading
 * as type context. Fallback shape (.cursorrules and other plain-text rules
 * files): when the file yields NO list-item candidates at all, every
 * non-empty, non-heading line outside code fences is a candidate.
 *
 * Skipped in both shapes: code-fence content (shell examples, not rules), the
 * kit's own managed CLAUDE.md block (importing our boilerplate back into
 * memory would be noise for every kit user), and anything shorter than
 * MIN_CANDIDATE_CHARS.
 *
 * Fence tracking follows the CommonMark closing rule: a fence is closed only
 * by a line using the SAME fence character, at least as many of them, and no
 * info string. A `~~~` line (or a "```js" example) inside a ``` block is
 * therefore content, not a close.
 *
 * @param {string} text - the rules-file content (a leading BOM is tolerated).
 * @returns {Array<{text: string, line: number, heading: string|null, type: string}>}
 *   `line` is 1-based; `type` comes from inferFactType(heading).
 */
export function parseRulesFile(text) {
  // readFileSync(..., 'utf8') keeps a BOM; left in place it would stop HEADING
  // from matching a heading on the very first line.
  const lines = String(text).replace(/^\uFEFF/, '').split(/\r?\n/);
  const bullets = [];
  const plain = [];
  let heading = null;
  // null when outside a fence, else the opening fence's { marker, run }.
  let openFence = null;
  let inManagedBlock = false;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    const blockStart = MANAGED_BLOCK_START.exec(line);
    if (blockStart) {
      // A start and end marker on the same line is an empty block; without
      // this check the rest of the file would be swallowed as "managed".
      inManagedBlock = !MANAGED_BLOCK_END.test(line.slice(blockStart.index));
      continue;
    }
    if (inManagedBlock) {
      if (MANAGED_BLOCK_END.test(line)) inManagedBlock = false;
      continue;
    }

    const fence = CODE_FENCE.exec(line);
    if (fence) {
      const marker = fence[1][0];
      const rest = line.trimStart();
      let run = 0;
      while (rest[run] === marker) run += 1;
      if (openFence === null) {
        openFence = { marker, run };
      } else if (
        marker === openFence.marker &&
        run >= openFence.run &&
        rest.slice(run).trim() === ''
      ) {
        openFence = null;
      }
      // Any other fence-looking line inside an open fence is just content.
      continue;
    }
    if (openFence !== null) continue;

    const h = HEADING.exec(line);
    if (h) {
      heading = h[2].trim();
      continue;
    }

    const item = { line: i + 1, heading, type: inferFactType(heading) };
    const m = LIST_ITEM.exec(line);
    if (m && m[1].trim().length >= MIN_CANDIDATE_CHARS) {
      bullets.push({ ...item, text: m[1].trim() });
      continue;
    }
    // A too-short list item is dropped entirely (the `!m` guard) rather than
    // being re-admitted as a plain line with its bullet marker attached.
    const t = line.trim();
    if (!m && t.length >= MIN_CANDIDATE_CHARS && !t.startsWith('<!--')) {
      plain.push({ ...item, text: t });
    }
  }

  return bullets.length > 0 ? bullets : plain;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Collect the canonical forms already present in project memory: every
 * list-item bullet in context/MEMORY.md plus the body of every `.md` file
 * directly under context/memory/ (INDEX.md excluded; the scan is not
 * recursive). Imported fact bodies are the bare rule text, so a re-run
 * canonicalize-matches its own first run here.
 *
 * Best-effort throughout: an unreadable scratchpad, an unlistable fact
 * directory, or an unparseable fact file only means fewer dedup hits — it
 * never aborts the import.
 *
 * @param {string} projectRoot
 * @returns {Set<string>} canonicalized texts; empty canonical forms are omitted.
 */
function collectExistingCanonical(projectRoot) {
  const existing = new Set();

  const memPath = join(projectRoot, 'context', 'MEMORY.md');
  if (existsSync(memPath)) {
    try {
      for (const line of readFileSync(memPath, 'utf8').split(/\r?\n/)) {
        const m = LIST_ITEM.exec(line);
        if (m) {
          const c = canonicalize(m[1].trim());
          if (c) existing.add(c);
        }
      }
    } catch {
      // best-effort: unreadable scratchpad means no dedup hits from it
    }
  }

  const factDir = join(projectRoot, 'context', 'memory');
  let names = [];
  if (existsSync(factDir)) {
    try {
      names = readdirSync(factDir);
    } catch {
      // best-effort: `context/memory` exists but cannot be listed (e.g. it is
      // a plain file, or permissions deny it) — same contract as the
      // scratchpad read above: no dedup hits from it, no crash.
    }
  }
  for (const name of names) {
    if (!name.endsWith('.md') || name === 'INDEX.md') continue;
    try {
      const { body } = parseFrontmatter(readFileSync(join(factDir, name), 'utf8'));
      const c = canonicalize(String(body ?? '').trim());
      if (c) existing.add(c);
    } catch {
      // skip unparseable files; writeFact's own id dedup still backstops
    }
  }
  return existing;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Run the import pipeline: parse the rules file, drop candidates already in
 * memory (or repeated within the file), then — only when explicitly applied —
 * write each remaining proposal through writeFact.
 *
 * Modes:
 *   - `dryRun`                      → mode 'dry-run', proposals listed, nothing written.
 *   - neither flag, proposals exist → mode 'requires-confirmation', nothing written.
 *   - `acceptAll` (the CLI's --yes) → mode 'apply', proposals written and audited.
 * A missing source file returns `reason: 'no-source'`; an unreadable one
 * returns `errors: 1` with a `read-source-failed: …` reason.
 *
 * @param {object} opts
 * @param {string} opts.projectRoot
 * @param {string} [opts.file] - rules file, relative to projectRoot or absolute (default CLAUDE.md)
 * @param {string} [opts.now] - timestamp used for audit entries (default nowIso())
 * @param {boolean} [opts.dryRun] - preview proposals; no file modified
 * @param {boolean} [opts.acceptAll] - apply every proposal (the CLI's --yes)
 * @param {Function} [opts.writeFactImpl] - test seam (default: the real writeFact);
 *   may return its result directly or as a promise
 * @returns {Promise<object>} `{action, mode?, reason?, proposals, accepted,
 *   skipped, rejected, errors, sourcePath, duration_ms}`; when projectRoot is
 *   missing, whatever errorResult() builds for MISSING_PROJECT_ROOT.
 */
export async function importClaudeMd({
  projectRoot,
  file,
  now,
  dryRun = false,
  acceptAll = false,
  writeFactImpl = writeFact,
} = {}) {
  const ts = now ?? nowIso();
  const t0 = Date.now();

  if (!projectRoot) {
    return errorResult({
      category: ERROR_CATEGORIES.MISSING_PROJECT_ROOT,
      errors: ['projectRoot is required'],
      duration_ms: Date.now() - t0,
    });
  }

  const fileRel = file && String(file).trim() ? String(file).trim() : DEFAULT_FILE;
  const sourcePath = isAbsolute(fileRel) ? fileRel : join(projectRoot, fileRel);
  // Every non-error return goes through here so the result shape is uniform;
  // `extra` overrides the zeroed defaults.
  const done = (extra) => ({
    action: 'completed',
    proposals: [],
    accepted: 0,
    skipped: 0,
    rejected: 0,
    errors: 0,
    sourcePath,
    duration_ms: Date.now() - t0,
    ...extra,
  });

  if (!existsSync(sourcePath)) return done({ reason: 'no-source' });

  let sourceText;
  try {
    sourceText = readFileSync(sourcePath, 'utf8');
  } catch (err) {
    return done({ errors: 1, reason: `read-source-failed: ${err?.message ?? err}` });
  }

  const existingCanonical = collectExistingCanonical(projectRoot);
  const tierRoot = join(projectRoot, 'context');
  const proposals = [];
  let skipped = 0;
  // Dry-run / requires-confirmation must not touch ANY file — including the
  // audit log. Skip entries are only audited when the user actually applied.
  const auditSkips = acceptAll && !dryRun;

  for (const candidate of parseRulesFile(sourceText)) {
    // Sanitize BEFORE canonicalizing so the dedup key matches what writeFact
    // actually lands on disk (it ids the sanitized body).
    const sanitized = sanitizeHomePaths(candidate.text);
    const canonical = canonicalize(sanitized);
    if (!canonical) continue;
    const id = generateId('P', sanitized);
    if (existingCanonical.has(canonical)) {
      skipped += 1;
      if (auditSkips) {
        try {
          appendAuditEntry(tierRoot, {
            ts,
            action: 'import',
            tier: 'P',
            id,
            reasonCode: REASON_CODES.IMPORT_SKIPPED_DUPLICATE,
            extra: { source: IMPORT_SOURCE },
          });
        } catch {
          // best-effort — never block the import flow on audit-log failure
        }
      }
      continue;
    }
    existingCanonical.add(canonical); // same-file duplicates collapse to one proposal
    proposals.push({
      text: candidate.text,
      line: candidate.line,
      heading: candidate.heading,
      type: candidate.type,
      id,
    });
  }

  if (dryRun) return done({ mode: 'dry-run', proposals, skipped });
  if (!acceptAll && proposals.length > 0) {
    return done({ mode: 'requires-confirmation', proposals, skipped });
  }
  if (proposals.length === 0) return done({ mode: 'apply', skipped });

  let accepted = 0;
  let rejected = 0;
  let errors = 0;
  // Two distinct rules can share a 60-char slug prefix (slugifyFact caps);
  // the second would hit writeFact's filename-collision error and be lost.
  // De-collide within the run by suffixing the (unique) source line.
  const usedSlugs = new Set();
  // The committed source_file field must never carry a username from an
  // absolute --file argument (the D-51 name-privacy class).
  const sourceFileField = sanitizeHomePaths(fileRel);
  for (const p of proposals) {
    const title = p.text.split('\n')[0].slice(0, 80);
    let slug = slugifyFact(title);
    if (usedSlugs.has(`${p.type}/${slug}`)) slug = `${slug}-l${p.line}`;
    usedSlugs.add(`${p.type}/${slug}`);

    let r;
    try {
      // Awaited so a promise-returning writeFactImpl is honored; a plain
      // (synchronous) result passes through `await` unchanged.
      r = await writeFactImpl({
        tier: 'P',
        type: p.type,
        slug,
        title,
        body: p.text,
        writeSource: 'imported',
        trust: 'medium',
        sourceFile: sourceFileField,
        sourceLine: p.line,
        // Content fingerprint for provenance — NOT a security context. Routes
        // through the shared hashContent (SHA-256, D-149); see remember-core.mjs.
        sourceSha1: hashContent(p.text),
        projectRoot,
        // writeFact's default create-audit is replaced by the richer-semantic
        // IMPORT_APPLIED entry below (the merge-facts precedent).
        audit: false,
      });
    } catch {
      // One failing write must not abort the run: earlier facts are already
      // on disk, so the caller still needs the final counts. Counted, not hidden.
      errors += 1;
      continue;
    }

    if (r?.action === 'created') {
      accepted += 1;
      try {
        appendAuditEntry(tierRoot, {
          ts,
          action: 'import',
          tier: 'P',
          id: r.id,
          reasonCode: REASON_CODES.IMPORT_APPLIED,
          paths: { after: r.path },
          extra: { source: IMPORT_SOURCE, trust: 'medium', write_source: 'imported' },
        });
      } catch {
        // best-effort
      }
    } else if (r?.action === 'skipped') {
      skipped += 1;
    } else if (r?.errorCategory === ERROR_CATEGORIES.POISON_GUARD) {
      // writeFact already logged the rejection to poison-guard.log (Door 4);
      // count it honestly — a rejected secret is not an "error", it's the
      // guard doing its job.
      rejected += 1;
    } else {
      errors += 1;
    }
  }

  return done({ mode: 'apply', proposals, accepted, skipped, rejected, errors });
}
|
package/src/index-db.mjs
CHANGED
|
@@ -116,6 +116,45 @@ CREATE TABLE IF NOT EXISTS files (
|
|
|
116
116
|
sha1 TEXT NOT NULL,
|
|
117
117
|
indexed_at INTEGER NOT NULL
|
|
118
118
|
);
|
|
119
|
+
|
|
120
|
+
-- Task 104.2 — the L3 raw tier (D-117). Transcript turn-chunks live in a
|
|
121
|
+
-- SEPARATE table + FTS so the raw tier is searched only when explicitly
|
|
122
|
+
-- asked (search --scope transcripts, the MemPalace last-resort contract)
|
|
123
|
+
-- and never pollutes L1 fact results. Chunks have no id/tier/trust — the
|
|
124
|
+
-- drill-back key is source_file:source_line. IF NOT EXISTS means existing
|
|
125
|
+
-- DBs gain these tables on the first open after upgrade (no migration).
|
|
126
|
+
CREATE TABLE IF NOT EXISTS transcript_chunks (
|
|
127
|
+
source_file TEXT NOT NULL,
|
|
128
|
+
chunk_idx INTEGER NOT NULL,
|
|
129
|
+
source_line INTEGER NOT NULL,
|
|
130
|
+
heading TEXT,
|
|
131
|
+
body TEXT NOT NULL,
|
|
132
|
+
PRIMARY KEY (source_file, chunk_idx)
|
|
133
|
+
);
|
|
134
|
+
|
|
135
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS transcript_chunks_fts USING fts5(
|
|
136
|
+
body, heading,
|
|
137
|
+
content='transcript_chunks',
|
|
138
|
+
content_rowid='rowid',
|
|
139
|
+
tokenize='porter unicode61'
|
|
140
|
+
);
|
|
141
|
+
|
|
142
|
+
CREATE TRIGGER IF NOT EXISTS tch_after_insert AFTER INSERT ON transcript_chunks BEGIN
|
|
143
|
+
INSERT INTO transcript_chunks_fts(rowid, body, heading)
|
|
144
|
+
VALUES (new.rowid, new.body, new.heading);
|
|
145
|
+
END;
|
|
146
|
+
|
|
147
|
+
CREATE TRIGGER IF NOT EXISTS tch_after_update AFTER UPDATE ON transcript_chunks BEGIN
|
|
148
|
+
INSERT INTO transcript_chunks_fts(transcript_chunks_fts, rowid, body, heading)
|
|
149
|
+
VALUES ('delete', old.rowid, old.body, old.heading);
|
|
150
|
+
INSERT INTO transcript_chunks_fts(rowid, body, heading)
|
|
151
|
+
VALUES (new.rowid, new.body, new.heading);
|
|
152
|
+
END;
|
|
153
|
+
|
|
154
|
+
CREATE TRIGGER IF NOT EXISTS tch_after_delete AFTER DELETE ON transcript_chunks BEGIN
|
|
155
|
+
INSERT INTO transcript_chunks_fts(transcript_chunks_fts, rowid, body, heading)
|
|
156
|
+
VALUES ('delete', old.rowid, old.body, old.heading);
|
|
157
|
+
END;
|
|
119
158
|
`;
|
|
120
159
|
|
|
121
160
|
/**
|
package/src/index-rebuild.mjs
CHANGED
|
@@ -42,11 +42,12 @@
|
|
|
42
42
|
// established sources of truth and does NOT re-implement bullet/frontmatter
|
|
43
43
|
// parsing or path resolution.
|
|
44
44
|
|
|
45
|
-
import { createHash } from 'node:crypto';
|
|
46
45
|
import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
|
|
47
46
|
import { basename, join, relative } from 'node:path';
|
|
48
47
|
import chokidar from 'chokidar';
|
|
49
48
|
import { INDEX_DB_SCHEMA } from './index-db.mjs';
|
|
49
|
+
import { hashContent } from './content-hash.mjs';
|
|
50
|
+
import { syncTranscriptChunks } from './transcript-index.mjs';
|
|
50
51
|
import { readBullet, parseBulletProvenance } from './provenance.mjs';
|
|
51
52
|
import { parse as parseFrontmatter } from './frontmatter.mjs';
|
|
52
53
|
import {
|
|
@@ -94,8 +95,12 @@ export function listObservationSources({ projectRoot, userDir }) {
|
|
|
94
95
|
|
|
95
96
|
// --- Helpers ----------------------------------------------------------
|
|
96
97
|
|
|
98
|
+
// Content fingerprint for the `files`-table mtime+sha1 diff key. The column
|
|
99
|
+
// name stays `sha1` for checkpoint back-compat; hashContent is SHA-256 (D-149).
|
|
100
|
+
// On the first boot after the algorithm change every checkpoint mismatches
|
|
101
|
+
// once and self-heals via the normal reindex.
|
|
97
102
|
function sha1OfContent(content) {
|
|
98
|
-
return
|
|
103
|
+
return hashContent(content);
|
|
99
104
|
}
|
|
100
105
|
|
|
101
106
|
function isoToEpochMs(iso) {
|
|
@@ -145,7 +150,10 @@ export function parseObservationsFromScratchpad({
|
|
|
145
150
|
projectRoot,
|
|
146
151
|
userDir,
|
|
147
152
|
}) {
|
|
148
|
-
|
|
153
|
+
// Task 139 (D-126): CRLF-tolerant read — autocrlf clones rewrite the
|
|
154
|
+
// committed memory files; a strict-\n split left \r on every line and
|
|
155
|
+
// the bullet/provenance regexes went blind.
|
|
156
|
+
const lines = content.split(/\r?\n/);
|
|
149
157
|
const sha1 = sha1OfContent(content);
|
|
150
158
|
const source_file = relativeSource(path, { projectRoot, userDir });
|
|
151
159
|
const baseName = basename(path);
|
|
@@ -435,6 +443,12 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
|
|
|
435
443
|
});
|
|
436
444
|
const knownPaths = db.prepare('SELECT path FROM files').all();
|
|
437
445
|
for (const { path: relPath } of knownPaths) {
|
|
446
|
+
// Task 104.2 composition guard: 'transcript:'-prefixed checkpoints
|
|
447
|
+
// belong to the transcript scope (transcript-index.mjs) — they are
|
|
448
|
+
// never in the observation live-set and pruning them here would
|
|
449
|
+
// defeat that scope's checkpoint on every boot. Its own sync prunes
|
|
450
|
+
// its own orphans.
|
|
451
|
+
if (relPath.startsWith('transcript:')) continue;
|
|
438
452
|
if (liveRelPaths.has(relPath)) continue;
|
|
439
453
|
const obsCount = db
|
|
440
454
|
.prepare('SELECT COUNT(*) AS n FROM observations WHERE source_file = ?')
|
|
@@ -443,12 +457,24 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
|
|
|
443
457
|
}
|
|
444
458
|
}
|
|
445
459
|
|
|
460
|
+
// Task 104.2 — sync the transcript scope (the L3 raw tier) in the same
|
|
461
|
+
// boot pass. Cheap: per-file sha1 checkpoint; best-effort — a transcript
|
|
462
|
+
// sync hiccup must not fail the observation reindex.
|
|
463
|
+
let transcripts = { files: 0, chunks: 0 };
|
|
464
|
+
try {
|
|
465
|
+
transcripts = syncTranscriptChunks({ db, projectRoot, now: ts });
|
|
466
|
+
} catch {
|
|
467
|
+
// best-effort; the next boot retries
|
|
468
|
+
}
|
|
469
|
+
|
|
446
470
|
return {
|
|
447
471
|
filesScanned,
|
|
448
472
|
filesReindexed,
|
|
449
473
|
observationsAffected,
|
|
450
474
|
filesPruned,
|
|
451
475
|
observationsPruned,
|
|
476
|
+
transcriptFiles: transcripts.files,
|
|
477
|
+
transcriptChunks: transcripts.chunks,
|
|
452
478
|
durationMs: Date.now() - t0,
|
|
453
479
|
skipped,
|
|
454
480
|
};
|
|
@@ -464,13 +490,20 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
|
|
|
464
490
|
export function reindexFull({ projectRoot, userDir, db, now }) {
|
|
465
491
|
const t0 = Date.now();
|
|
466
492
|
const ts = now ?? t0;
|
|
467
|
-
// Drop + recreate (faster than per-row DELETE).
|
|
493
|
+
// Drop + recreate (faster than per-row DELETE). Task 104.2: the transcript
|
|
494
|
+
// scope drops + rebuilds with everything else — `files` carries its
|
|
495
|
+
// checkpoints, so a full reindex must re-chunk from scratch too.
|
|
468
496
|
db.exec(`
|
|
469
497
|
DROP TABLE IF EXISTS observations_fts;
|
|
470
498
|
DROP TRIGGER IF EXISTS obs_after_insert;
|
|
471
499
|
DROP TRIGGER IF EXISTS obs_after_update;
|
|
472
500
|
DROP TRIGGER IF EXISTS obs_after_delete;
|
|
473
501
|
DROP TABLE IF EXISTS observations;
|
|
502
|
+
DROP TABLE IF EXISTS transcript_chunks_fts;
|
|
503
|
+
DROP TRIGGER IF EXISTS tch_after_insert;
|
|
504
|
+
DROP TRIGGER IF EXISTS tch_after_update;
|
|
505
|
+
DROP TRIGGER IF EXISTS tch_after_delete;
|
|
506
|
+
DROP TABLE IF EXISTS transcript_chunks;
|
|
474
507
|
DROP TABLE IF EXISTS files;
|
|
475
508
|
`);
|
|
476
509
|
db.exec(INDEX_DB_SCHEMA);
|
|
@@ -514,9 +547,20 @@ export function reindexFull({ projectRoot, userDir, db, now }) {
|
|
|
514
547
|
observationsAffected += txn(source, sha1);
|
|
515
548
|
}
|
|
516
549
|
|
|
550
|
+
// Task 104.2 — rebuild the transcript scope from scratch (its tables were
|
|
551
|
+
// dropped above). Best-effort, same contract as the boot-path sync.
|
|
552
|
+
let transcripts = { files: 0, chunks: 0 };
|
|
553
|
+
try {
|
|
554
|
+
transcripts = syncTranscriptChunks({ db, projectRoot, now: ts });
|
|
555
|
+
} catch {
|
|
556
|
+
// best-effort; the next reindex retries
|
|
557
|
+
}
|
|
558
|
+
|
|
517
559
|
return {
|
|
518
560
|
filesScanned,
|
|
519
561
|
observationsAffected,
|
|
562
|
+
transcriptFiles: transcripts.files,
|
|
563
|
+
transcriptChunks: transcripts.chunks,
|
|
520
564
|
durationMs: Date.now() - t0,
|
|
521
565
|
skipped,
|
|
522
566
|
};
|
package/src/index.mjs
CHANGED
|
@@ -70,6 +70,16 @@ export function buildProgram() {
|
|
|
70
70
|
childCmd.action(() => sub.action(child.name));
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
|
+
// Task 129: a parent that has children AND its own action (e.g. `cmk
|
|
74
|
+
// config --show-origin <key>`, handled by the parent while get/set are
|
|
75
|
+
// children) must wire the parent action too — otherwise commander
|
|
76
|
+
// falls to the default "show help, exit 1" on a bare parent invocation
|
|
77
|
+
// with a flag. (Caught by the Task-129 live-test: `--show-origin`
|
|
78
|
+
// printed help instead of running.) Children still take precedence
|
|
79
|
+
// when a subcommand name is given.
|
|
80
|
+
if (typeof sub.action === 'function') {
|
|
81
|
+
cmd.action((...cmdArgs) => sub.action(...cmdArgs));
|
|
82
|
+
}
|
|
73
83
|
} else {
|
|
74
84
|
cmd.action((...cmdArgs) => sub.action(...cmdArgs));
|
|
75
85
|
}
|