@lh8ppl/claude-memory-kit 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +8 -5
  2. package/bin/cmk-auto-extract.mjs +13 -0
  3. package/bin/cmk-capture-prompt.mjs +0 -0
  4. package/bin/cmk-capture-turn.mjs +0 -0
  5. package/bin/cmk-compress-session.mjs +31 -17
  6. package/bin/cmk-inject-context.mjs +12 -2
  7. package/bin/cmk-observe-edit.mjs +0 -0
  8. package/bin/cmk-weekly-curate.mjs +14 -2
  9. package/package.json +3 -2
  10. package/src/audit-log.mjs +6 -0
  11. package/src/auto-drain.mjs +59 -0
  12. package/src/auto-extract.mjs +117 -6
  13. package/src/auto-persona.mjs +544 -0
  14. package/src/bullet-lookup.mjs +59 -0
  15. package/src/capture-turn.mjs +54 -0
  16. package/src/compress-session.mjs +6 -8
  17. package/src/compressor.mjs +37 -22
  18. package/src/conflict-queue.mjs +8 -1
  19. package/src/daily-distill.mjs +19 -11
  20. package/src/doctor.mjs +79 -26
  21. package/src/forget.mjs +14 -0
  22. package/src/graduate-session.mjs +65 -0
  23. package/src/graduation.mjs +179 -0
  24. package/src/index-rebuild.mjs +26 -4
  25. package/src/inject-context.mjs +352 -65
  26. package/src/install.mjs +52 -7
  27. package/src/lessons-promote.mjs +137 -0
  28. package/src/mcp-server.mjs +17 -0
  29. package/src/memory-write.mjs +20 -7
  30. package/src/native-memory.mjs +98 -0
  31. package/src/persona-portability.mjs +253 -0
  32. package/src/provenance.mjs +23 -5
  33. package/src/read-hook-stdin.mjs +47 -0
  34. package/src/register-crons.mjs +17 -8
  35. package/src/sanitize.mjs +39 -0
  36. package/src/scratchpad.mjs +247 -19
  37. package/src/session-end-tasks.mjs +127 -0
  38. package/src/settings-hooks.mjs +33 -3
  39. package/src/spawn-bin.mjs +83 -0
  40. package/src/subcommands.mjs +472 -26
  41. package/src/weekly-curate.mjs +53 -6
  42. package/src/write-fact.mjs +60 -3
  43. package/template/.claude/skills/memory-write/SKILL.md +47 -88
  44. package/template/.gitignore.fragment +6 -0
  45. package/template/CLAUDE.md.template +17 -7
  46. package/template/local/machine-paths.md.template +1 -12
  47. package/template/local/overrides.md.template +1 -11
  48. package/template/project/MEMORY.md.template +5 -26
  49. package/template/project/SOUL.md.template +1 -10
  50. package/template/user/fragments/INDEX.md.template +1 -1
  51. package/template/.claude/hooks/pre-tool-memory.js +0 -78
  52. package/template/.claude/hooks/transcript-capture.js +0 -69
  53. package/template/.claude/settings.json +0 -27
  54. package/template/support/scripts/auto-extract-memory.sh +0 -102
  55. package/template/support/scripts/refresh-distill-timestamp.py +0 -35
  56. package/template/support/scripts/register-crons.py +0 -242
  57. package/template/support/scripts/run-daily-distill.sh +0 -67
  58. package/template/support/scripts/run-weekly-curate.sh +0 -58
@@ -0,0 +1,137 @@
1
+ // lessons-promote.mjs — `cmk lessons promote <id>`: move a project-tier fact
2
+ // into the user tier (LESSONS.md by default) through the SAFE promote path.
3
+ //
4
+ // This is the EXPLICIT half of the wedge (D-27/D-30): a project observation the
5
+ // user wants to carry across ALL their projects. Before this, the subcommand
6
+ // was a stub and the memory-write skill hand-edited LESSONS.md — bypassing
7
+ // home-path sanitization, Poison_Guard, dedup, and the audit trail.
8
+ //
9
+ // It routes through promoteCandidatesToUserTier (D-13) at confidence:'high'
10
+ // (an explicit user action is the highest-trust signal there is, so it promotes
11
+ // rather than queuing). NEVER hand-edit ~/.claude-memory-kit/*.md.
12
+ //
13
+ // Composes on: forget.resolveFact (read a project fact by id) +
14
+ // auto-persona.promoteCandidatesToUserTier (safe user-tier write).
15
+
16
+ import { resolveFact } from './forget.mjs';
17
+ import { promoteCandidatesToUserTier } from './auto-persona.mjs';
18
+ import { findBulletScratchpad } from './bullet-lookup.mjs';
19
+ import { errorResult, notFoundResult } from './result-shapes.mjs';
20
+
21
// User-tier files a promotion may land in.
const VALID_TARGETS = new Set(['USER.md', 'HABITS.md', 'LESSONS.md']);

// Sensible default landing section per target. Each name passes
// auto-persona's SAFE_SECTION_NAME guard; ensureSectionExists creates it if the
// user's scaffold doesn't already have it.
const DEFAULT_SECTION = Object.freeze({
  'LESSONS.md': 'Cross-Project Lessons',
  'HABITS.md': 'Working Style',
  'USER.md': 'Profile',
});

/**
 * Promote a project-tier fact to the user tier through the safe path.
 *
 * The source fact is looked up with resolveFact, its body is flattened to one
 * line, and the resulting candidate is handed to promoteCandidatesToUserTier
 * at high confidence/trust. This function never writes user-tier files itself.
 *
 * @param {object} opts
 * @param {string} opts.id           citation id of the project fact (e.g. P-XXXXXXXX)
 * @param {string} opts.projectRoot  project root (for resolving the source fact)
 * @param {string} opts.userDir      user-tier dir (~/.claude-memory-kit)
 * @param {string} [opts.to]         target user-tier file (default LESSONS.md)
 * @param {string} [opts.section]    landing section (default per-target)
 * @param {string} [opts.now]        ISO timestamp override (tests)
 * @returns {{action:string, id?:string, target?:string, section?:string, ...}}
 *   `promoted` (optionally with `superseded`), `queued` (with `reason`), or an
 *   error / not-found result built by result-shapes.
 */
export function lessonsPromote({ id, projectRoot, userDir, to = 'LESSONS.md', section, now } = {}) {
  if (!userDir) {
    return errorResult({ category: 'schema', errors: ['userDir is required (lessons promote writes to the user tier)'] });
  }
  if (!VALID_TARGETS.has(to)) {
    return errorResult({ category: 'schema', errors: [`invalid target '${to}' (expected USER.md | HABITS.md | LESSONS.md)`] });
  }
  // `lessons promote` carries a PROJECT observation to the user tier. Reject a
  // U-tier id (already user-tier — nothing to promote) and an L-tier id (local
  // is gitignored/machine-specific on purpose — promoting it to the
  // machine-global user tier would surface deliberately-unshared content in
  // every project's persona). Source must be the committed project tier.
  if (typeof id === 'string' && (id[0] === 'U' || id[0] === 'L')) {
    return errorResult({
      category: 'schema',
      errors: [`lessons promote moves a PROJECT-tier (P-) fact; got a ${id[0]}-tier id '${id}'`],
      id,
    });
  }

  const found = resolveFact({ id, projectRoot, userDir });
  if (found.state === 'not-found') {
    // The id might be a scratchpad BULLET (the common `cmk search` mix-up):
    // search surfaces bullet ids too, but promote carries FACTS. Say so.
    const bulletIn = findBulletScratchpad(id, { projectRoot, userDir });
    if (bulletIn) {
      return notFoundResult({
        errors: [
          `'${id}' is a scratchpad bullet in ${bulletIn}, not a graduated fact — \`cmk lessons promote\` carries facts (in context/memory/) to the user tier. In \`cmk search\` output, pick an id whose location is a context/memory/*.md file, not a ${bulletIn}:NN bullet.`,
        ],
        id,
      });
    }
    return notFoundResult({ errors: [`no fact with id '${id}'`], id });
  }
  if (found.state === 'tombstoned') {
    return notFoundResult({ errors: [`fact '${id}' is tombstoned (forgotten); cannot promote`], id });
  }

  // A scratchpad bullet is single-line (the provenance HTML-comment must sit on
  // the very next line). A RICH fact body is multi-line, which writeBullet
  // rejects outright. Flatten all whitespace to single spaces so the rule and
  // its rationale promote as one well-formed bullet. The scratchpad byte cap
  // still applies downstream via memoryWrite.
  const text = (found.body ?? '').replace(/\s+/g, ' ').trim();
  if (!text) {
    return errorResult({ category: 'schema', errors: [`fact '${id}' has no body to promote`], id });
  }

  const candidate = {
    target: to,
    section: section || DEFAULT_SECTION[to],
    text,
    confidence: 'high', // explicit user action → clears the confidence gate (promotes, not queued)
  };

  // trust:'high' + source:'user-explicit' — a user-attested promotion is durable
  // (never aged out / auto-superseded by the maintenance passes — the 45.4
  // invariant). The auto path leaves these at the default medium.
  const res = promoteCandidatesToUserTier({
    candidates: [candidate],
    userDir,
    now,
    trust: 'high',
    source: 'user-explicit',
  });

  // If the promote path itself reports a failure, surface it instead of
  // crashing on the missing outcome arrays or mislabelling it as "queued".
  // NOTE(review): assumes error results carry `action: 'error'` (the shape the
  // kit's other modules return) — confirm against result-shapes.mjs.
  if (res?.action === 'error') {
    return { ...res, id };
  }

  // Treat a missing or non-array outcome list as empty so an unexpected result
  // shape degrades to the honest "not-promoted" answer rather than a TypeError.
  const outcomes = (key) => (Array.isArray(res?.[key]) ? res[key] : []);
  const forTarget = (entry) => entry?.target === to;

  const promotedHit = outcomes('promoted').find(forTarget);
  if (promotedHit) {
    return { action: 'promoted', id, target: to, section: candidate.section, newId: promotedHit.id ?? null };
  }
  // A supersede is ALSO success: the promotion replaced an existing same-topic
  // lesson with this updated one (common when the user re-promotes a refined rule).
  const supersededHit = outcomes('superseded').find(forTarget);
  if (supersededHit) {
    return { action: 'promoted', id, target: to, section: candidate.section, newId: supersededHit.newId, superseded: supersededHit.oldId };
  }
  // Routed to the conflict queue (e.g. it clashes with a hand-curated entry the
  // kit won't silently overwrite) or otherwise didn't land — surface honestly.
  const conflictHit = outcomes('conflicts').find(forTarget);
  if (conflictHit) {
    return { action: 'queued', id, target: to, section: candidate.section, reason: 'conflict' };
  }
  const queuedHit = outcomes('queued').find(forTarget);
  return {
    action: 'queued',
    id,
    target: to,
    section: candidate.section,
    reason: queuedHit?.reason ?? 'not-promoted',
  };
}
@@ -34,6 +34,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
34
34
  import { z } from 'zod';
35
35
  import { resolve as resolvePath, isAbsolute } from 'node:path';
36
36
  import { openIndexDb } from './index-db.mjs';
37
+ import { reindexBoot } from './index-rebuild.mjs';
37
38
  import { search, SEARCH_MODES } from './search.mjs';
38
39
  import { memoryWrite } from './memory-write.mjs';
39
40
  import { ID_PATTERN, resolveTierRoot } from './tier-paths.mjs';
@@ -451,6 +452,22 @@ export function buildMcpServer({ projectRoot, userDir, db, semanticBackend }) {
451
452
  */
452
453
  export async function runMcpServer({ projectRoot, userDir, db: dbOverride, semanticBackend } = {}) {
453
454
  const db = dbOverride ?? openIndexDb({ projectRoot });
455
+ // Refresh the index at server startup so mk_search sees facts already on
456
+ // disk — same fresh-install gap as `cmk search` (self-test finding #0):
457
+ // nothing reindexes for a just-installed project, so without this the
458
+ // model's first mk_search returns empty for facts sitting in the
459
+ // scratchpads. Incremental (mtime/sha1 diff) + best-effort; in-session
460
+ // freshness for facts written AFTER startup is the runtime watcher's job
461
+ // (future). The in-process buildMcpServer tests bypass this path.
462
+ if (projectRoot) {
463
+ try {
464
+ reindexBoot({ projectRoot, userDir, db });
465
+ } catch (err) {
466
+ process.stderr.write(
467
+ `cmk-mcp-server: startup index refresh failed: ${err?.message ?? err}\n`,
468
+ );
469
+ }
470
+ }
454
471
  const server = buildMcpServer({ projectRoot, userDir, db, semanticBackend });
455
472
  const transport = new StdioServerTransport();
456
473
 
@@ -54,9 +54,10 @@ import {
54
54
  import { nowIso, appendAuditEntry, REASON_CODES } from './audit-log.mjs';
55
55
  import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
56
56
  import { appendScratchpadBullet } from './scratchpad.mjs';
57
- import { parseBulletProvenance } from './provenance.mjs';
57
+ import { parseBulletProvenance, isProvenanceCommentLine } from './provenance.mjs';
58
58
  import { checkPoisonGuard, logPoisonGuardRejection } from './poison-guard.mjs';
59
59
  import { detectConflicts, writeConflictEntry } from './conflict-queue.mjs';
60
+ import { sanitizeHomePaths } from './sanitize.mjs';
60
61
 
61
62
  const VALID_ACTIONS = new Set(['add', 'replace', 'remove']);
62
63
 
@@ -189,7 +190,7 @@ function findMatchingBullet({ lines, substring, sectionTitle }) {
189
190
  const [, tier, idShort, bulletText] = m;
190
191
  if (!bulletText.includes(substring)) continue;
191
192
  const commentLine = lines[i + 1];
192
- if (!commentLine || !/^\s*<!--.*-->\s*$/.test(commentLine)) continue;
193
+ if (!isProvenanceCommentLine(commentLine)) continue;
193
194
  return {
194
195
  bulletIdx: i,
195
196
  commentIdx: i + 1,
@@ -252,8 +253,20 @@ function doAdd(opts) {
252
253
  if (errors.length > 0) {
253
254
  return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
254
255
  }
256
+ // Privacy (write-path fix #1): abstract home-dir paths to `~` for
257
+ // committed/shared tiers (P/U) BEFORE the bullet is screened, conflict-
258
+ // checked, dedup-keyed, and written — so a captured fact never ships the
259
+ // local username and stays portable. Local tier (L) keeps machine paths
260
+ // verbatim (its purpose). Everything downstream uses `addOpts`.
261
+ const sanitizedText =
262
+ opts.tier === 'P' || opts.tier === 'U'
263
+ ? sanitizeHomePaths(opts.text)
264
+ : opts.text;
265
+ const addOpts =
266
+ sanitizedText === opts.text ? opts : { ...opts, text: sanitizedText };
267
+
255
268
  const poisonResult = runPoisonGuard({
256
- text: opts.text,
269
+ text: addOpts.text,
257
270
  projectRoot: opts.projectRoot,
258
271
  source: opts.source,
259
272
  sessionId: opts.sessionId,
@@ -276,7 +289,7 @@ function doAdd(opts) {
276
289
  userDir: opts.userDir,
277
290
  });
278
291
  const conflict = detectConflicts({
279
- newText: opts.text,
292
+ newText: addOpts.text,
280
293
  newTrust,
281
294
  scratchpadPath,
282
295
  sectionTitle: opts.section,
@@ -296,14 +309,14 @@ function doAdd(opts) {
296
309
  // appendScratchpadBullet would have used, then route to the queue.
297
310
  // (Task 25b fix: generateId is positional `(tier, text)`, not
298
311
  // named-args — Task 25 originally called it as an object.)
299
- const proposedId = generateId(opts.tier, opts.text);
312
+ const proposedId = generateId(addOpts.tier, addOpts.text);
300
313
  const ts = opts.now ?? nowIso();
301
314
  return writeConflictEntry({
302
315
  tier: opts.tier,
303
316
  projectRoot: opts.projectRoot,
304
317
  userDir: opts.userDir,
305
318
  newId: proposedId,
306
- newText: opts.text,
319
+ newText: addOpts.text,
307
320
  newTrust,
308
321
  existingId: conflict.existingId,
309
322
  existingText: conflict.existingText,
@@ -313,7 +326,7 @@ function doAdd(opts) {
313
326
  detectedAt: ts,
314
327
  });
315
328
  }
316
- return appendBulletGuarded(opts);
329
+ return appendBulletGuarded(addOpts);
317
330
  }
318
331
 
319
332
  function appendBulletGuarded(opts) {
@@ -0,0 +1,98 @@
1
+ // Native Anthropic Auto Memory coexistence (Task 60, ADR-0011).
2
+ //
3
+ // Claude Code ships its own Auto Memory (v2.1.59+, ON by default), writing
4
+ // machine-local `~/.claude/projects/<slug>/memory/` in the same shape the kit
5
+ // uses in-repo. With the kit installed BOTH inject at session start → context
6
+ // bloat. Per ADR-0011 the kit is ADDITIVE, not enforcing: the default is
7
+ // coexist (we never touch the user's setting); `cmk disable-native-memory`
8
+ // is a one-command, committable opt-in that writes `autoMemoryEnabled: false`
9
+ // into the project's `.claude/settings.json` (which travels with `git clone`,
10
+ // unlike the user-only `autoMemoryDirectory`). `cmk enable-native-memory`
11
+ // reverses it (explicit `true`).
12
+ //
13
+ // Public boundary:
14
+ // setNativeAutoMemory({ projectRoot, enabled })
15
+ // → { action: 'written' | 'unchanged', settingsPath, enabled }
16
+ // → errorResult({ category: SCHEMA }) when the existing file is unparseable
17
+ // (NEVER clobber a hand-broken file — surface it).
18
+ // getNativeAutoMemoryState({ projectRoot })
19
+ // → { state: 'enabled' | 'disabled' | 'default' | 'unknown', settingsPath }
20
+ // (`default` = key absent ⇒ Anthropic's default, which is ON.)
21
+
22
+ import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
23
+ import { join, dirname } from 'node:path';
24
+ import { errorResult, ERROR_CATEGORIES } from './result-shapes.mjs';
25
+
26
// Path segments of the project's settings file, relative to the project root.
const SETTINGS_REL = ['.claude', 'settings.json'];

/**
 * Build the path of the project's `.claude/settings.json`.
 *
 * @param {string} projectRoot - project root directory.
 * @returns {string} `projectRoot` joined with the settings-file segments.
 */
export function nativeMemorySettingsPath(projectRoot) {
  const segments = [projectRoot, ...SETTINGS_REL];
  return join(...segments);
}
31
+
32
/**
 * Load a settings file as JSON.
 *
 * @param {string} settingsPath - path of the settings file.
 * @returns {{settings: any, existed: boolean}} an empty object with
 *   `existed: false` when the file is absent, otherwise the parsed content.
 * @throws when the file cannot be read or is not valid JSON.
 */
function readSettings(settingsPath) {
  const existed = existsSync(settingsPath);
  if (!existed) {
    return { settings: {}, existed };
  }
  const settings = JSON.parse(readFileSync(settingsPath, 'utf8'));
  return { settings, existed };
}
37
+
38
/**
 * Report the native Auto Memory state recorded in the project's
 * `.claude/settings.json`.
 *
 * - `default`  — the file is absent, or `autoMemoryEnabled` is not a boolean
 *                (so Anthropic's default, enabled, applies).
 * - `enabled`  — `autoMemoryEnabled` is exactly `true`.
 * - `disabled` — `autoMemoryEnabled` is exactly `false`.
 * - `unknown`  — the file exists but could not be read/parsed; the failure
 *                message is returned as `error`.
 *
 * @param {object} opts
 * @param {string} opts.projectRoot - project root directory.
 * @returns {{state: string, settingsPath: string, error?: string}}
 */
export function getNativeAutoMemoryState({ projectRoot }) {
  const settingsPath = nativeMemorySettingsPath(projectRoot);
  const report = (state, extra = {}) => ({ state, settingsPath, ...extra });

  if (!existsSync(settingsPath)) {
    return report('default');
  }

  let parsed;
  try {
    parsed = readSettings(settingsPath).settings;
  } catch (err) {
    return report('unknown', { error: err?.message ?? String(err) });
  }

  // Only the literal booleans count; anything else falls back to `default`.
  switch (parsed?.autoMemoryEnabled) {
    case false:
      return report('disabled');
    case true:
      return report('enabled');
    default:
      return report('default');
  }
}
57
+
58
/**
 * Produce the one-line `cmk install` heads-up about native-vs-kit coexistence
 * (ADR-0011).
 *
 * @param {string} projectRoot - project root directory.
 * @returns {string|null} the note text, or `null` when the project has already
 *   disabled native Auto Memory (nothing to point out).
 */
export function nativeMemoryInstallNote(projectRoot) {
  const { state } = getNativeAutoMemoryState({ projectRoot });
  const alreadyOptedOut = state === 'disabled';
  return alreadyOptedOut
    ? null
    : " Note: Claude Code's native Auto Memory keeps running alongside the kit (both fill over time). For one lean memory layer, run `cmk disable-native-memory`.";
}
68
+
69
/**
 * Write `autoMemoryEnabled: <enabled>` into the project's committable
 * `.claude/settings.json`.
 *
 * Idempotent: when the key already holds `enabled`, nothing is written and the
 * result is `unchanged`. Every sibling key is preserved. An existing file is
 * never overwritten when it cannot be used safely:
 *   - it fails to read/parse, or
 *   - it parses to something other than a JSON object (`null`, an array, a
 *     string, a number …). Previously `null`/primitives crashed with a
 *     TypeError and an array was rewritten WITHOUT the key while still
 *     reporting `written`.
 *
 * @param {object} opts
 * @param {string} opts.projectRoot - project root directory.
 * @param {boolean} opts.enabled - value to store under `autoMemoryEnabled`.
 * @returns {{action: 'written'|'unchanged', settingsPath: string, enabled: boolean}
 *   | object} the outcome, or a SCHEMA error result when the existing file is
 *   unusable.
 */
export function setNativeAutoMemory({ projectRoot, enabled }) {
  const settingsPath = nativeMemorySettingsPath(projectRoot);

  let settings = {};
  if (existsSync(settingsPath)) {
    try {
      ({ settings } = readSettings(settingsPath));
    } catch (err) {
      return errorResult({
        category: ERROR_CATEGORIES.SCHEMA,
        errors: [`${settingsPath} parse error: ${err?.message ?? err}`],
      });
    }
    // Valid JSON is not necessarily an object; only a plain object can carry
    // the key, so surface anything else instead of crashing or clobbering it.
    const isJsonObject = settings !== null && typeof settings === 'object' && !Array.isArray(settings);
    if (!isJsonObject) {
      const found = settings === null ? 'null' : Array.isArray(settings) ? 'an array' : `a ${typeof settings}`;
      return errorResult({
        category: ERROR_CATEGORIES.SCHEMA,
        errors: [`${settingsPath} must contain a JSON object (found ${found})`],
      });
    }
  }

  if (settings.autoMemoryEnabled === enabled) {
    return { action: 'unchanged', settingsPath, enabled };
  }

  settings.autoMemoryEnabled = enabled;
  mkdirSync(dirname(settingsPath), { recursive: true });
  writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + '\n', 'utf8');
  return { action: 'written', settingsPath, enabled };
}
@@ -0,0 +1,253 @@
1
+ // persona-portability.mjs — Task 72. `cmk persona export` / `cmk persona import`.
2
+ //
3
+ // The persona (the user tier — USER/HABITS/LESSONS + fragments/) follows the
4
+ // HUMAN, not the repo (design §1.1, D-27): it lives machine-local at
5
+ // ~/.claude-memory-kit and is deliberately OUT of any project repo, because
6
+ // committing it would leak your working-style to teammates who clone. So
7
+ // portability across YOUR machines is per-human, not per-repo: export the user
8
+ // tier to one OS-agnostic bundle file, carry it (USB / private repo / Dropbox),
9
+ // import it on the other machine.
10
+ //
11
+ // This is the EXPLICIT primitive (decided in Task 72): no merge, no collision
12
+ // control. Import OVERWRITES, backing up anything it would replace so nothing is
13
+ // lost. The seamless auto-merge path (`cmk persona sync <git-url>`, Task 72.2)
14
+ // is deferred — git handles transport + conflicts there.
15
+ //
16
+ // Bundle format: a single self-describing JSON file (no tar/zip dependency, and
17
+ // human-inspectable). `{ kind, version, exportedAt, fileCount, files: { relpath:
18
+ // content } }`.
19
+
20
+ import {
21
+ existsSync,
22
+ readFileSync,
23
+ writeFileSync,
24
+ mkdirSync,
25
+ readdirSync,
26
+ statSync,
27
+ renameSync,
28
+ unlinkSync,
29
+ } from 'node:fs';
30
+ import { join, dirname } from 'node:path';
31
+ import { reindex } from './reindex.mjs';
32
+ import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
33
+
34
+ const BUNDLE_KIND = 'cmk-persona-bundle';
35
+ const BUNDLE_VERSION = 1;
36
+
37
+ // The persona surface to bundle: the 3 user-tier scratchpads + a settings
38
+ // override, plus the fact-store / queue subdirs (walked recursively). Everything
39
+ // else under the user tier is machine-local + regenerable and is NEVER bundled —
40
+ // runtime locks/audit (.locks/), the FTS cache (.index/), and prior import
41
+ // backups (.import-backups/). Using an explicit allow-list (rather than
42
+ // "everything minus excludes") guarantees a new runtime dir can't leak in later.
43
+ const TOP_LEVEL_FILES = ['USER.md', 'HABITS.md', 'LESSONS.md', 'settings.json'];
44
+ const SUBDIRS = ['fragments', 'queues'];
45
+
46
/**
 * Recursively collect every non-directory entry under `absDir`.
 *
 * @param {string} absDir - directory to walk.
 * @param {string} relPrefix - `/`-separated prefix prepended to each entry's
 *   relative path (an empty prefix yields bare names).
 * @param {Array<{rel: string, abs: string}>} out - accumulator that receives
 *   one `{ rel, abs }` record per file, in directory-listing order.
 */
function walkFiles(absDir, relPrefix, out) {
  readdirSync(absDir).forEach((entry) => {
    const abs = join(absDir, entry);
    // Relative paths always use forward slashes, whatever the host separator.
    const rel = relPrefix ? `${relPrefix}/${entry}` : entry;
    if (statSync(abs).isDirectory()) {
      walkFiles(abs, rel, out);
      return;
    }
    out.push({ rel, abs });
  });
}
54
+
55
/**
 * Export the user tier to a portable bundle file.
 *
 * Only the allow-listed surface is bundled: the files named in
 * TOP_LEVEL_FILES (when present as regular files) followed by everything found
 * under each directory in SUBDIRS. File contents are read as UTF-8 text and
 * stored under their `/`-separated relative path.
 *
 * @param {object} opts
 * @param {string} opts.userDir - the user-tier root to export.
 * @param {string} opts.outFile - where to write the bundle (parent directories
 *   are created as needed).
 * @param {string} [opts.now] - ISO timestamp override (tests).
 * @returns {{action:'exported'|'error', path?, fileCount?, bytes?, errorCategory?, errors?}}
 */
export function exportPersona({ userDir, outFile, now } = {}) {
  const fail = (errorCategory, message) => ({ action: 'error', errorCategory, errors: [message] });

  if (!userDir || !existsSync(userDir)) {
    return fail('not-found', `user tier not found at ${userDir} — run \`cmk init-user-tier\` first`);
  }
  if (!outFile) {
    return fail('schema', 'no output file given');
  }

  const files = {};

  // Top-level files first, in allow-list order.
  for (const name of TOP_LEVEL_FILES) {
    const abs = join(userDir, name);
    if (!existsSync(abs) || !statSync(abs).isFile()) continue;
    files[name] = readFileSync(abs, 'utf8');
  }

  // Then every file below each allow-listed subdirectory.
  for (const sub of SUBDIRS) {
    const subRoot = join(userDir, sub);
    if (!existsSync(subRoot) || !statSync(subRoot).isDirectory()) continue;
    const found = [];
    walkFiles(subRoot, sub, found);
    found.forEach(({ rel, abs }) => {
      files[rel] = readFileSync(abs, 'utf8');
    });
  }

  const fileCount = Object.keys(files).length;
  const json = JSON.stringify(
    {
      kind: BUNDLE_KIND,
      version: BUNDLE_VERSION,
      exportedAt: now ?? nowIso(),
      fileCount,
      files,
    },
    null,
    2,
  );

  mkdirSync(dirname(outFile), { recursive: true });
  writeFileSync(outFile, json, 'utf8');

  return {
    action: 'exported',
    path: outFile,
    fileCount,
    bytes: Buffer.byteLength(json, 'utf8'),
  };
}
110
+
111
// True when `rel` is a bundle-relative path that cannot leave the directory it
// is later joined onto: non-empty, forward-slash separated, and free of empty,
// `.` or `..` segments, backslashes (a separator on Windows) and NUL bytes.
function isSafeBundlePath(rel) {
  if (typeof rel !== 'string' || rel.length === 0) return false;
  if (rel.includes('\\') || rel.includes('\0')) return false;
  return rel.split('/').every((segment) => segment !== '' && segment !== '.' && segment !== '..');
}

// Read + validate a bundle file. Returns { bundle } on success, or { error: <the
// error result> } on any problem. Kept separate so importPersona stays simple.
//
// A bundle is carried between machines and so is untrusted input: besides the
// kind/version checks, `files` must be a plain object whose keys are safe
// relative paths (see isSafeBundlePath — the importer joins them onto the user
// tier, so `../x` or an absolute path would write outside it) and whose values
// are text.
function readAndValidateBundle(inFile) {
  const err = (msg, cat = 'schema') => ({ error: { action: 'error', errorCategory: cat, errors: [msg] } });
  if (!inFile || !existsSync(inFile)) return err(`bundle not found at ${inFile}`, 'not-found');
  let bundle;
  try {
    bundle = JSON.parse(readFileSync(inFile, 'utf8'));
  } catch (e) {
    return err(`bundle is not valid JSON: ${e.message}`);
  }
  if (bundle?.kind !== BUNDLE_KIND) return err(`not a cmk persona bundle (kind: ${bundle?.kind ?? 'missing'})`);
  if (bundle.version !== BUNDLE_VERSION) {
    return err(`unsupported bundle version ${bundle.version} (this cmk supports v${BUNDLE_VERSION})`);
  }
  const { files } = bundle;
  // An array is an object too, but it is not a relpath → content map.
  if (!files || typeof files !== 'object' || Array.isArray(files)) return err('bundle carries no files');
  for (const [rel, content] of Object.entries(files)) {
    if (!isSafeBundlePath(rel)) {
      return err(`bundle contains an unsafe file path: ${JSON.stringify(rel)}`);
    }
    if (typeof content !== 'string') {
      return err(`bundle file ${JSON.stringify(rel)} does not carry text content`);
    }
  }
  return { bundle };
}
129
+
130
// Reverse a partially-applied import. Files the import introduced are deleted
// and files it moved aside are moved back over whatever now sits at their
// original location. Each item is handled independently and failures are
// ignored: a leftover backup can still be recovered by hand, whereas a
// clobbered live file cannot, so every original gets its restore attempt.
//
//   created — destination paths that did not exist before the import
//   renamed — { dest, bkp } pairs: original location and where it was moved to
function rollbackImport(created, renamed) {
  const attempt = (step) => {
    try {
      step();
    } catch {
      /* best-effort — for moved files the backup copy still exists for manual recovery */
    }
  };

  for (const dest of created) {
    attempt(() => {
      if (existsSync(dest)) unlinkSync(dest);
    });
  }

  for (const { dest, bkp } of renamed) {
    attempt(() => {
      if (!existsSync(bkp)) return;
      mkdirSync(dirname(dest), { recursive: true });
      renameSync(bkp, dest);
    });
  }
}
152
+
153
// Apply the bundle's files TRANSACTIONALLY (the Task-91 rollback discipline).
//
// Phase 1 moves every already-existing target into `backupRoot` (same relative
// layout) and notes which targets are brand new. Phase 2 writes every bundled
// file. If anything throws in either phase, rollbackImport is invoked with what
// has been recorded so far and the error is re-thrown, so a mid-import
// disk/permission error never leaves the persona half-applied.
//
// Returns the number of files that were backed up.
function applyBundleAtomic(userDir, files, backupRoot) {
  const renamed = []; // { dest, bkp } — existing files moved aside
  const created = []; // dest — files that did NOT exist before (new this import)
  const under = (root, rel) => join(root, ...rel.split('/'));

  try {
    // Phase 1: clear the way, keeping a backup of anything replaced.
    Object.keys(files).forEach((rel) => {
      const dest = under(userDir, rel);
      if (!existsSync(dest)) {
        created.push(dest);
        return;
      }
      const bkp = under(backupRoot, rel);
      mkdirSync(dirname(bkp), { recursive: true });
      renameSync(dest, bkp);
      renamed.push({ dest, bkp });
    });

    // Phase 2: write the bundled content.
    Object.entries(files).forEach(([rel, content]) => {
      const dest = under(userDir, rel);
      mkdirSync(dirname(dest), { recursive: true });
      writeFileSync(dest, content, 'utf8');
    });
  } catch (err) {
    rollbackImport(created, renamed);
    throw err;
  }

  return renamed.length;
}
184
+
185
// Best-effort rebuild of the user-tier search index so `cmk search` works
// straight after an import. Warnings are discarded. Returns true when the
// reindex completed and false when it threw (`cmk reindex` can rebuild later).
function tryReindexUserTier(userDir) {
  let succeeded = true;
  try {
    reindex({ tier: 'U', userDir, warn: () => {} });
  } catch {
    succeeded = false;
  }
  return succeeded;
}
195
+
196
// Door 4: record ONE operational audit entry for the bulk rewrite of the user
// tier. Individual facts keep their own provenance inside the bundled fact
// files; this entry captures the import event itself — when it happened, how
// many files were written / backed up, the source bundle, and (only when
// something was backed up) where the replaced files went. Best-effort: any
// failure while building or appending the entry is swallowed.
function writeImportAudit(userDir, { ts, fileCount, backedUp, backupRoot, inFile }) {
  try {
    const archivedTo = backedUp > 0 ? { archive: backupRoot } : undefined;
    const entry = {
      ts,
      action: 'persona-imported',
      tier: 'U',
      id: 'persona-bundle',
      reasonCode: REASON_CODES.PERSONA_IMPORTED,
      paths: archivedTo,
      extra: { fileCount, backedUp, source: inFile },
    };
    appendAuditEntry(userDir, entry);
  } catch {
    /* never fail the import because the audit write failed */
  }
}
214
+
215
/**
 * Import a persona bundle onto this machine's user tier. OVERWRITES, backing up
 * any file it would replace to <userDir>/.import-backups/<ts>/ first (no data
 * loss; transactional — rolls back on a mid-import failure). Rebuilds the
 * user-tier search index from the imported fragments.
 *
 * Every failure is reported as an `{ action: 'error' }` result rather than an
 * exception: a missing `userDir` is a `schema` error, bundle problems come from
 * readAndValidateBundle, and a failure to create the user tier or apply the
 * bundle is an `io` error.
 *
 * @param {object} opts
 * @param {string} opts.userDir - the target user-tier root (created if absent).
 * @param {string} opts.inFile - the bundle to import.
 * @param {string} [opts.now] - ISO timestamp override (tests).
 * @returns {{action:'imported'|'error', fileCount?, backedUp?, backupPath?, reindexed?, errorCategory?, errors?}}
 */
export function importPersona({ userDir, inFile, now } = {}) {
  // Without a target there is nowhere to import to; say so instead of letting
  // mkdirSync throw on `undefined`.
  if (!userDir) {
    return { action: 'error', errorCategory: 'schema', errors: ['no user tier directory given'] };
  }

  const { bundle, error } = readAndValidateBundle(inFile);
  if (error) return error;

  const ts = now ?? nowIso();
  // `:` and `.` are replaced so the timestamp is a valid directory name.
  const backupRoot = join(userDir, '.import-backups', ts.replace(/[:.]/g, '-'));

  let backedUp;
  try {
    // Creating the tier root sits inside the guarded region so an unwritable
    // location is reported as an io error result, not thrown at the caller.
    mkdirSync(userDir, { recursive: true });
    backedUp = applyBundleAtomic(userDir, bundle.files, backupRoot);
  } catch (err) {
    return { action: 'error', errorCategory: 'io', errors: [`import failed and was rolled back: ${err?.message ?? err}`] };
  }

  const fileCount = Object.keys(bundle.files).length;
  const reindexed = tryReindexUserTier(userDir);
  writeImportAudit(userDir, { ts, fileCount, backedUp, backupRoot, inFile });

  return {
    action: 'imported',
    fileCount,
    backedUp,
    backupPath: backedUp > 0 ? backupRoot : null,
    reindexed,
  };
}
@@ -75,8 +75,27 @@ const BULLET_RE = new RegExp(
75
75
  `^- \\((${ID_PATTERN.source.replace(/^\^/, '').replace(/\$$/, '')})\\)\\s+(.+)$`,
76
76
  );
77
77
 
78
- // Match a provenance comment, tolerant of leading indentation.
79
- const COMMENT_RE = /^\s*<!--.*-->\s*$/;
78
// Decide whether `line` is a single-line HTML comment — the shape the kit
// writes provenance in (` <!-- source: …, trust: … -->`) — ignoring surrounding
// whitespace. Non-string input is never a comment line.
//
// Deliberately literal prefix/suffix matching instead of a `/<!--.*-->/` regex:
// that regex trips CodeQL js/bad-tag-filter (`.` skips newlines; ignores the
// `--!>` end-tag variant). Provenance comments are always single-line, so the
// literal check is equivalent AND clears the alert (the PR #72 pattern). Shared
// so scratchpad / memory-write / inject-context don't each re-roll the flagged
// regex.
export function isProvenanceCommentLine(line) {
  if (typeof line !== 'string') return false;

  const OPEN = '<!--';
  const CLOSE = '-->';
  const body = line.trim();

  // The opener and closer must not overlap, hence the combined minimum length.
  if (body.length < OPEN.length + CLOSE.length) return false;
  return body.slice(0, OPEN.length) === OPEN && body.slice(-CLOSE.length) === CLOSE;
}
91
+
92
// Return what sits between the `<!--` opener and the `-->` closer of a line
// that isProvenanceCommentLine has already accepted. Surrounding whitespace is
// trimmed first; whitespace inside the delimiters is kept. Done by slicing
// rather than a regex, for the same js/bad-tag-filter reason.
function stripCommentDelimiters(line) {
  const OPEN = '<!--';
  const CLOSE = '-->';
  return line.trim().slice(OPEN.length, -CLOSE.length);
}
80
99
 
81
100
  function validateBulletInput({ id, text, provenance }) {
82
101
  const errors = [];
@@ -183,10 +202,9 @@ export function writeBullet(opts = {}) {
183
202
  }
184
203
 
185
204
  export function parseBulletProvenance(line) {
186
- if (typeof line !== 'string') return null;
187
- if (!COMMENT_RE.test(line)) return null;
205
+ if (!isProvenanceCommentLine(line)) return null;
188
206
 
189
- const inner = line.replace(/^\s*<!--/, '').replace(/-->\s*$/, '');
207
+ const inner = stripCommentDelimiters(line);
190
208
  const fields = {};
191
209
  for (const part of inner.split(',')) {
192
210
  const idx = part.indexOf(':');