@lh8ppl/claude-memory-kit 0.2.4 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -10
- package/bin/cmk-capture-prompt.mjs +21 -1
- package/package.json +2 -1
- package/src/audit-log.mjs +1 -0
- package/src/auto-drain.mjs +17 -1
- package/src/auto-extract.mjs +72 -16
- package/src/auto-persona.mjs +86 -1
- package/src/capture-prompt.mjs +34 -1
- package/src/capture-turn.mjs +64 -6
- package/src/config-core.mjs +161 -0
- package/src/conflict-queue.mjs +20 -3
- package/src/content-hash.mjs +30 -0
- package/src/doctor.mjs +62 -3
- package/src/forget.mjs +13 -0
- package/src/frontmatter.mjs +4 -1
- package/src/import-anthropic-memory.mjs +25 -1
- package/src/import-claude-md.mjs +333 -0
- package/src/index-db.mjs +39 -0
- package/src/index-rebuild.mjs +48 -4
- package/src/index.mjs +10 -0
- package/src/inject-context.mjs +179 -7
- package/src/install.mjs +180 -1
- package/src/mcp-server.mjs +63 -8
- package/src/memory-health.mjs +229 -0
- package/src/memory-write.mjs +32 -10
- package/src/merge-facts.mjs +12 -0
- package/src/native-binding.mjs +142 -0
- package/src/poison-guard.mjs +55 -0
- package/src/provenance.mjs +4 -0
- package/src/remember-core.mjs +53 -8
- package/src/repair.mjs +20 -3
- package/src/result-shapes.mjs +1 -1
- package/src/scratchpad.mjs +5 -3
- package/src/search.mjs +96 -9
- package/src/semantic-backend.mjs +599 -0
- package/src/settings-hooks.mjs +4 -1
- package/src/subcommands.mjs +359 -42
- package/src/transcript-index.mjs +165 -0
- package/src/turn-tools.mjs +179 -0
- package/src/write-fact.mjs +34 -3
- package/template/.claude/skills/memory-search/SKILL.md +86 -0
- package/template/.gitattributes.fragment +16 -0
- package/template/CLAUDE.md.template +3 -1
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
// Task 104.2 (D-117) — transcript chunking + index sync: the SEARCH half of
|
|
2
|
+
// the L3 raw tier (the capture half shipped in 104.1). Transcript files
|
|
3
|
+
// (context/transcripts/{date}.md — dialogue + per-turn Tools blocks) are
|
|
4
|
+
// chunked by `## ` turn headings and windowed to ≤1500 chars (the memsearch
|
|
5
|
+
// chunking rule Task 65 adopted), then synced into the SEPARATE
|
|
6
|
+
// transcript_chunks table (index-db.mjs) so `cmk search --scope transcripts`
|
|
7
|
+
// reaches them WITHOUT polluting L1 fact results (the MemPalace last-resort
|
|
8
|
+
// contract, D-70/D-72).
|
|
9
|
+
//
|
|
10
|
+
// Sync strategy mirrors the observation indexer: per-file mtime/sha1 rows in
|
|
11
|
+
// the shared `files` table (keyed with a 'transcript:' prefix so they never
|
|
12
|
+
// collide with observation sources) → unchanged files cost one stat.
|
|
13
|
+
//
|
|
14
|
+
// Public boundary:
|
|
15
|
+
// chunkTranscript(text) → [{heading, body, sourceLine, chunkIdx}] (pure)
|
|
16
|
+
// syncTranscriptChunks({db, projectRoot, now?}) → {files, chunks}
|
|
17
|
+
|
|
18
|
+
import { hashContent } from './content-hash.mjs';
|
|
19
|
+
import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
|
|
20
|
+
import { join } from 'node:path';
|
|
21
|
+
|
|
22
|
+
const CHUNK_MAX_CHARS = 1500; // the Task-65 / memsearch chunking rule
|
|
23
|
+
const FILES_KEY_PREFIX = 'transcript:';
|
|
24
|
+
|
|
25
|
+
/**
 * Split a transcript day-file into chunks, one or more per `## ` turn heading.
 *
 * Text before the first heading is ignored, and a turn whose body is empty
 * after trimming produces no chunk. A body longer than the window size is cut
 * into consecutive windows; every window repeats its turn heading so a hit is
 * always attributable to a specific turn.
 *
 * @param {string} text - Full transcript text (LF or CRLF line endings).
 * @param {number} [maxChars=CHUNK_MAX_CHARS] - Maximum window size in UTF-16
 *   code units. Anything other than a positive integer falls back to the default.
 * @returns {Array<{heading: string, body: string, sourceLine: number, chunkIdx: number}>}
 *   Chunks in file order. `sourceLine` is the 1-based line of the turn heading;
 *   `chunkIdx` counts up from 0 across the whole file.
 */
export function chunkTranscript(text, maxChars = CHUNK_MAX_CHARS) {
  if (typeof text !== 'string' || text.trim() === '') return [];
  const limit = Number.isInteger(maxChars) && maxChars > 0 ? maxChars : CHUNK_MAX_CHARS;
  const lines = text.split(/\r?\n/);

  // Locate turn headings (`## <ts> — speaker`, the capture-prompt/-turn shape).
  const headings = [];
  for (let i = 0; i < lines.length; i++) {
    if (/^##\s/.test(lines[i])) headings.push(i);
  }
  if (headings.length === 0) return [];

  const chunks = [];
  let chunkIdx = 0;
  for (let h = 0; h < headings.length; h++) {
    const start = headings[h];
    const end = h + 1 < headings.length ? headings[h + 1] : lines.length;
    const heading = lines[start].trim();
    const body = lines
      .slice(start + 1, end)
      .join('\n')
      .trim();
    if (body === '') continue;

    // Window oversized turns; every window keeps its turn heading.
    let off = 0;
    while (off < body.length) {
      let cut = Math.min(off + limit, body.length);
      if (cut < body.length) {
        const before = body.charCodeAt(cut - 1);
        const after = body.charCodeAt(cut);
        const splitsPair =
          before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        if (splitsPair) {
          // Never cut between the two halves of a surrogate pair: a lone
          // surrogate is not valid text once the chunk is stored or encoded.
          // Normally back up one unit; if that would leave an empty window
          // (limit of 1), take the whole pair instead so the loop advances.
          cut = cut - 1 > off ? cut - 1 : cut + 1;
        }
      }
      chunks.push({
        heading,
        body: body.slice(off, cut),
        sourceLine: start + 1, // 1-based heading line — the drill-back anchor
        chunkIdx: chunkIdx++,
      });
      off = cut;
    }
  }
  return chunks;
}
|
|
59
|
+
|
|
60
|
+
// Transcript-chunk fingerprint for the `files`-table diff key (column name
|
|
61
|
+
// `sha1` kept for checkpoint back-compat; algorithm is SHA-256 via hashContent,
|
|
62
|
+
// D-149). Self-heals on the first post-upgrade boot like the observation index.
|
|
63
|
+
// Fingerprint helper for the `files`-table diff key. The name mirrors the
// `sha1` column it feeds; the hashing itself is delegated to hashContent.
function sha1(text) {
  const digest = hashContent(text);
  return digest;
}
|
|
66
|
+
|
|
67
|
+
// Task 126 (D-119) — the raw-tier scope covers BOTH halves of the session
|
|
68
|
+
// record: verbatim transcripts AND the Haiku-compressed sessions summaries
|
|
69
|
+
// (today-*.md / recent.md / archive.md — the middle tier that was otherwise
|
|
70
|
+
// a recall blind spot: discussed-but-never-graduated content). Exclusions:
|
|
71
|
+
// now.md (the volatile live buffer — already in context, and its constant
|
|
72
|
+
// truncation would churn the index) and non-.md observability files.
|
|
73
|
+
const RAW_TIER_DIRS = ['transcripts', 'sessions'];
|
|
74
|
+
const SESSIONS_EXCLUDE = new Set(['now.md']);
|
|
75
|
+
|
|
76
|
+
/**
 * Sync the raw-tier markdown files (context/transcripts and context/sessions)
 * into the `transcript_chunks` table, using rows in the shared `files` table
 * (keyed with the 'transcript:' prefix) as per-file content checkpoints.
 *
 * For each `.md` file whose content digest differs from its checkpoint, the
 * file's chunks are replaced and the checkpoint is updated in one transaction.
 * Checkpoints whose file is no longer listed are pruned together with their chunks.
 *
 * @param {object} [opts]
 * @param {object} opts.db - Database handle exposing `prepare()` and
 *   `transaction()` (used here in the better-sqlite3 style).
 * @param {string} opts.projectRoot - Project root containing `context/`.
 * @param {number} [opts.now=Date.now()] - Timestamp stored as `indexed_at`.
 * @returns {{files: number, chunks: number}} Count of files (re)indexed in this
 *   call and the total chunks written for them.
 */
export function syncTranscriptChunks({ db, projectRoot, now = Date.now() } = {}) {
  let files = 0;
  let chunks = 0;

  const entries = []; // {abs, sourceFile}
  // Key prefixes of directories that exist but could not be listed. Their
  // checkpoints must NOT be treated as orphans: a transient listing error
  // would otherwise wipe every indexed chunk under that directory.
  const unlistedKeyPrefixes = [];
  for (const sub of RAW_TIER_DIRS) {
    const dir = join(projectRoot, 'context', sub);
    if (!existsSync(dir)) continue;
    let names;
    try {
      names = readdirSync(dir).filter(
        (n) => n.endsWith('.md') && !(sub === 'sessions' && SESSIONS_EXCLUDE.has(n)),
      );
    } catch {
      unlistedKeyPrefixes.push(`${FILES_KEY_PREFIX}context/${sub}/`);
      continue;
    }
    for (const name of names) {
      entries.push({ abs: join(dir, name), sourceFile: `context/${sub}/${name}` });
    }
  }

  const getFileRow = db.prepare('SELECT mtime, sha1 FROM files WHERE path = ?');
  const upsertFileRow = db.prepare(
    'INSERT INTO files (path, mtime, sha1, indexed_at) VALUES (?, ?, ?, ?) ' +
      'ON CONFLICT(path) DO UPDATE SET mtime = excluded.mtime, sha1 = excluded.sha1, indexed_at = excluded.indexed_at',
  );
  const deleteChunks = db.prepare('DELETE FROM transcript_chunks WHERE source_file = ?');
  const insertChunk = db.prepare(
    'INSERT INTO transcript_chunks (source_file, chunk_idx, source_line, heading, body) VALUES (?, ?, ?, ?, ?)',
  );
  const deleteFileRow = db.prepare('DELETE FROM files WHERE path = ?');

  // Built once and reused per file: swap a file's chunks and update its
  // checkpoint atomically, so a failure never leaves chunks and checkpoint
  // disagreeing.
  const replaceFile = db.transaction((sourceFile, filesKey, parsed, mtime, digest) => {
    deleteChunks.run(sourceFile);
    for (const c of parsed) {
      insertChunk.run(sourceFile, c.chunkIdx, c.sourceLine, c.heading, c.body);
    }
    upsertFileRow.run(filesKey, mtime, digest, now);
  });

  for (const { abs, sourceFile } of entries) {
    const filesKey = FILES_KEY_PREFIX + sourceFile;
    let st;
    try {
      st = statSync(abs);
    } catch {
      continue;
    }
    const prev = getFileRow.get(filesKey);
    // NO mtime fast-path: two appends inside the filesystem's mtime
    // resolution would make the second invisible (caught as a flaky test —
    // rapid Stop hooks are the same shape in production). The content digest
    // is the authority; day-files are small, so the read cost is negligible.
    let text;
    try {
      text = readFileSync(abs, 'utf8');
    } catch {
      continue;
    }
    const digest = sha1(text);
    if (prev && prev.sha1 === digest) {
      continue; // content unchanged
    }

    const parsed = chunkTranscript(text);
    replaceFile(sourceFile, filesKey, parsed, Math.trunc(st.mtimeMs), digest);
    files += 1;
    chunks += parsed.length;
  }

  // Orphan-prune for THIS scope: a deleted/rotated file (transcripts OR
  // sessions) leaves its chunks + checkpoint behind otherwise. Scoped by the
  // key prefix so only rows owned by this function are touched.
  const live = new Set(entries.map((e) => FILES_KEY_PREFIX + e.sourceFile));
  const known = db
    .prepare("SELECT path FROM files WHERE path LIKE ?")
    .all(FILES_KEY_PREFIX + '%');
  const pruneTxn = db.transaction((filesKey) => {
    deleteChunks.run(filesKey.slice(FILES_KEY_PREFIX.length));
    deleteFileRow.run(filesKey);
  });
  for (const { path } of known) {
    if (live.has(path)) continue;
    // Directory listing failed this run — absence from `live` proves nothing.
    if (unlistedKeyPrefixes.some((prefix) => path.startsWith(prefix))) continue;
    pruneTxn(path);
  }

  return { files, chunks };
}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
// Task 104.1 — extract the CURRENT turn's tool activity from Anthropic's
|
|
2
|
+
// session JSONL (the Stop payload's `transcript_path`) so capture-turn can
|
|
3
|
+
// enrich the kit's own committed transcript (the L3 raw tier of the recall
|
|
4
|
+
// waterfall, design §19 / D-117).
|
|
5
|
+
//
|
|
6
|
+
// Why read the live JSONL: it is the only record of tool calls + results
|
|
7
|
+
// (the Stop payload itself carries only the assistant TEXT), and it expires
|
|
8
|
+
// (~30 days, machine-local) — we extract the current turn into OUR format at
|
|
9
|
+
// capture time; we never copy/snapshot the file (the user's 2026-06-06
|
|
10
|
+
// directive: enriching our own transcript, not a JSONL crutch).
|
|
11
|
+
//
|
|
12
|
+
// The JSONL internal format is NOT a documented Anthropic contract (only
|
|
13
|
+
// `transcript_path` is). Shapes below were verified EMPIRICALLY across 6
|
|
14
|
+
// sessions / 4 projects (2026-06-10):
|
|
15
|
+
// - entries: {type: 'user'|'assistant'|<harness types to skip>, message?}
|
|
16
|
+
// - message.content: a block LIST or a plain STRING (both real)
|
|
17
|
+
// - blocks: text / thinking / tool_use {id,name,input} / tool_result
|
|
18
|
+
// {tool_use_id, content: STRING or LIST of {type:'text',text}}
|
|
19
|
+
// - tool_result blocks ride USER-role entries (API convention) — a user
|
|
20
|
+
// entry is a real prompt boundary ONLY if it has text and no tool_result.
|
|
21
|
+
// Everything here is defensive: unrecognized shapes are skipped; any failure
|
|
22
|
+
// returns null. A format shift degrades the enrichment, never the capture.
|
|
23
|
+
//
|
|
24
|
+
// Public boundary:
|
|
25
|
+
// extractTurnToolActivity(jsonlText) → string|null (pure)
|
|
26
|
+
// readTranscriptTail(path, maxBytes?) → string (bounded file read)
|
|
27
|
+
|
|
28
|
+
import { openSync, readSync, closeSync, fstatSync } from 'node:fs';
|
|
29
|
+
|
|
30
|
+
// Caps (git-bloat control, the D-117 sub-decision (a)): one turn's Tools
|
|
31
|
+
// block stays a small fraction of a transcript day.
|
|
32
|
+
const RESULT_SNIPPET_CHARS = 300;
|
|
33
|
+
const INPUT_SUMMARY_CHARS = 160;
|
|
34
|
+
const BLOCK_CAP_CHARS = 4000;
|
|
35
|
+
// Tail bound: one turn comfortably fits; a mega-session file is never read whole.
|
|
36
|
+
const DEFAULT_TAIL_BYTES = 768 * 1024;
|
|
37
|
+
|
|
38
|
+
// The most informative input field per common tool; unknown tools fall back
|
|
39
|
+
// to a compact JSON summary. Order matters — first present key wins.
|
|
40
|
+
const REPRESENTATIVE_INPUT_KEYS = [
|
|
41
|
+
'command',
|
|
42
|
+
'file_path',
|
|
43
|
+
'pattern',
|
|
44
|
+
'query',
|
|
45
|
+
'url',
|
|
46
|
+
'path',
|
|
47
|
+
'prompt',
|
|
48
|
+
];
|
|
49
|
+
|
|
50
|
+
// Collapse every whitespace run to a single space, trim the ends, and cap the
// result at `max` characters, marking a cut with a trailing ellipsis.
function oneLine(s, max) {
  const collapsed = String(s).replace(/\s+/g, ' ').trim();
  if (collapsed.length <= max) {
    return collapsed;
  }
  return `${collapsed.slice(0, max)}…`;
}
|
|
54
|
+
|
|
55
|
+
// Produce a short one-line summary of a tool call's input object: the first
// representative key holding a non-blank string wins; otherwise the whole
// input is rendered as compact JSON. Non-object input yields ''.
function summarizeInput(input) {
  if (!input || typeof input !== 'object') return '';

  const chosenKey = REPRESENTATIVE_INPUT_KEYS.find(
    (key) => typeof input[key] === 'string' && input[key].trim() !== '',
  );
  if (chosenKey !== undefined) {
    return oneLine(input[chosenKey], INPUT_SUMMARY_CHARS);
  }

  let json;
  try {
    json = oneLine(JSON.stringify(input), INPUT_SUMMARY_CHARS);
  } catch {
    // e.g. a circular structure — no summary is better than a failure
    json = '';
  }
  return json;
}
|
|
68
|
+
|
|
69
|
+
// Reduce a tool_result's content to plain text. A string passes through
// unchanged; a list contributes the non-empty string `text` of each object
// entry, joined by single spaces; anything else becomes ''.
function flattenResultContent(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const texts = [];
  for (const block of content) {
    const isObject = Boolean(block) && typeof block === 'object';
    if (isObject && typeof block.text === 'string' && block.text !== '') {
      texts.push(block.text);
    }
  }
  return texts.join(' ');
}
|
|
79
|
+
|
|
80
|
+
// Return the object entries of `message.content` when it is a list; any other
// content shape (string, missing, non-array) yields an empty list.
function contentBlocks(message) {
  const content = message?.content;
  if (!Array.isArray(content)) {
    return [];
  }
  return content.filter((block) => typeof block === 'object' && block !== null);
}
|
|
84
|
+
|
|
85
|
+
// A user entry is a REAL prompt boundary when it carries prompt text (string
|
|
86
|
+
// content or a text block) and no tool_result blocks (results ride user role).
|
|
87
|
+
// True when `entry` is a user-typed prompt: a 'user' entry whose content is a
// non-blank string, or a block list with at least one non-blank text block and
// no tool_result block.
function isRealUserPrompt(entry) {
  if (entry?.type !== 'user') return false;

  const content = entry.message?.content;
  if (typeof content === 'string') {
    return content.trim() !== '';
  }

  let sawPromptText = false;
  for (const block of contentBlocks(entry.message)) {
    // A single tool_result disqualifies the entry regardless of any text.
    if (block.type === 'tool_result') return false;
    if (block.type === 'text' && typeof block.text === 'string' && block.text.trim() !== '') {
      sawPromptText = true;
    }
  }
  return sawPromptText;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Render the current turn's tool calls from session JSONL text as a markdown
 * bullet list, one line per call: `- name(summary) → result`.
 *
 * The current turn is everything after the last real user prompt in the text;
 * when no prompt is present, every parsed entry is used.
 *
 * @param {string} jsonlText - JSONL text, one JSON entry per line. Lines that
 *   fail to parse and entries that are not 'user'/'assistant' are skipped.
 * @returns {string|null} The bullet list, or null when the input is blank, has
 *   no usable entries, or the turn contains no tool calls.
 */
export function extractTurnToolActivity(jsonlText) {
  if (typeof jsonlText !== 'string' || jsonlText.trim() === '') return null;

  const parsedEntries = [];
  jsonlText.split('\n').forEach((rawLine) => {
    if (rawLine.trim() === '') return;
    let entry;
    try {
      entry = JSON.parse(rawLine);
    } catch {
      // partial first line of a tail read, or harness noise — skip
      return;
    }
    if (entry && (entry.type === 'user' || entry.type === 'assistant')) {
      parsedEntries.push(entry);
    }
  });
  if (parsedEntries.length === 0) return null;

  // Walk back to the most recent prompt boundary. Ending at -1 (none found)
  // attributes the whole window to the current turn rather than dropping it.
  let boundary = parsedEntries.length - 1;
  while (boundary >= 0 && !isRealUserPrompt(parsedEntries[boundary])) {
    boundary -= 1;
  }
  const turnBlocks = parsedEntries
    .slice(boundary + 1)
    .flatMap((entry) => contentBlocks(entry.message));

  const toolCalls = []; // {id, name, summary, result}
  const callsById = new Map();
  for (const block of turnBlocks) {
    switch (block.type) {
      case 'tool_use': {
        if (typeof block.name !== 'string') break;
        const call = {
          id: block.id,
          name: block.name,
          summary: summarizeInput(block.input),
          result: '',
        };
        toolCalls.push(call);
        if (typeof block.id === 'string') callsById.set(block.id, call);
        break;
      }
      case 'tool_result': {
        const target =
          typeof block.tool_use_id === 'string' ? callsById.get(block.tool_use_id) : undefined;
        // Only a call seen earlier in this turn, and still without a result, is filled.
        if (target && !target.result) {
          target.result = oneLine(flattenResultContent(block.content), RESULT_SNIPPET_CHARS);
        }
        break;
      }
      default:
        break;
    }
  }
  if (toolCalls.length === 0) return null;

  const rendered = [];
  let budgetUsed = 0;
  for (const { name, summary, result } of toolCalls) {
    const resultSuffix = result ? ` → ${result}` : '';
    const line = `- ${name}(${summary})${resultSuffix}`;
    const cost = line.length + 1; // +1 for the joining newline
    if (budgetUsed + cost > BLOCK_CAP_CHARS) break;
    rendered.push(line);
    budgetUsed += cost;
  }
  const omitted = toolCalls.length - rendered.length;
  if (omitted > 0) {
    rendered.push(`- …${omitted} more tool call(s) truncated`);
  }
  return rendered.join('\n');
}
|
|
155
|
+
|
|
156
|
+
// Bounded tail read — a turn comfortably fits in the window; a multi-MB
|
|
157
|
+
// session file is never loaded whole inside the Stop hook's budget.
|
|
158
|
+
/**
 * Read at most the last `maxBytes` bytes of a file and decode them as UTF-8.
 *
 * The window may begin mid-line (or mid-character), so callers should expect
 * the first line of the result to be partial.
 *
 * @param {string} path - File to read.
 * @param {number} [maxBytes=DEFAULT_TAIL_BYTES] - Upper bound on bytes read.
 * @returns {string} The decoded tail, or '' on any failure (missing file,
 *   unreadable file, invalid `maxBytes`).
 */
export function readTranscriptTail(path, maxBytes = DEFAULT_TAIL_BYTES) {
  let fd;
  try {
    fd = openSync(path, 'r');
    const size = fstatSync(fd).size;
    const start = Math.max(0, size - maxBytes);
    const len = size - start;
    const buf = Buffer.alloc(len);
    // readSync may return fewer bytes than requested, and the file can shrink
    // after fstat. Keep reading until the window is full or EOF, and decode
    // only what was actually read so no zero-filled padding leaks out as NULs.
    let filled = 0;
    while (filled < len) {
      const bytesRead = readSync(fd, buf, filled, len - filled, start + filled);
      if (bytesRead === 0) break; // EOF
      filled += bytesRead;
    }
    return buf.toString('utf8', 0, filled);
  } catch {
    return '';
  } finally {
    if (fd !== undefined) {
      try {
        closeSync(fd);
      } catch {
        // best-effort close
      }
    }
  }
}
|
package/src/write-fact.mjs
CHANGED
|
@@ -21,6 +21,7 @@ import { reindex } from './reindex.mjs';
|
|
|
21
21
|
import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
|
|
22
22
|
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
23
23
|
import { sanitizeHomePaths } from './sanitize.mjs';
|
|
24
|
+
import { sanitizePrivacyTags } from './privacy.mjs';
|
|
24
25
|
import { checkPoisonGuard, logPoisonGuardRejection } from './poison-guard.mjs';
|
|
25
26
|
|
|
26
27
|
const VALID_TYPES = new Set(['user', 'feedback', 'project', 'reference']);
|
|
@@ -157,6 +158,14 @@ export function writeFact(opts = {}) {
|
|
|
157
158
|
// — that's its purpose. The id hashes the SANITIZED body, so dedup keys on
|
|
158
159
|
// what actually lands on disk.
|
|
159
160
|
let { body, title } = opts;
|
|
161
|
+
// Privacy: strip <private>…</private> FIRST, on EVERY tier (cut-gate
|
|
162
|
+
// v0.3.1 finding — the tag was honored only by the UserPromptSubmit hook,
|
|
163
|
+
// so a fact written via cmk remember/mk_remember/import kept the secret).
|
|
164
|
+
// Runs before home-path sanitization, Poison_Guard, and id-generation, so
|
|
165
|
+
// the redacted body is what gets screened, hashed (dedup keys on what
|
|
166
|
+
// lands), and written.
|
|
167
|
+
body = sanitizePrivacyTags(body);
|
|
168
|
+
title = sanitizePrivacyTags(title);
|
|
160
169
|
if (opts.tier === 'P' || opts.tier === 'U') {
|
|
161
170
|
body = sanitizeHomePaths(body);
|
|
162
171
|
title = sanitizeHomePaths(title);
|
|
@@ -252,10 +261,32 @@ export function writeFact(opts = {}) {
|
|
|
252
261
|
// 2026-06-03 — "users should get it working from the start"). Best-effort: the
|
|
253
262
|
// fact is already durably on disk, so an index-rebuild hiccup must not turn a
|
|
254
263
|
// successful capture into an error — the next reindex/search self-heals.
|
|
264
|
+
//
|
|
265
|
+
// D-152: the failure is OBSERVABLE, not silently swallowed. A detached
|
|
266
|
+
// auto-extract child whose reindex was killed mid-rebuild (hook ceiling) used
|
|
267
|
+
// to leave INDEX.md lagging with ZERO trace — so a stale committed INDEX was
|
|
268
|
+
// undiagnosable (the user caught a 5-fact lag in the cut-gate). On throw we
|
|
269
|
+
// now record an INDEX_REBUILD_FAILED audit entry; HC-4 still detects the drift
|
|
270
|
+
// and `cmk reindex` corrects it. The `_reindexFn` seam is test-only.
|
|
271
|
+
const doReindex = opts._reindexFn ?? reindex;
|
|
255
272
|
try {
|
|
256
|
-
|
|
257
|
-
} catch {
|
|
258
|
-
// index rebuild is best-effort; capture already succeeded
|
|
273
|
+
doReindex({ tier: opts.tier, projectRoot: opts.projectRoot, userDir: opts.userDir, warn: () => {} });
|
|
274
|
+
} catch (reindexErr) {
|
|
275
|
+
// index rebuild is best-effort; capture already succeeded — but leave a
|
|
276
|
+
// trace so a lagging committed INDEX is diagnosable, never silent.
|
|
277
|
+
try {
|
|
278
|
+
appendAuditEntry(tierRoot, {
|
|
279
|
+
ts: createdAt,
|
|
280
|
+
action: 'index-rebuild-failed',
|
|
281
|
+
tier: opts.tier,
|
|
282
|
+
id,
|
|
283
|
+
reasonCode: REASON_CODES.INDEX_REBUILD_FAILED,
|
|
284
|
+
paths: { after: path },
|
|
285
|
+
extra: { error: String(reindexErr?.message ?? reindexErr) },
|
|
286
|
+
});
|
|
287
|
+
} catch {
|
|
288
|
+
// even the audit append is best-effort; the fact is already on disk
|
|
289
|
+
}
|
|
259
290
|
}
|
|
260
291
|
|
|
261
292
|
// Default create-audit (Task 123.A / D-103). writeFact is the single boundary
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: memory-search
|
|
3
|
+
description: Searches the project's recorded memory (claude-memory-kit) — decisions, conventions, architecture, the reasoning behind choices, and where things live — and returns a curated, cited summary. Fire whenever the answer might be something the project already established in past work, HOWEVER the question is phrased — any prior decision, convention, rationale, or "how/where/why is it this way" question, including oblique or roundabout asks ("why is everything so spread out?", "remind me what we settled on for X", "how come these files are tiny?"). Also fire when a "[claude-memory-kit] Memory available" hint appears on the prompt. The examples are illustrative, not a checklist — prefer recalling over re-deriving an answer from the code. The session-start snapshot is a bounded index; this skill reaches the rest. Skip only when the question is purely about uncommitted or just-edited live code that memory cannot know, concerns this conversation only, or the user asked to ignore memory.
|
|
4
|
+
context: fork
|
|
5
|
+
allowed-tools: mcp__cmk__mk_search mcp__cmk__mk_get mcp__cmk__mk_timeline mcp__cmk__mk_recent_activity Bash(cmk search *) Bash(cmk get *) Bash(cmk timeline *) Bash(cmk recent-activity *)
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
# Recalling from deep memory
|
|
9
|
+
|
|
10
|
+
You are a memory-retrieval agent. Search the kit's memory archive for: $ARGUMENTS
|
|
11
|
+
|
|
12
|
+
Query well: search the core noun phrases (e.g. "deploy target", "auth
|
|
13
|
+
library decision"), not a full sentence. If the line above carries NO query
|
|
14
|
+
(you run isolated and cannot see the conversation), start from the
|
|
15
|
+
"When the query is vague" section below instead.
|
|
16
|
+
|
|
17
|
+
Memory is the ground truth for documented knowledge and prior decisions
|
|
18
|
+
(the injected-snapshot authority rule). Your job is to find what is already
|
|
19
|
+
recorded and return ONLY a curated summary — never the raw dumps.
|
|
20
|
+
|
|
21
|
+
## The 3-step ladder, plus a last resort (filter before you fetch)
|
|
22
|
+
|
|
23
|
+
Work index → context → bodies. Full bodies are ~10x the tokens of an index
|
|
24
|
+
line; fetch them only for the ids that survived filtering.
|
|
25
|
+
|
|
26
|
+
**Step 1 — Search the index.** Prefer the MCP tool when the `cmk` server is
|
|
27
|
+
connected; otherwise the CLI:
|
|
28
|
+
|
|
29
|
+
- MCP: `mk_search` with `query` (natural language is fine — when semantic
|
|
30
|
+
recall is enabled the project default searches by meaning, so paraphrases still hit).
|
|
31
|
+
- CLI: `cmk search "<query>"`
|
|
32
|
+
|
|
33
|
+
Each hit is one line: id, tier/trust, source location, snippet. Run 1-3
|
|
34
|
+
query variants if the first misses (synonyms; the key noun alone). Drop
|
|
35
|
+
hits that are clearly off-topic or too generic.
|
|
36
|
+
|
|
37
|
+
**Step 2 — Context around an anchor (optional).** When a hit looks right
|
|
38
|
+
but you need what happened around it (what led to a decision, what followed
|
|
39
|
+
a fix):
|
|
40
|
+
|
|
41
|
+
- MCP: `mk_timeline` with `anchor: "<id>"` (and `depth_before`/`depth_after`).
|
|
42
|
+
- CLI: `cmk timeline <id>`
|
|
43
|
+
|
|
44
|
+
**Step 3 — Fetch full bodies for the survivors only.**
|
|
45
|
+
|
|
46
|
+
- MCP: `mk_get` with `ids: [...]` — batch all survivors in ONE call.
|
|
47
|
+
- CLI: `cmk get <id> <id> ...`
|
|
48
|
+
|
|
49
|
+
Rich facts carry **Why** / **How to apply** blocks — include those when the
|
|
50
|
+
question is about rationale or how to act on a rule.
|
|
51
|
+
|
|
52
|
+
**Step 4 — LAST RESORT: the session record.** Only when curated memory
|
|
53
|
+
(steps 1-3) has no answer and the question is about what actually happened
|
|
54
|
+
in a past session (an exact error message, the command that fixed
|
|
55
|
+
something, how a discussion went). This scope covers the verbatim
|
|
56
|
+
transcripts AND the compressed session summaries:
|
|
57
|
+
|
|
58
|
+
- MCP: `mk_search` with `scope: "transcripts"`.
|
|
59
|
+
- CLI: `cmk search "<query>" --scope transcripts`
|
|
60
|
+
|
|
61
|
+
Hits are raw turn excerpts (dialogue + the tools the agent ran), keyed
|
|
62
|
+
`T:<file>:<line>` — quote the relevant fragment in your summary; never dump
|
|
63
|
+
whole turns. If something found here is durably useful, say so in the
|
|
64
|
+
summary so the caller can capture it as a proper fact.
|
|
65
|
+
|
|
66
|
+
## When the query is vague
|
|
67
|
+
|
|
68
|
+
If you cannot form a concrete query, look at recent activity first, then
|
|
69
|
+
search the topic that stands out:
|
|
70
|
+
|
|
71
|
+
- MCP: `mk_recent_activity` (window `7d`) · CLI: `cmk recent-activity --window 7d`
|
|
72
|
+
|
|
73
|
+
## Output
|
|
74
|
+
|
|
75
|
+
Return a short, curated answer for the main conversation:
|
|
76
|
+
|
|
77
|
+
- The relevant facts/decisions, each with its citation id (e.g. `P-XXXXXXXX`)
|
|
78
|
+
and the Why when it matters.
|
|
79
|
+
- One line of source traceability per item (the source file the index line
|
|
80
|
+
showed).
|
|
81
|
+
- If nothing relevant exists, say exactly that — "no recorded memory on
|
|
82
|
+
this" — so the caller knows to derive it fresh and capture it afterward.
|
|
83
|
+
|
|
84
|
+
Never paste full fact files or long bodies into the summary; condense.
|
|
85
|
+
This skill is read-only — capturing new facts is the `memory-write` skill's
|
|
86
|
+
job.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# claude-memory-kit — added by `cmk install`. Do not edit by hand;
|
|
2
|
+
# `cmk install` refreshes these lines idempotently. Remove via
|
|
3
|
+
# `cmk uninstall`.
|
|
4
|
+
#
|
|
5
|
+
# Force LF on the COMMITTED memory files. The kit's frontmatter parser uses
|
|
6
|
+
# a strict-LF boundary and split('\n') readers; default Windows git
|
|
7
|
+
# (autocrlf=true) rewrites line endings at clone, which made every fact
|
|
8
|
+
# invisible to search on a Windows checkout (D-126). The read side now
|
|
9
|
+
# self-heals (tolerant CRLF reads), but pinning LF here PREVENTS the
|
|
10
|
+
# mangling so it never has to.
|
|
11
|
+
#
|
|
12
|
+
# Scoped to the text extensions the kit actually commits (.md + .json) under
|
|
13
|
+
# context/ — NOT a blanket `context/** text`, so a future committed binary
|
|
14
|
+
# under context/ is never force-normalized + corrupted.
|
|
15
|
+
context/**/*.md text eol=lf
|
|
16
|
+
context/**/*.json text eol=lf
|
|
@@ -26,7 +26,7 @@ The `cmk doctor` health checks verify each layer is wired correctly: install int
|
|
|
26
26
|
|
|
27
27
|
### Recalling memory (for Claude)
|
|
28
28
|
|
|
29
|
-
The snapshot injected at session start is a **bounded hot index, not everything** — there is a deeper, queryable archive. When a question is "what did we decide / what's our X / how does the user work / what's the setup
|
|
29
|
+
The snapshot injected at session start is a **bounded hot index, not everything** — there is a deeper, queryable archive. When a question is "what did we decide / what's our X / how does the user work / what's the setup / **how is this project structured or built / where does X live / what's the architecture**," **query your memory instead of re-deriving the answer from scratch** — the structure is a recorded decision, recall it before re-reading the files to reconstruct it:
|
|
30
30
|
|
|
31
31
|
- **`cmk search "<topic>"`** — find any captured fact (decisions, preferences, config, lessons) across the project + user tiers.
|
|
32
32
|
- **`context/memory/<type>_<slug>.md`** — the granular fact archive with full **Why / How** rationale (`context/memory/INDEX.md` lists them).
|
|
@@ -34,6 +34,8 @@ The snapshot injected at session start is a **bounded hot index, not everything*
|
|
|
34
34
|
|
|
35
35
|
Reach for these *first* — re-deriving an answer the project already recorded (by re-reading files, re-searching, or working it out again) wastes the memory that exists precisely so you don't have to. Recall from memory first, then verify against the source if needed.
|
|
36
36
|
|
|
37
|
+
**Authority rule:** when injected memory contradicts your assumptions, injected memory wins — it is the ground truth for documented knowledge and prior decisions (terminal/tool output stays the ground truth for live system state; official docs for version-specifics). Never treat a question as novel when the answer is already in your prompt.
|
|
38
|
+
|
|
37
39
|
### Memory write rules (for Claude)
|
|
38
40
|
|
|
39
41
|
Most capture is automatic — the Stop hook extracts durable facts each turn, no action needed. To capture something **explicitly**, the **`memory-write` skill** carries the full procedure; it loads on demand when you save a fact. The invariants it enforces:
|