@lh8ppl/claude-memory-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cmk-compress-lazy.mjs +59 -0
- package/bin/cmk-daily-distill.mjs +67 -0
- package/bin/cmk-weekly-curate.mjs +56 -0
- package/bin/cmk.mjs +12 -0
- package/package.json +50 -0
- package/src/audit-log.mjs +103 -0
- package/src/auto-extract.mjs +742 -0
- package/src/capture-prompt.mjs +61 -0
- package/src/capture-turn.mjs +273 -0
- package/src/claude-md.mjs +212 -0
- package/src/compress-session.mjs +349 -0
- package/src/compressor.mjs +376 -0
- package/src/conflict-queue.mjs +796 -0
- package/src/cooldown.mjs +61 -0
- package/src/daily-distill.mjs +252 -0
- package/src/doctor.mjs +528 -0
- package/src/forget.mjs +335 -0
- package/src/frontmatter.mjs +73 -0
- package/src/import-anthropic-memory.mjs +266 -0
- package/src/index-db.mjs +154 -0
- package/src/index-rebuild.mjs +597 -0
- package/src/index.mjs +90 -0
- package/src/inject-context.mjs +484 -0
- package/src/install.mjs +327 -0
- package/src/lazy-compress.mjs +326 -0
- package/src/lock-discipline.mjs +166 -0
- package/src/mcp-server.mjs +498 -0
- package/src/memory-write.mjs +565 -0
- package/src/merge-facts.mjs +213 -0
- package/src/observe-edit.mjs +87 -0
- package/src/platform-commands.mjs +138 -0
- package/src/poison-guard.mjs +245 -0
- package/src/privacy.mjs +21 -0
- package/src/provenance.mjs +217 -0
- package/src/register-crons.mjs +354 -0
- package/src/reindex.mjs +134 -0
- package/src/repair.mjs +316 -0
- package/src/result-shapes.mjs +155 -0
- package/src/review-queue.mjs +345 -0
- package/src/roll.mjs +115 -0
- package/src/scratchpad.mjs +335 -0
- package/src/search.mjs +311 -0
- package/src/subcommands.mjs +1252 -0
- package/src/tier-paths.mjs +74 -0
- package/src/transcripts.mjs +234 -0
- package/src/trust.mjs +226 -0
- package/src/weekly-curate.mjs +454 -0
- package/src/write-fact.mjs +205 -0
- package/template/.claude/hooks/pre-tool-memory.js +78 -0
- package/template/.claude/hooks/transcript-capture.js +69 -0
- package/template/.claude/settings.json +27 -0
- package/template/.claude/skills/memory-write/SKILL.md +117 -0
- package/template/.gitignore.fragment +12 -0
- package/template/CLAUDE.md.template +49 -0
- package/template/docs/journey/journey-log.md.template +292 -0
- package/template/local/machine-paths.md.template +37 -0
- package/template/local/overrides.md.template +36 -0
- package/template/project/.index/.gitkeep +0 -0
- package/template/project/MEMORY.md.template +47 -0
- package/template/project/SOUL.md.template +35 -0
- package/template/project/memory/INDEX.md.template +47 -0
- package/template/project/memory/archive/superseded/.gitkeep +0 -0
- package/template/project/memory/archive/tombstones/.gitkeep +0 -0
- package/template/project/queues/.gitkeep +0 -0
- package/template/project/sessions/.gitkeep +0 -0
- package/template/project/transcripts/.gitkeep +0 -0
- package/template/support/cron-jobs/daily-memory-distill.md +15 -0
- package/template/support/cron-jobs/nightly-memsearch-index.md +17 -0
- package/template/support/cron-jobs/weekly-memory-curator.md +15 -0
- package/template/support/milvus-deploy/README.md +57 -0
- package/template/support/milvus-deploy/docker-compose.yml +66 -0
- package/template/support/scripts/auto-extract-memory.sh +102 -0
- package/template/support/scripts/memsearch-index-with-flush.sh +59 -0
- package/template/support/scripts/refresh-distill-timestamp.py +35 -0
- package/template/support/scripts/register-crons.py +242 -0
- package/template/support/scripts/run-daily-distill.sh +67 -0
- package/template/support/scripts/run-weekly-curate.sh +58 -0
- package/template/user/HABITS.md.template +18 -0
- package/template/user/LESSONS.md.template +18 -0
- package/template/user/USER.md.template +18 -0
- package/template/user/fragments/INDEX.md.template +23 -0
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
// Weekly curate (Task 34, T-029).
|
|
2
|
+
//
|
|
3
|
+
// Companion to Task 33's daily-distill. Once a week (cron-scheduled
|
|
4
|
+
// at Sun 09:00 per §1.4), rotate every today-{YYYY-MM-DD}.md older
|
|
5
|
+
// than 7 days into archive.md, dedupe bullets across days, and
|
|
6
|
+
// rebuild recent.md from the current week's files via dailyDistill.
|
|
7
|
+
//
|
|
8
|
+
// Public boundary:
|
|
9
|
+
// weeklyCurate({projectRoot, backend, now, cooldownMs?,
|
|
10
|
+
// archiveMaxBytes?, recentMaxBytes?, skipRecentRebuild?})
|
|
11
|
+
// → {action: 'curated' | 'skipped' | 'error', archivedDays?,
|
|
12
|
+
// currentDays?, archivedPath?, recentPath?, bytesIn?, bytesOut?,
|
|
13
|
+
// duration_ms, errorCategory?, errors?}
|
|
14
|
+
//
|
|
15
|
+
// Composes on:
|
|
16
|
+
// - cooldown.mjs — shared 120s Haiku gate (same marker daily-distill /
|
|
17
|
+
// compress-session / auto-extract touch)
|
|
18
|
+
// - compressor.mjs — CompressorBackend interface; bin wrapper passes
|
|
19
|
+
// HaikuViaAnthropicApi
|
|
20
|
+
// - daily-distill.mjs — inline call to refresh recent.md from current
|
|
21
|
+
// week (cooldownMs=0 override per §8.7.2 composition)
|
|
22
|
+
// - canonicalize package — bullet-level dedup (Task 5 primitive that
|
|
23
|
+
// Task 10's mergeFacts itself uses; we reuse it at the scratchpad
|
|
24
|
+
// bullet level since today-*.md bullets have no per-bullet ids)
|
|
25
|
+
// - result-shapes.mjs — errorResult + ERROR_CATEGORIES
|
|
26
|
+
//
|
|
27
|
+
// Per design §8.7 + tasks.md 34.
|
|
28
|
+
|
|
29
|
+
import {
|
|
30
|
+
appendFileSync,
|
|
31
|
+
existsSync,
|
|
32
|
+
mkdirSync,
|
|
33
|
+
readdirSync,
|
|
34
|
+
readFileSync,
|
|
35
|
+
unlinkSync,
|
|
36
|
+
} from 'node:fs';
|
|
37
|
+
import { join } from 'node:path';
|
|
38
|
+
import { canonicalize } from '@lh8ppl/cmk-canonicalize';
|
|
39
|
+
import { nowIso } from './audit-log.mjs';
|
|
40
|
+
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
41
|
+
import { HaikuTimeoutError } from './compressor.mjs';
|
|
42
|
+
import {
|
|
43
|
+
DEFAULT_COOLDOWN_MS,
|
|
44
|
+
isCooldownActive,
|
|
45
|
+
touchCooldownMarker,
|
|
46
|
+
} from './cooldown.mjs';
|
|
47
|
+
import { dailyDistill } from './daily-distill.mjs';
|
|
48
|
+
|
|
49
|
+
// Default for weeklyCurate's `archiveMaxBytes` option (byte budget quoted in
// the curate prompt and passed to backend.compress as maxOutputBytes).
const DEFAULT_ARCHIVE_MAX_BYTES = 4096;
|
|
50
|
+
// Default for weeklyCurate's `recentMaxBytes` option (forwarded to
// dailyDistill as maxOutputBytes).
const DEFAULT_RECENT_MAX_BYTES = 4096;
|
|
51
|
+
// Path segments of the sessions directory, joined onto projectRoot.
const SESSIONS_REL = ['context', 'sessions'];
|
|
52
|
+
// Path segments of archive.md, joined onto projectRoot.
const ARCHIVE_MD_REL = ['context', 'sessions', 'archive.md'];
|
|
53
|
+
// Seven days in milliseconds; splitByAge subtracts it from `now` to get the
// cutoff between "old" and "current" files.
const SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1000;
|
|
54
|
+
|
|
55
|
+
// Matches file names of the form today-YYYY-MM-DD.md; capture group 1 is the
// date string. Only the digit layout is checked, not calendar validity.
const TODAY_RE = /^today-(\d{4}-\d{2}-\d{2})\.md$/;
|
|
56
|
+
|
|
57
|
+
/**
 * Build the instruction prompt sent to the compressor for the weekly archive
 * pass.
 *
 * @param {number} archiveMaxBytes - Byte budget interpolated into hard rule 2.
 * @returns {string} Prompt text, lines joined with '\n', ending with the
 *   BEGIN marker line.
 */
function buildCurateInstructions(archiveMaxBytes) {
  const intro = [
    'You are a memory archivist for claude-memory-kit. The input below is a series of daily session summaries (one per day, oldest first) that are now older than 7 days. Consolidate them into a compact archive section.',
    '',
    'Output ONLY the consolidated Markdown. Do not write preamble. Do not acknowledge the task. Begin your response with the first heading.',
    '',
  ];

  const format = [
    'REQUIRED FORMAT:',
    '',
    'Group consolidated entries by ISO week start (Monday). Each week begins with:',
    '',
    '## Week of YYYY-MM-DD',
    '',
    'Under each week heading, emit bullets that summarize the work across the days in that week. Each bullet is a single line ≤120 chars. Bullets within a week appear in chronological order.',
    '',
  ];

  // Rule texts without their " N. " prefix; numbering is added below.
  const ruleTexts = [
    'Preserve every citation ID matching /#[ULP]-[A-Z0-9]{6,8}/ verbatim. Never invent new IDs.',
    `Total output ≤ ${archiveMaxBytes} bytes.`,
    'Deduplicate aggressively: if the same fact appears across multiple days, emit it ONCE. The deterministic dedup pass after your output will collapse exact-after-canonical duplicates; YOUR job is to catch the looser semantic duplicates (paraphrases, restatements).',
    'No prose between bullets — only the bulleted list per week section.',
    'Your output goes directly into archive.md. Do not address the user, do not refer to yourself.',
  ];
  const rules = ruleTexts.map((text, index) => ` ${index + 1}. ${text}`);

  const sections = [
    ...intro,
    ...format,
    'HARD RULES:',
    ...rules,
    '',
    '=== BEGIN OLD DAILY SUMMARIES TO ARCHIVE ===',
  ];
  return sections.join('\n');
}
|
|
81
|
+
|
|
82
|
+
/**
 * List every today-YYYY-MM-DD.md entry in the project's sessions directory.
 *
 * @param {string} projectRoot - Project root the sessions path is joined onto.
 * @returns {{name: string, date: string, path: string}[]} Matches sorted by
 *   ascending date string; empty when the directory does not exist.
 */
function listAllTodayFiles(projectRoot) {
  const dir = join(projectRoot, ...SESSIONS_REL);
  if (!existsSync(dir)) {
    return [];
  }

  // Plain string comparison: the YYYY-MM-DD layout makes it chronological.
  const byDate = (left, right) => {
    if (left.date < right.date) return -1;
    if (left.date > right.date) return 1;
    return 0;
  };

  return readdirSync(dir)
    .map((name) => ({ name, hit: TODAY_RE.exec(name) }))
    .filter(({ hit }) => hit !== null)
    .map(({ name, hit }) => ({ name, date: hit[1], path: join(dir, name) }))
    .sort(byDate);
}
|
|
94
|
+
|
|
95
|
+
/**
 * Partition today-file descriptors into those older than the seven-day
 * cutoff and everything else.
 *
 * A file counts as old when midnight UTC of its date is strictly before
 * `now - SEVEN_DAYS_MS`. Entries whose date does not parse land in `current`.
 *
 * @param {{date: string}[]} files - Descriptors carrying a YYYY-MM-DD date.
 * @param {string|number|Date} now - Reference instant, passed to `new Date`.
 * @returns {{old: object[], current: object[]}} Both lists keep input order.
 */
function splitByAge(files, now) {
  const threshold = new Date(now).getTime() - SEVEN_DAYS_MS;

  const isOld = (file) => {
    const midnightUtc = new Date(`${file.date}T00:00:00Z`).getTime();
    return Number.isFinite(midnightUtc) && midnightUtc < threshold;
  };

  return {
    old: files.filter((file) => isOld(file)),
    current: files.filter((file) => !isOld(file)),
  };
}
|
|
106
|
+
|
|
107
|
+
/**
 * Concatenate the given files into one Markdown buffer.
 *
 * Each file becomes a `## <date>` heading, a blank line, then the file's
 * UTF-8 content; sections are separated by a blank line.
 *
 * @param {{date: string, path: string}[]} files - Files to read, in order.
 * @returns {string} Combined buffer ('' for an empty list).
 */
function readBuffer(files) {
  const sections = [];
  for (const { date, path } of files) {
    const content = readFileSync(path, 'utf8');
    sections.push(`## ${date}\n\n${content}`);
  }
  return sections.join('\n\n');
}
|
|
112
|
+
|
|
113
|
+
// Bullet-level dedup pass. Parses the Haiku output, finds bullets whose
|
|
114
|
+
// canonicalize() output matches across days, collapses duplicates into
|
|
115
|
+
// a single bullet with a `merged_from` HTML-comment line appended.
|
|
116
|
+
//
|
|
117
|
+
// The v0.1.0 contract for "merge high-similarity bullets via task 10"
|
|
118
|
+
// (tasks.md 34.2): Task 10's mergeFacts API operates on per-fact files
|
|
119
|
+
// under <tier>/memory/<id>.md — today-*.md bullets have no per-bullet
|
|
120
|
+
// ids, so direct mergeFacts use is not the right tool. Instead the kit
|
|
121
|
+
// reuses the canonicalize primitive (the same one Task 10 uses to
|
|
122
|
+
// detect merge collisions) at the bullet text level. Looser semantic-
|
|
123
|
+
// similarity dedup remains Haiku's responsibility per the prompt.
|
|
124
|
+
//
|
|
125
|
+
// Input: archive Markdown (sections of `## Week of ...` headers
|
|
126
|
+
// followed by `- bullet` lines) + the list of source-date strings the
|
|
127
|
+
// input came from.
|
|
128
|
+
//
|
|
129
|
+
// Output: same shape, with consecutive same-canonical bullets within a
|
|
130
|
+
// week collapsed and a `<!-- merged_from: ['YYYY-MM-DD', ...] -->`
|
|
131
|
+
// comment line appended after the consolidated bullet.
|
|
132
|
+
//
|
|
133
|
+
// SCOPE CONTRACT (skill-review I1, 2026-05-28): dedup only triggers
|
|
134
|
+
// INSIDE a `## Week of ...` section. Bullets that appear BEFORE the
|
|
135
|
+
// first such heading (e.g., Haiku ignores the prompt format and emits
|
|
136
|
+
// bullets without a week wrapper) pass through verbatim with NO dedup.
|
|
137
|
+
// This is intentional: without a section to scope-attribute the merge,
|
|
138
|
+
// the merged_from comment would lose its provenance meaning. If a
|
|
139
|
+
// future Haiku response shape needs implicit-section dedup, lift this
|
|
140
|
+
// behavior here + add a corresponding test pinning the new shape.
|
|
141
|
+
/**
 * Collapse duplicate bullets inside `## Week of ...` sections of an archive.
 *
 * Bullets (`- ...` lines) are collected per contiguous run inside a week
 * section. Within a run, bullets whose canonicalize() result is equal are
 * collapsed to the first occurrence, followed by a
 * `<!-- merged_from: ['YYYY-MM-DD', ...] -->` comment listing the sorted
 * `sourceDates`. Bullets whose canonical form is empty are dropped. Any
 * non-bullet line (heading, comment, blank line, prose) ends the current run
 * and is emitted unchanged. Bullets outside a week section pass through
 * verbatim with no dedup; any other `## ` heading closes the week section.
 *
 * @param {string} archiveText - Markdown produced by the compressor. A
 *   null/undefined value is treated as ''.
 * @param {string[]} [sourceDates=[]] - Dates the archive input came from.
 *   Optional: previously an omitted value threw a TypeError as soon as one
 *   duplicate group was found. The caller's array is never mutated.
 * @returns {string} The archive text with duplicates collapsed.
 */
export function dedupBullets(archiveText, sourceDates = []) {
  // Every merged group is attributed to the full set of source dates, so the
  // comment line is identical for all groups; build it once.
  const dates = Array.isArray(sourceDates) ? [...sourceDates].sort() : [];
  const mergedFromLine = `<!-- merged_from: [${dates.map((d) => `'${d}'`).join(', ')}] -->`;

  const out = [];
  let pending = []; // bullets of the current run inside a week section
  let inWeekSection = false;

  const flushPending = () => {
    if (pending.length === 0) return;
    // Map iteration follows insertion order, which preserves the
    // first-occurrence order of the canonical groups.
    const groups = new Map();
    for (const { line, text } of pending) {
      const key = canonicalize(text);
      if (!key) continue; // empty after canonicalization: noise line, dropped
      const group = groups.get(key);
      if (group) {
        group.count += 1;
      } else {
        groups.set(key, { line, count: 1 });
      }
    }
    for (const { line, count } of groups.values()) {
      out.push(line);
      if (count > 1) out.push(mergedFromLine);
    }
    pending = [];
  };

  for (const line of (archiveText ?? '').split('\n')) {
    if (line.startsWith('## ')) {
      // A week heading opens a dedup scope; any other level-2 heading closes
      // it so its bullets are not attributed to the previous week.
      flushPending();
      out.push(line);
      inWeekSection = line.startsWith('## Week of ');
      continue;
    }
    if (inWeekSection && /^-\s/.test(line)) {
      pending.push({ line, text: line.replace(/^-\s+/, '') });
      continue;
    }
    // Comment lines, blank lines and anything unrecognized: end the current
    // run and pass the line through untouched.
    flushPending();
    out.push(line);
  }
  flushPending();
  return out.join('\n');
}
|
|
222
|
+
|
|
223
|
+
/**
 * Resolve the location of archive.md for a project.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} projectRoot joined with the ARCHIVE_MD_REL segments.
 */
function archiveMdPath(projectRoot) {
  const segments = [projectRoot, ...ARCHIVE_MD_REL];
  return join(...segments);
}
|
|
226
|
+
|
|
227
|
+
/**
 * Resolve the curate log file for a given day.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} date - Date label used as the file-name prefix.
 * @returns {string} `<sessions dir>/<date>.curate.log`.
 */
function curateLogPath(projectRoot, date) {
  const fileName = `${date}.curate.log`;
  return join(projectRoot, ...SESSIONS_REL, fileName);
}
|
|
230
|
+
|
|
231
|
+
/**
 * Append one JSON line to the day's curate log, creating the sessions
 * directory first when needed.
 *
 * @param {object} args
 * @param {string} args.projectRoot - Project root directory.
 * @param {string} args.date - Date label selecting the log file.
 * @param {object} args.entry - Value serialized with JSON.stringify.
 * @returns {string} Path of the log file that was appended to.
 */
function writeCurateLogEntry({ projectRoot, date, entry }) {
  const logFile = curateLogPath(projectRoot, date);
  const sessionsDir = join(projectRoot, ...SESSIONS_REL);
  mkdirSync(sessionsDir, { recursive: true });

  const record = `${JSON.stringify(entry)}\n`;
  appendFileSync(logFile, record, 'utf8');
  return logFile;
}
|
|
237
|
+
|
|
238
|
+
/**
 * Run the weekly curate cycle.
 *
 * Steps, in order:
 *   1. Validate `projectRoot` / `backend`; skip when the sessions directory
 *      is missing or the shared cooldown is active.
 *   2. Split the today-*.md files into `old` and `current` via splitByAge;
 *      skip when nothing is old.
 *   3. Send the old files to `backend.compress`, run dedupBullets over the
 *      result and append it to archive.md.
 *   4. Delete the archived today-*.md files (per-file failures are recorded,
 *      not thrown).
 *   5. Unless `skipRecentRebuild` is set or no current files exist, call
 *      dailyDistill with `cooldownMs: 0`.
 *
 * Every return path after the sessions-directory check writes one NDJSON
 * entry through writeCurateLogEntry.
 *
 * If the compressor yields empty or whitespace-only output, the call returns
 * an error (COMPRESS_FAILED) and leaves the old files untouched; nothing is
 * appended to archive.md.
 *
 * @param {object} [options]
 * @param {string} options.projectRoot - Project root directory (required).
 * @param {object} options.backend - Object with an async `compress` method;
 *   an optional `modelId()` method is used for log attribution.
 * @param {string} [options.now] - ISO timestamp string; defaults to nowIso().
 *   Its first 10 characters select the log file.
 * @param {number} [options.cooldownMs=DEFAULT_COOLDOWN_MS]
 * @param {number} [options.archiveMaxBytes=DEFAULT_ARCHIVE_MAX_BYTES]
 * @param {number} [options.recentMaxBytes=DEFAULT_RECENT_MAX_BYTES]
 * @param {boolean} [options.skipRecentRebuild=false]
 * @returns {Promise<object>} action: 'curated' | 'skipped' | 'error'
 */
export async function weeklyCurate({
  projectRoot,
  backend,
  now,
  cooldownMs = DEFAULT_COOLDOWN_MS,
  archiveMaxBytes = DEFAULT_ARCHIVE_MAX_BYTES,
  recentMaxBytes = DEFAULT_RECENT_MAX_BYTES,
  skipRecentRebuild = false,
} = {}) {
  const ts = now ?? nowIso();
  const date = ts.slice(0, 10);
  const t0 = Date.now();

  if (!projectRoot) {
    return errorResult({
      category: ERROR_CATEGORIES.MISSING_PROJECT_ROOT,
      errors: ['projectRoot is required'],
      duration_ms: Date.now() - t0,
    });
  }
  if (!backend || typeof backend.compress !== 'function') {
    return errorResult({
      category: ERROR_CATEGORIES.MISSING_BACKEND,
      errors: ['backend (CompressorBackend) is required'],
      duration_ms: Date.now() - t0,
    });
  }

  const sessionsDir = join(projectRoot, ...SESSIONS_REL);
  if (!existsSync(sessionsDir)) {
    return {
      action: 'skipped',
      reason: 'no-context-dir',
      duration_ms: Date.now() - t0,
    };
  }

  // Model id advertised by the backend, or null when it exposes none.
  const backendModelId = () =>
    typeof backend.modelId === 'function' ? backend.modelId() : null;

  // Every log entry starts with the same ts/scope pair; callers pass the
  // remaining fields in the order they should appear in the NDJSON line.
  const logEntry = (fields) =>
    writeCurateLogEntry({
      projectRoot,
      date,
      entry: { ts, scope: 'weekly-curate', ...fields },
    });

  if (isCooldownActive({ projectRoot, now: ts, cooldownMs })) {
    const duration_ms = Date.now() - t0;
    logEntry({
      input_bytes: 0,
      output_bytes: 0,
      // I2 fix: null model_id on the cooldown-skip path. Haiku was never
      // called — recording the backend's modelId would mis-attribute the
      // (non-existent) call in NDJSON analytics.
      model_id: null,
      cost_usd: 0,
      duration_ms,
      success: true,
      skipped_reason: 'cooldown',
    });
    return { action: 'skipped', reason: 'cooldown', duration_ms };
  }

  const files = listAllTodayFiles(projectRoot);
  const { old, current } = splitByAge(files, ts);

  // No old files → nothing to archive. recent.md is NOT rebuilt on this
  // path: the function only logs the skip and returns.
  // NOTE(review): model_id here is the backend's id although no compress
  // call is made, unlike the cooldown-skip path which records null —
  // confirm which is intended.
  if (old.length === 0) {
    const duration_ms = Date.now() - t0;
    logEntry({
      input_bytes: 0,
      output_bytes: 0,
      model_id: backendModelId(),
      cost_usd: 0,
      duration_ms,
      success: true,
      skipped_reason: 'no-old-files',
      current_days: current.length,
    });
    return {
      action: 'skipped',
      reason: 'no-old-files',
      currentDays: current.length,
      duration_ms,
    };
  }

  const buffer = readBuffer(old);
  const input_bytes = Buffer.byteLength(buffer, 'utf8');
  const instructions = buildCurateInstructions(archiveMaxBytes);
  const sourceDates = old.map((f) => f.date);

  let result;
  try {
    result = await backend.compress({
      input: buffer,
      instructions,
      preserveCitationIds: true,
      maxOutputBytes: archiveMaxBytes,
      timeoutMs: 50_000,
    });
    touchCooldownMarker({ projectRoot, now: ts });
  } catch (err) {
    // The cooldown marker is touched on failure too, so a failing backend is
    // not retried immediately.
    touchCooldownMarker({ projectRoot, now: ts });
    const errorCategory =
      err instanceof HaikuTimeoutError
        ? ERROR_CATEGORIES.HAIKU_TIMEOUT
        : ERROR_CATEGORIES.COMPRESS_FAILED;
    const duration_ms = Date.now() - t0;
    logEntry({
      input_bytes,
      output_bytes: 0,
      model_id: backendModelId(),
      cost_usd: 0,
      duration_ms,
      success: false,
      error_category: errorCategory,
    });
    return errorResult({
      category: errorCategory,
      errors: [err?.message ?? String(err)],
      duration_ms,
    });
  }

  const rawOutput = result?.outputText ?? '';
  const dedupedOutput = dedupBullets(rawOutput, sourceDates);

  // Guard against data loss: with empty output there is nothing to archive,
  // so the source files must not be deleted. Report the run as failed and
  // leave everything in place for the next cycle.
  if (dedupedOutput.trim() === '') {
    const duration_ms = Date.now() - t0;
    logEntry({
      input_bytes,
      output_bytes: 0,
      model_id: result?.modelId ?? backendModelId(),
      cost_usd: result?.costUSD ?? 0,
      duration_ms,
      success: false,
      error_category: ERROR_CATEGORIES.COMPRESS_FAILED,
    });
    return errorResult({
      category: ERROR_CATEGORIES.COMPRESS_FAILED,
      errors: ['compressor returned empty output; old daily files were left in place'],
      duration_ms,
    });
  }

  const output_bytes = Buffer.byteLength(dedupedOutput, 'utf8');

  // Append to archive.md (NOT overwrite — archive is append-only history).
  // The appended chunk always ends with one blank line.
  const archivePath = archiveMdPath(projectRoot);
  mkdirSync(sessionsDir, { recursive: true });
  const suffix = dedupedOutput.endsWith('\n') ? '' : '\n';
  appendFileSync(archivePath, dedupedOutput + suffix + '\n', 'utf8');

  // Delete OLD today-*.md files (audit retention via git history;
  // committed tier per .gitignore.fragment).
  // M3 fix: track per-file deletion errors in the NDJSON entry so ops
  // can detect partial-deletion events (Windows file lock, race
  // condition). Self-healing — next week's curate re-archives any
  // surviving OLD file — but observability matters.
  const deletionErrors = [];
  for (const f of old) {
    try {
      unlinkSync(f.path);
    } catch (err) {
      deletionErrors.push({ path: f.path, error: err?.message ?? String(err) });
    }
  }

  // Rebuild recent.md from current week via dailyDistill (cooldownMs=0
  // override per §8.7.2 — both Haiku calls belong to a single curate
  // cycle, not two independent invocations).
  let recentPath;
  let recentResult;
  if (!skipRecentRebuild && current.length > 0) {
    recentResult = await dailyDistill({
      projectRoot,
      backend,
      now: ts,
      cooldownMs: 0,
      maxOutputBytes: recentMaxBytes,
    });
    if (recentResult?.outputPath) recentPath = recentResult.outputPath;
  }

  const duration_ms = Date.now() - t0;
  logEntry({
    input_bytes,
    output_bytes,
    model_id: result?.modelId ?? backendModelId(),
    cost_usd: result?.costUSD ?? 0,
    duration_ms,
    success: true,
    archived_days: old.length,
    current_days: current.length,
    recent_rebuild_action: recentResult?.action ?? 'skipped',
    ...(deletionErrors.length > 0 ? { deletion_errors: deletionErrors } : {}),
  });

  return {
    action: 'curated',
    archivedDays: old.length,
    currentDays: current.length,
    archivedPath: archivePath,
    recentPath,
    bytesIn: input_bytes,
    bytesOut: output_bytes,
    duration_ms,
  };
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
// Per-fact archive writer (Task 7, refactored in cleanup-layer-2-cross-module-drift).
|
|
2
|
+
// Single public boundary: writeFact(opts) → result. See design §2.2 + §4.
|
|
3
|
+
//
|
|
4
|
+
// Uses shared modules: tier-paths (path resolution), frontmatter (js-yaml
|
|
5
|
+
// serialize), audit-log (canonical NDJSON), result-shapes (errorCategory enum).
|
|
6
|
+
// See CLAUDE.md "Shared modules" rule.
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
existsSync,
|
|
10
|
+
mkdirSync,
|
|
11
|
+
readdirSync,
|
|
12
|
+
readFileSync,
|
|
13
|
+
statSync,
|
|
14
|
+
writeFileSync,
|
|
15
|
+
} from 'node:fs';
|
|
16
|
+
import { join } from 'node:path';
|
|
17
|
+
import { generateId } from '@lh8ppl/cmk-canonicalize';
|
|
18
|
+
import { VALID_TIERS, resolveTierRoot, resolveFactDir } from './tier-paths.mjs';
|
|
19
|
+
import { parse, format } from './frontmatter.mjs';
|
|
20
|
+
import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
|
|
21
|
+
import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
|
|
22
|
+
|
|
23
|
+
// Accepted values for the `type` option; also used as the file-name prefix.
const VALID_TYPES = new Set(['user', 'feedback', 'project', 'reference']);
|
|
24
|
+
// Accepted values for the `writeSource` option.
const VALID_WRITE_SOURCES = new Set([
|
|
25
|
+
'user-explicit',
|
|
26
|
+
'auto-extract',
|
|
27
|
+
'compressor',
|
|
28
|
+
'manual-edit',
|
|
29
|
+
'imported',
|
|
30
|
+
]);
|
|
31
|
+
// Accepted values for the `trust` option.
const VALID_TRUST = new Set(['high', 'medium', 'low']);
|
|
32
|
+
// A slug starts with a letter or digit and continues with letters, digits,
// `_` or `-`; the match is case-insensitive.
const SLUG_PATTERN = /^[a-z0-9][a-z0-9_-]*$/i;
|
|
33
|
+
|
|
34
|
+
// Layer-2 review: PR-1 rejected \n / \r / : in scalar frontmatter fields as
|
|
35
|
+
// a minimum fix for the naive serializer (finding B2). PR-2's frontmatter.mjs
|
|
36
|
+
// (js-yaml CORE_SCHEMA) quotes those chars properly. The B2 restriction is
|
|
37
|
+
// LIFTED here — titles/sourceFile/sourceSha1 may contain newlines, colons,
|
|
38
|
+
// and other YAML-special chars; they round-trip correctly via parse/format.
|
|
39
|
+
// Round-trip tests in cli-write-fact.test.js (`B2 relaxation`) prove it.
|
|
40
|
+
|
|
41
|
+
/**
 * Check the writeFact options and collect one message per invalid field.
 *
 * @param {object} opts - Options as passed to writeFact.
 * @returns {string[]} Messages in field order: tier, type, slug, title, body,
 *   writeSource, trust, sourceFile, sourceLine, sourceSha1. Empty when all
 *   checks pass.
 */
function validateOptions(opts) {
  const isFilledString = (value) => typeof value === 'string' && value.length > 0;

  // [failed, message] pairs, listed in the order messages are reported.
  const checks = [
    [
      !opts.tier || !VALID_TIERS.has(opts.tier),
      "tier: must be 'U', 'P', or 'L'",
    ],
    [
      !opts.type || !VALID_TYPES.has(opts.type),
      'type: must be one of user/feedback/project/reference',
    ],
    [
      !isFilledString(opts.slug) || !SLUG_PATTERN.test(opts.slug),
      'slug: must start with alphanumeric and contain only [A-Za-z0-9_-]',
    ],
    [
      // A whitespace-only title is rejected as well.
      !isFilledString(opts.title) || !opts.title.trim(),
      'title: required, non-empty string',
    ],
    [
      !isFilledString(opts.body),
      'body: required, non-empty string',
    ],
    [
      !opts.writeSource || !VALID_WRITE_SOURCES.has(opts.writeSource),
      'writeSource: must be one of user-explicit/auto-extract/compressor/manual-edit/imported',
    ],
    [
      !opts.trust || !VALID_TRUST.has(opts.trust),
      'trust: must be one of high/medium/low',
    ],
    [
      !isFilledString(opts.sourceFile),
      'sourceFile: required, non-empty string',
    ],
    [
      !Number.isInteger(opts.sourceLine) || opts.sourceLine < 1,
      'sourceLine: required, positive integer',
    ],
    [
      !isFilledString(opts.sourceSha1),
      'sourceSha1: required, non-empty string',
    ],
  ];

  return checks.filter(([failed]) => failed).map(([, message]) => message);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Assemble the frontmatter object for a fact file.
 *
 * Property insertion order is the on-disk key order, so the required keys
 * come first in a fixed sequence, followed by whichever optional keys are
 * present.
 *
 * @param {object} opts - writeFact options (type, title, writeSource, trust,
 *   sourceFile, sourceLine, sourceSha1, plus optional mergedFrom,
 *   supersededBy, tags, related, isPrivate).
 * @param {{id: string, createdAt: string}} computed - Values derived by the
 *   caller.
 * @returns {object} Frontmatter with snake_case keys.
 */
function buildFrontmatterObject(opts, computed) {
  const frontmatter = {
    id: computed.id,
    type: opts.type,
    title: opts.title,
    created_at: computed.createdAt,
    write_source: opts.writeSource,
    trust: opts.trust,
    source_file: opts.sourceFile,
    source_line: opts.sourceLine,
    source_sha1: opts.sourceSha1,
  };

  // Optional keys are copied only when the option is truthy.
  const optional = [
    ['merged_from', opts.mergedFrom],
    ['superseded_by', opts.supersededBy],
    ['tags', opts.tags],
    ['related', opts.related],
  ];
  for (const [key, value] of optional) {
    if (value) {
      frontmatter[key] = value;
    }
  }

  // `private` is written only for a literal boolean true.
  if (opts.isPrivate === true) {
    frontmatter.private = true;
  }
  return frontmatter;
}
|
|
116
|
+
|
|
117
|
+
// Per Layer-2 review M2: filter INDEX.md from the dedup scan. Pre-fix the
|
|
118
|
+
// inline scanner here didn't exclude INDEX.md; harmless in practice (it
|
|
119
|
+
// has no `id:` matching real ids) but inconsistent with reindex/forget.
|
|
120
|
+
/**
 * Scan a fact directory for a Markdown file whose frontmatter `id` equals
 * the given id.
 *
 * Only regular `.md` files are examined; INDEX.md is skipped.
 *
 * @param {string} factDir - Directory to scan (not recursive).
 * @param {string} id - Fact id to look for.
 * @returns {string|null} Path of the first matching file, or null when the
 *   directory is missing or nothing matches.
 */
function findExistingFactById(factDir, id) {
  if (!existsSync(factDir)) {
    return null;
  }

  const candidates = readdirSync(factDir, { withFileTypes: true }).filter(
    (dirent) =>
      dirent.isFile() && dirent.name.endsWith('.md') && dirent.name !== 'INDEX.md',
  );

  for (const dirent of candidates) {
    const candidatePath = join(factDir, dirent.name);
    if (!statSync(candidatePath).isFile()) {
      continue;
    }
    const parsed = parse(readFileSync(candidatePath, 'utf8'));
    if (parsed.frontmatter?.id === id) {
      return candidatePath;
    }
  }
  return null;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Read the frontmatter `id` of the file at `path`.
 *
 * @param {string} path - File to inspect.
 * @returns {*} The frontmatter id, or null when the file does not exist or
 *   its frontmatter carries no id.
 */
function readExistingFactId(path) {
  if (existsSync(path)) {
    const text = readFileSync(path, 'utf8');
    const parsed = parse(text);
    return parsed.frontmatter?.id ?? null;
  }
  return null;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Write one fact file `<type>_<slug>.md` into the tier's fact directory.
 *
 * Outcomes:
 *   - invalid options → error result (SCHEMA) with id/path null
 *   - a file already exists at the target path with the same id →
 *     'skipped' / 'duplicate' (audit entry appended)
 *   - a file already exists at the target path with a different id, or with
 *     no readable id → error result (COLLISION); the file is never
 *     overwritten
 *   - another file in the fact directory carries the same id →
 *     'skipped' / 'duplicate-elsewhere' (audit entry appended)
 *   - otherwise the file is written → 'created'
 *
 * @param {object} [opts] - See validateOptions for the required fields.
 *   `opts.id` and `opts.createdAt` override the generated id / current time.
 * @returns {object} Result with `action` 'created' | 'skipped', or the
 *   errorResult shape.
 */
export function writeFact(opts = {}) {
  const errors = validateOptions(opts);
  if (errors.length > 0) {
    return errorResult({
      category: ERROR_CATEGORIES.SCHEMA,
      errors,
      id: null,
      path: null,
    });
  }

  const id = opts.id ?? generateId(opts.tier, opts.body);
  const createdAt = opts.createdAt ?? nowIso();
  const tierRoot = resolveTierRoot(opts);
  const factDir = resolveFactDir(opts.tier, tierRoot);
  const filename = `${opts.type}_${opts.slug}.md`;
  const path = join(factDir, filename);

  // Existence is checked separately from the id lookup: readExistingFactId
  // returns null both for "no file" and for "file without an id", and the
  // second case must not fall through to the write below.
  if (existsSync(path)) {
    const existingIdAtPath = readExistingFactId(path);
    if (existingIdAtPath !== null && existingIdAtPath === id) {
      appendAuditEntry(tierRoot, {
        ts: createdAt,
        action: 'skipped',
        tier: opts.tier,
        id,
        reasonCode: REASON_CODES.DUPLICATE,
        paths: { before: path },
      });
      return { action: 'skipped', skipReason: 'duplicate', id, path };
    }
    const found =
      existingIdAtPath === null
        ? 'no readable id'
        : `different id ${existingIdAtPath}`;
    return errorResult({
      category: ERROR_CATEGORIES.COLLISION,
      errors: [`File exists at ${path} with ${found}; refusing overwrite`],
      id,
      path,
    });
  }

  const elsewhere = findExistingFactById(factDir, id);
  if (elsewhere) {
    appendAuditEntry(tierRoot, {
      ts: createdAt,
      action: 'skipped',
      tier: opts.tier,
      id,
      reasonCode: REASON_CODES.DUPLICATE_ELSEWHERE,
      paths: { before: elsewhere, after: path },
    });
    return {
      action: 'skipped',
      skipReason: 'duplicate-elsewhere',
      id,
      path,
      duplicateAt: elsewhere,
    };
  }

  mkdirSync(factDir, { recursive: true });
  const frontmatter = buildFrontmatterObject(opts, { id, createdAt });
  // The body is wrapped in newlines before being handed to format().
  writeFileSync(path, format({ frontmatter, body: `\n${opts.body}\n` }), 'utf8');

  return { action: 'created', id, path };
}
|