memarium 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +146 -0
  3. package/assets/scripts/merge-books.mjs +921 -0
  4. package/assets/workflows/memarium-aggregate.yml +66 -0
  5. package/dist/bin/memarium.js +6 -0
  6. package/dist/src/aggregated-store.js +95 -0
  7. package/dist/src/cli.js +175 -0
  8. package/dist/src/commands/cat.js +20 -0
  9. package/dist/src/commands/doctor.js +383 -0
  10. package/dist/src/commands/init-wizard.js +201 -0
  11. package/dist/src/commands/init.js +45 -0
  12. package/dist/src/commands/list.js +19 -0
  13. package/dist/src/commands/prune.js +108 -0
  14. package/dist/src/commands/resume/config-pathmap.js +38 -0
  15. package/dist/src/commands/resume/fuzzy-match.js +13 -0
  16. package/dist/src/commands/resume/list-sessions.js +54 -0
  17. package/dist/src/commands/resume/render-prompt.js +121 -0
  18. package/dist/src/commands/resume/resume.js +121 -0
  19. package/dist/src/commands/show.js +21 -0
  20. package/dist/src/commands/sync.js +279 -0
  21. package/dist/src/commands/upgrade.js +47 -0
  22. package/dist/src/commands/workflow.js +126 -0
  23. package/dist/src/config.js +98 -0
  24. package/dist/src/content-project-inference.js +185 -0
  25. package/dist/src/device.js +47 -0
  26. package/dist/src/digest/manifest.js +121 -0
  27. package/dist/src/digest/project-filter.js +32 -0
  28. package/dist/src/digest/session-signal.js +106 -0
  29. package/dist/src/digest/toc.js +127 -0
  30. package/dist/src/git-ops.js +359 -0
  31. package/dist/src/index-store.js +35 -0
  32. package/dist/src/migrate.js +72 -0
  33. package/dist/src/project-identity.js +139 -0
  34. package/dist/src/project-resolve.js +42 -0
  35. package/dist/src/prompts.js +87 -0
  36. package/dist/src/repo-data-dir.js +25 -0
  37. package/dist/src/slug.js +28 -0
  38. package/dist/src/sources/base.js +1 -0
  39. package/dist/src/sources/claude-code.js +294 -0
  40. package/dist/src/sources/vscode-copilot.js +400 -0
  41. package/dist/src/types.js +1 -0
  42. package/dist/src/writer.js +240 -0
  43. package/package.json +60 -0
@@ -0,0 +1,921 @@
1
+ #!/usr/bin/env node
2
+ // Aggregate every device branch's book/ into main.
3
+ //
4
+ // Called from .github/workflows/memarium-aggregate.yml — checked-in on main,
5
+ // runs on every push to a non-main branch. Purely mechanical; never touches
6
+ // an LLM. The LLM work happens in-session via the /memarium skill on each
7
+ // device, then `memarium publish` writes per-device chronicle/topic/card
8
+ // files into that device's branch. This script merges all those device
9
+ // branches into main.
10
+ //
11
+ // memarium v0.2 schema (book index v2):
12
+ //
13
+ // chronicles/ — thread-grain diary entries, INSERT-only on each device.
14
+ // Across devices, dedup by threadId (latest updatedAt wins).
15
+ //
16
+ // topics/ — mid-grain knowledge pages, FULL-REWRITTEN per session.
17
+ // We CANNOT mechanically merge two devices' rewrites of the
18
+ // same topic — they diverge in voice and structure. So we
19
+ // preserve each as <topicSlug>.<device>.md.
20
+ //
21
+ // cards/ — atomic insight cards, INSERT/UPDATE per slug per project.
22
+ // Across devices, union by (project, slug); slug collision
23
+ // resolves to latest updatedAt. _global/cards/ unioned
24
+ // unconditionally.
25
+ //
26
+ // Algorithm:
27
+ // 1. List remote device branches (refs/remotes/origin/*, minus main + HEAD).
28
+ // 2. For each, read its BookIndex v2 via `git show`. Skip if missing or v1.
29
+ // 3. Walk each device's index entries; bucket them into 3 collections.
30
+ // 4. Apply per-collection merge rules; copy files from origin device branch.
31
+ // 5. Prune main-side files that no live device claims.
32
+ // 6. Regen book/index.md + book/_meta/timeline.md + per-project index pages.
33
+ // 7. git add the aggregated paths (book/, raw_sessions/, memory/, index files) + commit (no-op if nothing changed).
34
+ //
35
+ // The caller (yaml step) takes care of `git push`.
36
+
37
+ import { execSync } from "node:child_process";
38
+ import { existsSync, mkdirSync, writeFileSync, readdirSync, statSync, rmSync, unlinkSync } from "node:fs";
39
+ import { dirname, join, relative } from "node:path";
40
+
41
// Every relative path in this script is resolved against the directory the
// script was launched from.
const REPO_ROOT = process.cwd();

// Per-device book index (chronicles / topics / cards), read from each branch.
const BOOK_INDEX_PATH = ".memarium/index.book.json";
// Per-device spool index describing raw session files.
const SPOOL_INDEX_PATH = ".memarium/index.json";
// Union of all devices' spool entries, written by this script.
const AGGREGATED_INDEX_PATH = ".memarium/index.aggregated.json";
// Memory / entity / qa indexes: read from each device branch and rewritten
// here with the merged entry set.
const MEMORY_INDEX_PATH = ".memarium/index.memory.json";
const ENTITY_INDEX_PATH = ".memarium/index.entity.json";
const QA_INDEX_PATH = ".memarium/index.qa.json";
48
+
49
/**
 * Guard against path-traversal attacks in memory entry paths.
 *
 * A device's index.memory.json could (if malicious or corrupted) point
 * entry.path outside memory/ (e.g. ".github/workflows/foo.yml", "../../x").
 * Only relative paths that start with "memory/", contain no ".." segment,
 * no NUL byte and no absolute-path marker are accepted. Backslashes are
 * treated as separators before checking.
 *
 * Paths ending in a separator ("memory/", "memory/notes/") are rejected as
 * well: they can only name a directory, never a file to copy, and writing
 * to such a target fails.
 *
 * @param {unknown} p - Candidate path taken from a device index entry.
 * @returns {boolean} true when the path is safe to read and write.
 */
function isSafeMemoryPath(p) {
  if (typeof p !== "string" || p.length === 0) return false;
  if (p.includes("\0")) return false;
  // must be a relative path under memory/, no traversal
  const norm = p.split("\\").join("/");
  if (norm.startsWith("/")) return false;
  if (!norm.startsWith("memory/")) return false;
  const segments = norm.split("/");
  if (segments.includes("..")) return false;
  // An empty last segment means the path ended with a separator.
  if (segments[segments.length - 1] === "") return false;
  return true;
}
66
+
67
/**
 * Path-traversal guard for entity index entries.
 *
 * Accepts only non-empty strings without NUL bytes that, once backslashes
 * are read as "/", are relative, live under "memory/entities/", contain no
 * ".." segment and end in ".md". The suffix rule exists because the entity
 * prune only deletes *.md files, so anything else would never be cleaned up.
 *
 * @param {unknown} p - Candidate path taken from a device index entry.
 * @returns {boolean} true when the path may be copied into main.
 */
function isSafeEntityPath(p) {
  const isUsableString = typeof p === "string" && p.length > 0 && !p.includes("\0");
  if (!isUsableString) return false;

  const posixPath = p.replace(/\\/g, "/");
  const isAbsolute = posixPath.startsWith("/");
  const climbsUp = posixPath.split("/").includes("..");

  return (
    !isAbsolute &&
    posixPath.startsWith("memory/entities/") &&
    !climbsUp &&
    posixPath.endsWith(".md")
  );
}
84
+
85
/**
 * Path-traversal guard for qa index entries.
 *
 * Same rules as the entity guard, but the path must live under
 * "memory/qa/": a non-empty string with no NUL byte, relative, free of
 * ".." segments, and ending in ".md" (the qa prune only deletes *.md).
 *
 * @param {unknown} p - Candidate path taken from a device index entry.
 * @returns {boolean} true when the path may be copied into main.
 */
function isSafeQaPath(p) {
  if (typeof p !== "string") return false;
  if (p.length === 0 || p.includes("\0")) return false;

  const posixPath = p.replace(/\\/g, "/");
  const rejections = [
    posixPath.startsWith("/"),
    !posixPath.startsWith("memory/qa/"),
    posixPath.split("/").includes(".."),
    !posixPath.endsWith(".md"),
  ];
  return !rejections.some(Boolean);
}
99
+
100
/**
 * Run a command through the shell and return its stdout as UTF-8 text.
 *
 * The command name and arguments are joined with single spaces into one
 * command line, so NOTHING is escaped here: callers must pre-quote any
 * argument that contains whitespace or shell metacharacters. Output is
 * capped at 64 MiB. A non-zero exit makes execSync throw.
 *
 * @param {string} cmd - Program to run.
 * @param {string[]} args - Arguments, already shell-safe.
 * @returns {string} Captured stdout.
 */
function sh(cmd, args) {
  const commandLine = [cmd, ...args].join(" ");
  const options = { encoding: "utf8", maxBuffer: 64 * 1024 * 1024 };
  return execSync(commandLine, options);
}
103
+
104
/**
 * Non-throwing wrapper around sh().
 *
 * @param {string} cmd - Program to run.
 * @param {string[]} args - Arguments, passed to sh() unchanged.
 * @returns {{ok: true, stdout: string} | {ok: false, stdout: "", stderr: string}}
 *   On success the captured stdout; on failure an empty stdout plus the
 *   error's stderr rendered as a string ("" when the error carries none).
 */
function shOk(cmd, args) {
  let stdout;
  try {
    stdout = sh(cmd, args);
  } catch (err) {
    const stderr = String(err.stderr ?? "");
    return { ok: false, stdout: "", stderr };
  }
  return { ok: true, stdout };
}
111
+
112
/**
 * List the remote-tracking branches under origin that represent devices.
 *
 * Full refnames are requested instead of `%(refname:short)`: git may
 * abbreviate `refs/remotes/origin/HEAD` to plain `origin`, which would slip
 * past an "origin/HEAD" filter and be treated as a device branch whose
 * content is really main. Parsing the full name makes HEAD and main
 * reliably excludable.
 *
 * The format string stays wrapped in single quotes because sh() hands the
 * command line to a shell; a shell that does not strip the quotes leaves
 * them in the output, so they are removed from each line here.
 *
 * @returns {{ref: string, device: string}[]} One item per device branch:
 *   `ref` is "origin/<device>", `device` is the branch name.
 */
function listDeviceBranches() {
  const remotePrefix = "refs/remotes/origin/";
  const raw = sh("git", ["for-each-ref", "--format='%(refname)'", remotePrefix]);
  return raw
    .split("\n")
    .map((line) => line.trim().replace(/^'|'$/g, ""))
    .filter((refname) => refname.startsWith(remotePrefix))
    .map((refname) => refname.slice(remotePrefix.length))
    .filter((device) => device !== "" && device !== "HEAD" && device !== "main")
    .filter((device) => !device.includes("->"))
    .map((device) => ({ ref: `origin/${device}`, device }));
}
122
+
123
/**
 * Quote a `<ref>:<path>` object spec for the shell command line built by sh().
 *
 * Specs made only of characters that are inert in a shell are returned
 * unchanged. Otherwise:
 *  - POSIX shells: the spec is wrapped in single quotes, with embedded
 *    single quotes written as '\''.
 *  - win32: the spec is wrapped in double quotes; specs containing
 *    characters that stay special inside double quotes are refused.
 *
 * @param {string} spec - Object spec to pass to `git show`.
 * @returns {string|null} Shell-safe argument, or null when it cannot be
 *   quoted safely on this platform.
 */
function quoteGitObjectSpec(spec) {
  if (/^[A-Za-z0-9_@+=:,./-]+$/.test(spec)) return spec;
  if (process.platform === "win32") {
    if (/["%!\r\n]/.test(spec) || spec.endsWith("\\")) return null;
    return `"${spec}"`;
  }
  return `'${spec.split("'").join("'\\''")}'`;
}

/**
 * Read one file's content from a branch via `git show <ref>:<path>`.
 *
 * Both `ref` (a remote branch name) and `path` (taken from a device's index
 * file) are untrusted and end up on a shell command line, so the spec is
 * quoted first. Without that, a path containing a space fails to resolve
 * and one containing `;`, `$()` or backticks would run arbitrary commands.
 *
 * @param {string} ref - Branch ref, e.g. "origin/<device>".
 * @param {string} path - Repository-relative path inside that branch.
 * @returns {string|null} File content, or null when git cannot produce it
 *   (or the spec cannot be quoted safely).
 */
function readFileFromBranch(ref, path) {
  const spec = quoteGitObjectSpec(`${ref}:${path}`);
  if (spec === null) return null;
  const r = shOk("git", ["show", spec]);
  return r.ok ? r.stdout : null;
}
127
+
128
/**
 * Load a device branch's book index (BOOK_INDEX_PATH).
 *
 * @param {string} ref - Branch ref to read from.
 * @returns {object|null} The parsed index when it is version 2 and carries
 *   `chronicles`, `topics` and `cards`; null when the file is missing, is
 *   not valid JSON, has another version (pre-v0.2 devices are skipped
 *   silently until they upgrade and regenerate their index), or lacks one
 *   of the three collections.
 */
function loadBookIndexFromBranch(ref) {
  const raw = readFileFromBranch(ref, BOOK_INDEX_PATH);
  if (raw === null) return null;

  try {
    const index = JSON.parse(raw);
    const usable =
      index.version === 2 && index.chronicles && index.topics && index.cards;
    return usable ? index : null;
  } catch {
    // Malformed JSON (or a JSON `null`) is treated like a missing index.
    return null;
  }
}
144
+
145
/**
 * Load a device branch's spool index (SPOOL_INDEX_PATH), the index the
 * raw_sessions aggregation pass reads its entries from.
 *
 * @param {string} ref - Branch ref to read from.
 * @returns {object|null} The parsed index when it is version 1 and has an
 *   `entries` member; null when the file is missing, unparsable, or has a
 *   different shape.
 */
function loadSpoolIndexFromBranch(ref) {
  const raw = readFileFromBranch(ref, SPOOL_INDEX_PATH);
  if (raw === null) return null;

  let index;
  try {
    index = JSON.parse(raw);
    if (index.version !== 1) return null;
    if (!index.entries) return null;
  } catch {
    return null;
  }
  return index;
}
159
+
160
/**
 * Load a device branch's memory index (MEMORY_INDEX_PATH), used to union
 * typed memory entries across devices.
 *
 * @param {string} ref - Branch ref to read from.
 * @returns {object|null} The parsed index when it is a version-1 object
 *   with an `entries` member; null when the file is missing, unparsable,
 *   or has a different shape.
 */
function loadMemoryIndexFromBranch(ref) {
  const raw = readFileFromBranch(ref, MEMORY_INDEX_PATH);
  if (raw === null) return null;

  let index;
  try {
    index = JSON.parse(raw);
  } catch {
    return null;
  }
  const usable = Boolean(index) && index.version === 1 && Boolean(index.entries);
  return usable ? index : null;
}
173
+
174
/**
 * Load a device branch's entity index (ENTITY_INDEX_PATH), used to union
 * entity wiki pages across devices.
 *
 * @param {string} ref - Branch ref to read from.
 * @returns {object|null} The parsed index when it is a version-1 object
 *   with an `entries` member; null when the file is missing, unparsable,
 *   or has a different shape.
 */
function loadEntityIndexFromBranch(ref) {
  const raw = readFileFromBranch(ref, ENTITY_INDEX_PATH);
  if (raw === null) return null;

  let index;
  try {
    index = JSON.parse(raw);
  } catch {
    return null;
  }
  if (index && index.version === 1 && index.entries) return index;
  return null;
}
187
+
188
/**
 * Load a device branch's qa index (QA_INDEX_PATH), used to union distilled
 * Q&A pages across devices.
 *
 * @param {string} ref - Branch ref to read from.
 * @returns {object|null} The parsed index when it is a version-1 object
 *   with an `entries` member; null when the file is missing, unparsable,
 *   or has a different shape.
 */
function loadQaIndexFromBranch(ref) {
  const raw = readFileFromBranch(ref, QA_INDEX_PATH);
  if (raw === null) return null;

  const parse = () => {
    try {
      return JSON.parse(raw);
    } catch {
      return null;
    }
  };
  const index = parse();
  if (!index) return null;
  return index.version === 1 && index.entries ? index : null;
}
201
+
202
/**
 * Write `content` to a path relative to REPO_ROOT, creating any missing
 * parent directories first. An existing file is overwritten.
 *
 * @param {string} relPath - Destination, relative to REPO_ROOT.
 * @param {string|Uint8Array} content - Data handed to writeFileSync.
 */
function writeRel(relPath, content) {
  const target = join(REPO_ROOT, relPath);
  const parentDir = dirname(target);
  mkdirSync(parentDir, { recursive: true });
  writeFileSync(target, content);
}
207
+
208
+ // ---------------- main ----------------
209
+
210
/**
 * Return true when `candidate` is a repository-relative path that stays
 * inside `prefix`.
 *
 * Index files on device branches are untrusted input. This applies the same
 * rules the memory/entity/qa guards use (non-empty string, no NUL byte, no
 * ".." segment, backslashes read as separators) to the book/ and
 * raw_sessions/ trees, which were previously written without any check.
 *
 * @param {unknown} candidate - Path taken from, or derived from, an index entry.
 * @param {string} prefix - Required leading directory, e.g. "book/".
 * @returns {boolean}
 */
function isSafeTreePath(candidate, prefix) {
  if (typeof candidate !== "string" || candidate.length === 0) return false;
  if (candidate.includes("\0")) return false;
  const norm = candidate.split("\\").join("/");
  if (!norm.startsWith(prefix)) return false;
  return !norm.split("/").includes("..");
}

/**
 * Chronicles: dedup by threadId (latest updatedAt wins), then copy each
 * winner's file from its device branch.
 *
 * Entries whose path, or whose project-derived catalog path, would escape
 * book/ are logged and skipped so they reach neither disk nor the catalog.
 *
 * @param {{ref: string, device: string, bookIndex: object}[]} perDevice
 * @returns {{chronicleByThread: Map<string, object>, keptChroniclePaths: string[]}}
 */
function chroniclePass(perDevice) {
  const chronicleByThread = new Map(); // threadId -> { ref, device, entry }
  for (const { ref, device, bookIndex } of perDevice) {
    for (const c of Object.values(bookIndex.chronicles)) {
      if (!c || c.skip || !c.path) continue;
      if (!isSafeTreePath(c.path, "book/") || !isSafeTreePath(`book/${c.project}/index.md`, "book/")) {
        console.log(`chronicles: skipping entry ${c.threadId} with unsafe path ${JSON.stringify(c.path)} (project ${JSON.stringify(c.project)})`);
        continue;
      }
      const existing = chronicleByThread.get(c.threadId);
      if (!existing || c.updatedAt > existing.entry.updatedAt) {
        chronicleByThread.set(c.threadId, { ref, device, entry: c });
      }
    }
  }
  const keptChroniclePaths = [];
  for (const { ref, entry } of chronicleByThread.values()) {
    const body = readFileFromBranch(ref, entry.path);
    if (body === null) {
      console.log(` warn: ${ref}:${entry.path} missing despite index ref; skipping`);
      continue;
    }
    writeRel(entry.path, body);
    keptChroniclePaths.push(entry.path);
  }
  console.log(`chronicles: kept ${keptChroniclePaths.length} unique threads`);
  return { chronicleByThread, keptChroniclePaths };
}

/**
 * Topics: every device's rewrite is preserved as
 * book/<project>/topics/<slug>.<device>.md. Two devices' rewrites of the
 * same topic are never merged into one file.
 *
 * @param {{ref: string, device: string, bookIndex: object}[]} perDevice
 * @returns {{catalogTopics: object[], keptTopicPaths: string[]}}
 *   `catalogTopics` holds every topic whose target path is safe (tagged
 *   with its device), whether or not its file could be read;
 *   `keptTopicPaths` holds the files actually written.
 */
function topicPass(perDevice) {
  const keptTopicPaths = [];
  const catalogTopics = [];
  for (const { ref, device, bookIndex } of perDevice) {
    for (const t of Object.values(bookIndex.topics)) {
      if (!t) continue;
      // book/<project>/topics/<slug>.<device>.md
      const targetPath = `book/${t.project}/topics/${t.topicSlug}.${device}.md`;
      if (!isSafeTreePath(targetPath, "book/")) {
        console.log(`topics: skipping entry with unsafe target ${JSON.stringify(targetPath)}`);
        continue;
      }
      catalogTopics.push({ ...t, device });
      const body = readFileFromBranch(ref, t.path);
      if (body === null) continue;
      writeRel(targetPath, body);
      keptTopicPaths.push(targetPath);
    }
  }
  console.log(`topics: wrote ${keptTopicPaths.length} per-device topic files`);
  return { catalogTopics, keptTopicPaths };
}

/**
 * Cards: union by "<project>/<slug>"; on collision the latest updatedAt
 * wins. _global cards follow the same rule with project "_global".
 *
 * Entries without a safe book/ path (or with an unsafe project) are logged
 * and skipped.
 *
 * @param {{ref: string, device: string, bookIndex: object}[]} perDevice
 * @returns {{cardByKey: Map<string, object>, keptCardPaths: string[]}}
 */
function cardPass(perDevice) {
  const cardByKey = new Map(); // "<project>/<slug>" -> { ref, entry }
  for (const { ref, bookIndex } of perDevice) {
    for (const c of Object.values(bookIndex.cards)) {
      if (!c) continue;
      if (!isSafeTreePath(c.path, "book/") || !isSafeTreePath(`book/${c.project}/index.md`, "book/")) {
        console.log(`cards: skipping entry ${c.project}/${c.cardSlug} with unsafe path ${JSON.stringify(c.path)}`);
        continue;
      }
      const k = `${c.project}/${c.cardSlug}`;
      const existing = cardByKey.get(k);
      if (!existing || c.updatedAt > existing.entry.updatedAt) {
        cardByKey.set(k, { ref, entry: c });
      }
    }
  }
  const keptCardPaths = [];
  for (const { ref, entry } of cardByKey.values()) {
    const body = readFileFromBranch(ref, entry.path);
    if (body === null) continue;
    writeRel(entry.path, body);
    keptCardPaths.push(entry.path);
  }
  console.log(`cards: kept ${keptCardPaths.length} unique slugs (incl. _global)`);
  return { cardByKey, keptCardPaths };
}

/**
 * raw_sessions: union by "tool:sessionId" across ALL device branches (not
 * only those with a book index); the entry with the latest sourceMtimeMs
 * wins. Writes the union index only when at least one session was copied.
 *
 * @param {{ref: string, device: string}[]} branches
 * @returns {string[]} Paths of the session files written.
 */
function rawSessionPass(branches) {
  const rawByKey = new Map(); // "tool:sessionId" -> { ref, device, entry }
  for (const { ref, device } of branches) {
    const spoolIdx = loadSpoolIndexFromBranch(ref);
    if (!spoolIdx) {
      console.log(` ${device}: no v1 .memarium/index.json — no raw_sessions to aggregate`);
      continue;
    }
    for (const e of Object.values(spoolIdx.entries)) {
      if (!e || !e.tool || !e.sessionId || !e.relativePath) continue;
      const k = `${e.tool}:${e.sessionId}`;
      if (!isSafeTreePath(e.relativePath, "raw_sessions/")) {
        console.log(`raw_sessions: skipping entry ${k} with unsafe path ${JSON.stringify(e.relativePath)}`);
        continue;
      }
      const existing = rawByKey.get(k);
      if (!existing || (e.sourceMtimeMs ?? 0) > (existing.entry.sourceMtimeMs ?? 0)) {
        rawByKey.set(k, { ref, device, entry: e });
      }
    }
  }
  const keptRawPaths = [];
  const aggregatedEntries = {};
  for (const [k, { ref, device, entry }] of rawByKey.entries()) {
    const body = readFileFromBranch(ref, entry.relativePath);
    if (body === null) {
      console.log(` warn: ${ref}:${entry.relativePath} missing despite spool index; skipping`);
      continue;
    }
    writeRel(entry.relativePath, body);
    keptRawPaths.push(entry.relativePath);
    aggregatedEntries[k] = { ...entry, originDevice: device };
  }
  console.log(`raw_sessions: kept ${keptRawPaths.length} unique sessions from ${branches.length} device branch(es)`);

  // Skip the union index on repos where no device has spool data, so no
  // aggregated artifact appears there.
  if (keptRawPaths.length > 0) {
    writeRel(
      AGGREGATED_INDEX_PATH,
      JSON.stringify({ version: 1, entries: aggregatedEntries }, null, 2) + "\n",
    );
  }
  return keptRawPaths;
}

/**
 * Delete every *.md file under `rootRel` that is not in `keptPaths`.
 *
 * @param {string} rootRel - Directory to walk, relative to REPO_ROOT.
 * @param {string[]} keptPaths - Repo-relative, "/"-separated paths to keep.
 * @param {string[]} protectedPrefixes - Repo-relative prefixes never pruned.
 */
function pruneUnclaimedMarkdown(rootRel, keptPaths, protectedPrefixes) {
  const keptSet = new Set(keptPaths);
  const rootDir = join(REPO_ROOT, rootRel);
  if (!existsSync(rootDir)) return;
  const stack = [rootDir];
  while (stack.length > 0) {
    const cur = stack.pop();
    let ents;
    try {
      ents = readdirSync(cur, { withFileTypes: true });
    } catch {
      continue; // unreadable directory: leave it alone
    }
    for (const d of ents) {
      const abs = join(cur, d.name);
      if (d.isDirectory()) {
        stack.push(abs);
        continue;
      }
      if (!d.name.endsWith(".md")) continue;
      const rel = relative(REPO_ROOT, abs).split("\\").join("/");
      if (protectedPrefixes.some((prefix) => rel.startsWith(prefix))) continue;
      if (keptSet.has(rel)) continue;
      try {
        unlinkSync(abs);
      } catch (err) {
        // Best effort: a file we cannot delete must not abort the run.
        console.log(` warn: could not remove stale ${rel}: ${err.message}`);
      }
    }
  }
}

/**
 * Shared implementation of the memory / entities / qa passes: union index
 * entries by id across all device branches (latest updatedAt wins), copy
 * the winning files, then prune and rewrite the merged index.
 *
 * Prune and index rewrite run only when at least one device supplied an
 * index this run. With no index there is no authoritative view of what
 * should exist, and pruning would wipe main's copy (e.g. when no device
 * has upgraded yet). When an index was seen, the index is always rewritten
 * so a fully removed entry set yields an empty index, not a stale one.
 *
 * @param {object} pass
 * @param {{ref: string, device: string}[]} pass.branches - All device branches.
 * @param {string} pass.label - Log prefix ("memory", "entities", "qa").
 * @param {(ref: string) => object|null} pass.loadIndex - Per-branch index loader.
 * @param {(p: unknown) => boolean} pass.isSafePath - Traversal guard for entry paths.
 * @param {string} pass.indexPath - Where the merged index is written.
 * @param {string} pass.pruneRoot - Directory (relative to REPO_ROOT) to prune.
 * @param {string[]} [pass.foreignPrefixes] - Entry paths under these prefixes
 *   belong to another pass and are ignored here.
 * @param {string[]} [pass.protectedPrefixes] - Files under these prefixes are
 *   never pruned by this pass.
 * @returns {string[]} Paths of the files written.
 */
function indexedPagePass({
  branches,
  label,
  loadIndex,
  isSafePath,
  indexPath,
  pruneRoot,
  foreignPrefixes = [],
  protectedPrefixes = [],
}) {
  const latestById = new Map(); // id -> { ref, device, entry }
  let anyIndexSeen = false;
  for (const { ref, device } of branches) {
    const idx = loadIndex(ref);
    if (!idx) continue;
    anyIndexSeen = true;
    for (const e of Object.values(idx.entries)) {
      if (!e || !e.id || !e.path) continue;
      if (!isSafePath(e.path)) {
        console.log(`${label}: skipping entry ${e.id} with unsafe path ${JSON.stringify(e.path)}`);
        continue;
      }
      const relPath = e.path.split("\\").join("/");
      if (foreignPrefixes.some((prefix) => relPath.startsWith(prefix))) continue;
      const existing = latestById.get(e.id);
      if (!existing || (e.updatedAt ?? "") > (existing.entry.updatedAt ?? "")) {
        latestById.set(e.id, { ref, device, entry: e });
      }
    }
  }

  const keptPaths = [];
  const aggregated = {};
  for (const [id, { ref, device, entry }] of latestById.entries()) {
    const relPath = entry.path.split("\\").join("/");
    const body = readFileFromBranch(ref, relPath);
    if (body === null) continue;
    writeRel(relPath, body);
    keptPaths.push(relPath);
    aggregated[id] = { ...entry, path: relPath, originDevice: device };
  }

  if (anyIndexSeen) {
    pruneUnclaimedMarkdown(pruneRoot, keptPaths, protectedPrefixes);
    writeRel(indexPath, JSON.stringify({ version: 1, entries: aggregated }, null, 2) + "\n");
  }
  console.log(`${label}: kept ${keptPaths.length} entries from ${branches.length} device branch(es)`);
  return keptPaths;
}

/**
 * Aggregate every device branch into the working tree of main and commit.
 *
 * Steps, in order:
 *  1. list device branches and load each one's v2 book index;
 *  2. merge chronicles, topics and cards from devices that have a book index;
 *  3. merge raw_sessions, memory, entities and qa from ALL device branches
 *     (these do not depend on a book index existing);
 *  4. prune main-side files no device claims any more;
 *  5. regenerate the catalog when at least one book index was found;
 *  6. stage the aggregated paths and commit if anything is staged.
 *
 * Pushing is left to the caller.
 */
function main() {
  const branches = listDeviceBranches();
  if (branches.length === 0) {
    console.log("no device branches found — nothing to aggregate");
    return;
  }
  console.log(`found ${branches.length} device branch(es): ${branches.map((b) => b.device).join(", ")}`);

  const perDevice = [];
  for (const { ref, device } of branches) {
    const bookIndex = loadBookIndexFromBranch(ref);
    if (!bookIndex) {
      console.log(` ${device}: no v2 .memarium/index.book.json (book aggregation skipped for this device; raw_sessions still aggregated below)`);
      continue;
    }
    perDevice.push({ ref, device, bookIndex });
  }
  // No early return when no device has a book index: the passes that work
  // off the other indexes below are useful on their own.

  const { chronicleByThread, keptChroniclePaths } = chroniclePass(perDevice);
  const { catalogTopics, keptTopicPaths } = topicPass(perDevice);
  const { cardByKey, keptCardPaths } = cardPass(perDevice);
  const keptRawPaths = rawSessionPass(branches);

  // The memory pass owns memory/ except the entity and qa subtrees (handled
  // by their own passes) and memory/_primer/ (generated, not indexed).
  const keptMemoryPaths = indexedPagePass({
    branches,
    label: "memory",
    loadIndex: loadMemoryIndexFromBranch,
    isSafePath: isSafeMemoryPath,
    indexPath: MEMORY_INDEX_PATH,
    pruneRoot: "memory",
    foreignPrefixes: ["memory/entities/", "memory/qa/"],
    protectedPrefixes: ["memory/_primer/", "memory/entities/", "memory/qa/"],
  });
  const keptEntityPaths = indexedPagePass({
    branches,
    label: "entities",
    loadIndex: loadEntityIndexFromBranch,
    isSafePath: isSafeEntityPath,
    indexPath: ENTITY_INDEX_PATH,
    pruneRoot: "memory/entities",
  });
  const keptQaPaths = indexedPagePass({
    branches,
    label: "qa",
    loadIndex: loadQaIndexFromBranch,
    isSafePath: isSafeQaPath,
    indexPath: QA_INDEX_PATH,
    pruneRoot: "memory/qa",
  });

  // -------- prune --------
  pruneStale(keptChroniclePaths, keptTopicPaths, keptCardPaths, perDevice);
  pruneRawSessions(keptRawPaths);

  // -------- regen catalog --------
  // Skipped when no books were aggregated, so no empty book/index.md (and
  // stray book/ directory) is created.
  let catalogPaths = [];
  if (perDevice.length > 0) {
    catalogPaths = regenCatalog({
      chronicles: [...chronicleByThread.values()].map((x) => x.entry),
      topics: catalogTopics,
      cards: [...cardByKey.values()].map((x) => x.entry),
      devices: perDevice.map((x) => x.device),
    });
    console.log(`catalog: wrote ${catalogPaths.length} files`);
  }

  // -------- commit --------
  // Each path only exists when some device contributed to it, and
  // `git add` on a non-existent path is a hard error, so gate per path.
  const addPaths = [];
  if (existsSync(join(REPO_ROOT, "book"))) addPaths.push("book/");
  if (existsSync(join(REPO_ROOT, "raw_sessions"))) addPaths.push("raw_sessions/");
  if (existsSync(join(REPO_ROOT, AGGREGATED_INDEX_PATH))) addPaths.push(AGGREGATED_INDEX_PATH);
  if (existsSync(join(REPO_ROOT, "memory"))) addPaths.push("memory/");
  if (existsSync(join(REPO_ROOT, MEMORY_INDEX_PATH))) addPaths.push(MEMORY_INDEX_PATH);
  if (existsSync(join(REPO_ROOT, ENTITY_INDEX_PATH))) addPaths.push(ENTITY_INDEX_PATH);
  if (existsSync(join(REPO_ROOT, QA_INDEX_PATH))) addPaths.push(QA_INDEX_PATH);
  if (addPaths.length === 0) {
    console.log("nothing to aggregate (no books, no raw_sessions)");
    return;
  }
  sh("git", ["add", ...addPaths]);
  // Look at the index only: an unrelated untracked or modified file must
  // not trigger a `git commit` that then fails with nothing staged.
  const staged = sh("git", ["diff", "--cached", "--name-only"]);
  if (!staged.trim()) {
    console.log("no changes to commit");
    return;
  }
  const msg = `memarium aggregate: ${chronicleByThread.size} chronicles, ${keptTopicPaths.length} topic-versions, ${cardByKey.size} cards, ${keptRawPaths.length} raw_sessions, +${keptMemoryPaths.length} memory, +${keptEntityPaths.length} entities, +${keptQaPaths.length} qa across ${branches.length} device(s)`;
  // JSON.stringify doubles as shell quoting; msg contains only counts.
  sh("git", ["commit", "-m", JSON.stringify(msg)]);
  console.log(`committed: ${msg}`);
}
583
+
584
+ // ---------------- pruning ----------------
585
+
586
/**
 * Remove main-side book files that no device claimed during this run.
 *
 * Without this, deleting a chronicle / topic / card on a device would never
 * propagate to main and the files would linger as ghosts. The three kept
 * path lists are merged into one "claimed" set; every directory under
 * book/ except _meta is then handed to the per-subdirectory pruners
 * (chronicle, cards, topics).
 *
 * @param {string[]} chroniclePaths - Chronicle files written this run.
 * @param {string[]} topicPaths - Per-device topic files written this run.
 * @param {string[]} cardPaths - Card files written this run.
 * @param {{device: string}[]} perDevice - Devices that contributed a book
 *   index; their names are forwarded to pruneTopicsDir.
 */
function pruneStale(chroniclePaths, topicPaths, cardPaths, perDevice) {
  const claimed = new Set([...chroniclePaths, ...topicPaths, ...cardPaths]);
  const deviceNames = new Set(perDevice.map(({ device }) => device));

  const bookDir = join(REPO_ROOT, "book");
  if (!existsSync(bookDir)) return;

  readdirSync(bookDir).forEach((entryName) => {
    if (entryName === "_meta") return;
    const projectDir = join(bookDir, entryName);
    // Plain files directly under book/ (e.g. the front page) are not projects.
    if (!statSync(projectDir).isDirectory()) return;

    for (const sub of ["chronicle", "cards"]) {
      pruneSubdir(projectDir, sub, claimed);
    }
    pruneTopicsDir(projectDir, claimed, deviceNames, entryName);
  });
}
615
+
616
/**
 * Delete entries of `<projDir>/<sub>` whose repo-relative path
 * (`book/<project>/<sub>/<name>`) is not in `liveSet`.
 *
 * The project name is taken as the last component of `projDir` using the
 * path module rather than by splitting on "/": a literal split yields the
 * whole path on Windows (backslash separators) and an empty name when
 * `projDir` ends with a separator, and either mistake would make every
 * live file look stale.
 *
 * @param {string} projDir - Path of the project directory under book/.
 * @param {string} sub - Subdirectory to prune, e.g. "chronicle" or "cards".
 * @param {Set<string>} liveSet - Repo-relative paths that must be kept.
 */
function pruneSubdir(projDir, sub, liveSet) {
  const dir = join(projDir, sub);
  if (!existsSync(dir)) return;
  // Last path component, computed in a separator-agnostic way.
  const proj = relative(dirname(projDir), projDir);
  for (const name of readdirSync(dir)) {
    const rel = `book/${proj}/${sub}/${name}`;
    if (!liveSet.has(rel)) {
      rmSync(join(dir, name), { force: true });
    }
  }
}
627
+
628
/**
 * Walk raw_sessions/ on main and remove every .md file that is not in the
 * kept set, then sweep parent directories left empty by those removals so
 * a retired device does not leave its directory skeleton behind.
 *
 * Two details matter:
 *  - Each file's path is computed relative to REPO_ROOT and normalized to
 *    "/" separators before the lookup, matching how the other prune walks
 *    in this file compare against index paths.
 *  - Empty directories are removed with `recursive: true`; rmSync rejects
 *    directories otherwise, which would make the sweep a silent no-op. The
 *    directory has just been checked to be empty, so nothing else is lost.
 *
 * @param {string[]} keptRawPaths - Repo-relative session paths to keep.
 */
function pruneRawSessions(keptRawPaths) {
  const liveSet = new Set(keptRawPaths);
  const rawRoot = join(REPO_ROOT, "raw_sessions");
  if (!existsSync(rawRoot)) return;

  const dirsTouched = new Set();
  const stack = [rawRoot];
  while (stack.length > 0) {
    const cur = stack.pop();
    let entries;
    try {
      entries = readdirSync(cur, { withFileTypes: true });
    } catch {
      continue; // unreadable directory: leave it alone
    }
    for (const e of entries) {
      const abs = join(cur, e.name);
      if (e.isDirectory()) {
        stack.push(abs);
      } else if (e.isFile() && e.name.endsWith(".md")) {
        const rel = relative(REPO_ROOT, abs).split("\\").join("/");
        if (!liveSet.has(rel)) {
          rmSync(abs, { force: true });
          dirsTouched.add(dirname(abs));
        }
      }
    }
  }

  // Sweep empty dirs upward from each touched dir, never removing rawRoot.
  for (const d of dirsTouched) {
    let cur = d;
    while (cur.startsWith(rawRoot) && cur !== rawRoot) {
      let entries;
      try {
        entries = readdirSync(cur);
      } catch {
        break; // already gone or unreadable
      }
      if (entries.length > 0) break;
      try {
        rmSync(cur, { recursive: true, force: true });
      } catch {
        break;
      }
      cur = dirname(cur);
    }
  }
}
669
+
670
/**
 * Delete entries of `<projDir>/topics` that this run did not produce.
 *
 * `liveSet` already lists every <slug>.<device>.md written this run, so
 * anything else (leftovers from retired devices, stale slugs) is removed.
 * The device suffix is deliberately not parsed out of the file name,
 * because device names can contain dots themselves (e.g. "Mac.lan").
 *
 * @param {string} projDir - Path of the project directory under book/.
 * @param {Set<string>} liveSet - Repo-relative paths that must be kept.
 * @param {Set<string>} _liveDevices - Unused; kept for call compatibility.
 * @param {string} projectName - Project directory name used to build paths.
 */
function pruneTopicsDir(projDir, liveSet, _liveDevices, projectName) {
  const topicsDir = join(projDir, "topics");
  if (!existsSync(topicsDir)) return;

  const isStale = (fileName) => !liveSet.has(`book/${projectName}/topics/${fileName}`);
  for (const fileName of readdirSync(topicsDir)) {
    if (isStale(fileName)) {
      rmSync(join(topicsDir, fileName), { force: true });
    }
  }
}
681
+
682
+ // ---------------- catalog regen ----------------
683
+
684
/**
 * Regenerate the catalog pages: book/index.md, book/_meta/timeline.md and one
 * book/<project>/index.md per project.
 *
 * This mirrors src/digest/book-catalog.ts on purpose: keeping a copy here lets
 * the CI workflow run with plain node 20 and this single file, without an
 * npm install of memarium on every run.
 *
 * @param {{chronicles: object[], topics: object[], cards: object[], devices: string[]}} input
 * @returns {string[]} Repo-relative paths of the pages written, in write order.
 */
function regenCatalog({ chronicles, topics, cards, devices }) {
  const written = [];
  // Write a page and record it; the content is rendered before the write.
  const emit = (relPath, content) => {
    writeRel(relPath, content);
    written.push(relPath);
  };

  const byProject = (entry) => entry.project;
  const chrsByProj = bucketBy(chronicles, byProject);
  const topsByProj = bucketBy(topics, byProject);
  const crdsByProj = bucketBy(cards, byProject);

  // Alphabetical, except that the "_global" bucket always goes last.
  const globalLast = (a, b) =>
    a === "_global" ? 1 : b === "_global" ? -1 : a.localeCompare(b);
  const projects = [
    ...new Set([...chrsByProj.keys(), ...topsByProj.keys(), ...crdsByProj.keys()]),
  ].sort(globalLast);

  // Front page.
  emit(
    "book/index.md",
    renderFront({
      projects,
      chrsByProj,
      topsByProj,
      crdsByProj,
      latestUpdate: latestUpdate({ chronicles, topics, cards }),
      devices,
    }),
  );

  // Global timeline.
  emit("book/_meta/timeline.md", renderTimeline({ chronicles, topics, cards }));

  // Per-project index pages.
  for (const name of projects) {
    emit(
      `book/${name}/index.md`,
      renderProjectIndex(name, {
        chronicles: chrsByProj.get(name) ?? [],
        topics: topsByProj.get(name) ?? [],
        cards: crdsByProj.get(name) ?? [],
      }),
    );
  }

  return written;
}
733
+
734
/**
 * Build the markdown for the book's front page (book/index.md): front matter,
 * headline totals, the device list, one section per non-empty project and a
 * link to the global timeline. Labels come from the active string table.
 *
 * @param {object} args
 * @param {string[]} args.projects - Project names in display order.
 * @param {Map<string, object[]>} args.chrsByProj - Chronicles keyed by project.
 * @param {Map<string, object[]>} args.topsByProj - Topics keyed by project.
 * @param {Map<string, object[]>} args.crdsByProj - Cards keyed by project.
 * @param {string} args.latestUpdate - Value shown as the last-updated stamp.
 * @param {string[]} args.devices - Device names.
 * @returns {string} Page content, newline-joined.
 */
function renderFront({ projects, chrsByProj, topsByProj, crdsByProj, latestUpdate, devices }) {
  const [totalChrs, totalTops, totalCrds] = [chrsByProj, topsByProj, crdsByProj].map((m) => sumMap(m));
  const t = strings();
  const deviceList = devices.map((d) => `\`${d}\``).join(", ");

  const header = [
    "---",
    `title: ${t.notebook}`,
    `updated: ${latestUpdate}`,
    "---",
    "",
    `# ${t.notebook}`,
    "",
    t.aggregatedFrom(devices.length, latestUpdate),
    "",
    t.totals(projects.length, totalChrs, totalTops, totalCrds),
    "",
    "> Generated by `scripts/merge-books.mjs` in CI. Don't edit by hand.",
    "",
    `**${t.devicesLabel}**: ${deviceList}`,
    "",
    `## ${t.projects}`,
    "",
  ];

  const sections = projects.flatMap((name) => {
    const chrs = chrsByProj.get(name) ?? [];
    const tops = topsByProj.get(name) ?? [];
    const crds = crdsByProj.get(name) ?? [];
    // Projects with nothing to show get no section at all.
    if ([chrs.length, tops.length, crds.length].every((count) => count === 0)) return [];

    const section = [`### [${name}](${name}/index.md)`];
    if (chrs.length > 0) {
      section.push(`- ${chrs.length} ${t.chronicles}`);
    }
    if (tops.length > 0) {
      // One topic slug can exist in several per-device versions.
      const distinctSlugs = new Set(tops.map((topic) => topic.topicSlug)).size;
      section.push(`- ${distinctSlugs} ${t.topics} (${tops.length} ${t.deviceVersions})`);
    }
    if (crds.length > 0) {
      section.push(`- ${crds.length} ${t.cards}`);
    }
    section.push("");
    return section;
  });

  const footer = ["---", `- [${t.globalTimeline}](_meta/timeline.md)`, ""];
  return [...header, ...sections, ...footer].join("\n");
}
776
+
777
/**
 * Build the markdown for the global timeline page: every chronicle, topic and
 * card as one bullet, newest first, grouped under a `## YYYY-MM-DD` heading
 * taken from the first ten characters of its `updatedAt`.
 *
 * @param {{chronicles: object[], topics: object[], cards: object[]}} input
 * @returns {string} Page content, newline-joined.
 */
function renderTimeline({ chronicles, topics, cards }) {
  const events = [];
  const record = (ts, line) => {
    events.push({ ts, line });
  };

  for (const chr of chronicles) {
    record(chr.updatedAt, `📝 [${chr.title}](../${chr.path}) — _${chr.project}_ chronicle`);
  }
  for (const topic of topics) {
    record(topic.updatedAt, `📚 ${topic.topicSlug} — _${topic.project}_ topic by ${topic.device}`);
  }
  for (const card of cards) {
    record(card.updatedAt, `💡 [${card.cardSlug}](../${card.path}) — _${card.project}_ ${card.type} card`);
  }

  // Descending by timestamp.
  events.sort((left, right) => {
    if (left.ts < right.ts) return 1;
    if (left.ts > right.ts) return -1;
    return 0;
  });

  const t = strings();
  const out = [`# ${t.globalTimeline}`, "", "Newest first across every project + device.", ""];

  let currentDay = "";
  for (const { ts, line } of events) {
    // A missing timestamp is treated as an empty date.
    const day = (ts || "").slice(0, 10);
    if (day !== currentDay) {
      out.push("", `## ${day}`, "");
      currentDay = day;
    }
    out.push(`- ${line}`);
  }
  out.push("");
  return out.join("\n");
}
804
+
805
/**
 * Select the string table used when rendering book pages.
 *
 * The locale is read from the MEMARIUM_LOCALE environment variable (set by the
 * workflow from config.bookLocale), compared case-insensitively. An unset or
 * empty variable, and any locale other than "zh", yields the English table.
 *
 * @returns {object} STRINGS_ZH for "zh", otherwise STRINGS_EN.
 */
function strings() {
  const requested = process.env.MEMARIUM_LOCALE || "en";
  switch (requested.toLowerCase()) {
    case "zh":
      return STRINGS_ZH;
    default:
      return STRINGS_EN;
  }
}
815
+
816
/**
 * English string table for the rendered book pages. Plain entries are labels;
 * `aggregatedFrom` and `totals` format whole sentences and pluralize their
 * nouns by appending "s" unless the count is exactly 1.
 */
const STRINGS_EN = {
  notebook: "notebook",
  aggregatedFrom(n, ts) {
    const plural = n === 1 ? "" : "s";
    return `Aggregated from ${n} device${plural} · updated ${ts}`;
  },
  totals(p, ch, tp, cd) {
    const counted = (count, noun) => `${count} ${noun}${count === 1 ? "" : "s"}`;
    return [
      counted(p, "project"),
      counted(ch, "chronicle"),
      counted(tp, "topic"),
      counted(cd, "card"),
    ].join(" · ");
  },
  devicesLabel: "Devices",
  projects: "Projects",
  chronicles: "chronicle(s)",
  topics: "topic(s)",
  deviceVersions: "device-version(s)",
  cards: "card(s)",
  globalTimeline: "Global timeline",
};
829
+
830
/**
 * Chinese string table for the rendered book pages. Same keys as the English
 * table; the two formatter entries need no pluralization.
 */
const STRINGS_ZH = {
  notebook: "笔记本",
  aggregatedFrom(n, ts) {
    return `聚合自 ${n} 台设备 · 更新于 ${ts}`;
  },
  totals(p, ch, tp, cd) {
    const parts = [`${p} 项目`, `${ch} 篇流水账`, `${tp} 个 topic`, `${cd} 张卡片`];
    return parts.join(" · ");
  },
  devicesLabel: "设备",
  projects: "项目",
  chronicles: "篇流水账",
  topics: "个 topic",
  deviceVersions: "个 device-versions",
  cards: "张卡片",
  globalTimeline: "全局时间线",
};
842
+
843
/**
 * Build the markdown for one project's index page (book/<project>/index.md):
 * front matter, a one-line summary, then Topics, Chronicles and Cards sections
 * (each omitted when empty). Headings on this page are fixed English text.
 *
 * Ordering is deterministic: topic slugs and card types are sorted by key,
 * cards by slug, and chronicles newest first with ties left in input order.
 *
 * @param {string} project - Project name, used as the page title.
 * @param {{chronicles?: object[], topics?: object[], cards?: object[]}} args
 *   Chronicles flagged `skip` or lacking a `path` are left out.
 * @returns {string} Page content, newline-joined.
 */
function renderProjectIndex(project, args) {
  const chrs = (args.chronicles ?? []).filter((c) => !c.skip && c.path);
  const tops = args.topics ?? [];
  const crds = args.cards ?? [];

  // Order Map entries by key only. A bare `.sort()` would compare the
  // stringified [key, list] pair, which mis-orders keys that prefix one another.
  const byKey = ([a], [b]) => {
    const x = String(a);
    const y = String(b);
    return x < y ? -1 : x > y ? 1 : 0;
  };
  // Three-way comparator so equal timestamps compare as equal (stable order).
  const newestFirst = (a, b) =>
    a.updatedAt < b.updatedAt ? 1 : a.updatedAt > b.updatedAt ? -1 : 0;

  const lines = [];
  lines.push("---");
  lines.push(`title: ${project}`);
  lines.push("---");
  lines.push("");
  lines.push(`# ${project}`);
  lines.push("");
  lines.push(`${chrs.length} chronicles · ${new Set(tops.map((t) => t.topicSlug)).size} topics · ${crds.length} cards`);
  lines.push("");

  if (tops.length > 0) {
    const bySlug = bucketBy(tops, (t) => t.topicSlug);
    lines.push("## Topics (per-device versions)");
    lines.push("");
    for (const [slug, list] of [...bySlug.entries()].sort(byKey)) {
      const versions = list.map((v) => `[${v.device}](topics/${slug}.${v.device}.md)`).join(" · ");
      lines.push(`- **${slug}**: ${versions}`);
    }
    lines.push("");
  }

  if (chrs.length > 0) {
    lines.push("## Chronicles (newest first)");
    lines.push("");
    for (const c of [...chrs].sort(newestFirst)) {
      lines.push(`- [${c.title}](chronicle/${baseName(c.path)}) — ${c.updatedAt}`);
    }
    lines.push("");
  }

  if (crds.length > 0) {
    lines.push("## Cards");
    lines.push("");
    const byType = bucketBy(crds, (c) => c.type);
    for (const [type, list] of [...byType.entries()].sort(byKey)) {
      lines.push(`### ${type}`);
      // Sort a copy so the bucket array is not reordered in place.
      for (const c of [...list].sort((a, b) => a.cardSlug.localeCompare(b.cardSlug))) {
        lines.push(`- [${c.cardSlug}](cards/${c.cardSlug}.md)`);
      }
      lines.push("");
    }
  }
  return lines.join("\n");
}
888
+
889
/**
 * Group items into a Map keyed by `key(item)`.
 *
 * @param {Iterable<*>} xs - Items to group.
 * @param {(item: *) => *} key - Computes the bucket key for an item.
 * @returns {Map<*, Array>} Buckets in first-seen key order; items keep input order.
 */
function bucketBy(xs, key) {
  const buckets = new Map();
  for (const item of xs) {
    const bucketKey = key(item);
    if (buckets.has(bucketKey)) {
      buckets.get(bucketKey).push(item);
    } else {
      buckets.set(bucketKey, [item]);
    }
  }
  return buckets;
}
899
+
900
/**
 * Total number of items across all buckets of a Map of arrays.
 *
 * @param {Map<*, Array>} m - Map whose values expose a `length`.
 * @returns {number} Sum of the value lengths; 0 for an empty map.
 */
function sumMap(m) {
  return [...m.values()].reduce((total, bucket) => total + bucket.length, 0);
}
905
+
906
/**
 * Find the most recent `updatedAt` across chronicles, topics and cards and
 * return its date part (first ten characters).
 *
 * Timestamps are compared as strings. Entries whose `updatedAt` is missing,
 * empty or not a string are ignored instead of crashing the catalog render.
 *
 * @param {{chronicles: object[], topics: object[], cards: object[]}} input
 * @returns {string} The date part of the newest timestamp, or "—" when no
 *   entry carries a usable timestamp.
 */
function latestUpdate({ chronicles, topics, cards }) {
  let newest = "";
  for (const group of [chronicles, topics, cards]) {
    for (const entry of group) {
      const stamp = entry.updatedAt;
      // Single pass for the maximum; no need to sort the whole list.
      if (typeof stamp === "string" && stamp > newest) newest = stamp;
    }
  }
  return newest === "" ? "—" : newest.slice(0, 10);
}
915
+
916
/**
 * Return the part of a "/"-separated path after its last slash.
 *
 * @param {string} path - Path using forward slashes.
 * @returns {string} The final segment, or the whole input when it has no slash.
 */
function baseName(path) {
  // lastIndexOf yields -1 when there is no slash, so the slice starts at 0.
  return path.slice(path.lastIndexOf("/") + 1);
}
920
+
921
// Script entry point. NOTE(review): `main` is defined outside this excerpt — confirm it reports failures with a non-zero exit code.
+ main();