@agfpd/iapeer-memory-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ /**
2
+ * Migration of a harness's built-in per-peer auto-memory into the vault's
3
+ * agent-memory zone (`06_Agent_Memory/<agent>/`).
4
+ *
5
+ * TS port of the reference `scripts/migrate-auto-memory.py` (behavioural
6
+ * parity against `tests/python/test_migrate_auto_memory.py`, 16 fixtures).
7
+ * Deterministic, no LLM:
8
+ *
9
+ * 1. parse each source `.md` frontmatter (flat parser — auto-memory is
10
+ * simple);
11
+ * 2. map the harness `type` → vault `subtype` (taxonomy tokens):
12
+ * user → person_profile, feedback → feedback, project → context,
13
+ * reference → reference, anything else → context. A `feedback` note
14
+ * that is semantically a pitfall cannot be told apart
15
+ * deterministically — re-filing to `pitfall` is the agent's manual step
16
+ * after migration (distill phase 5);
17
+ * 3. build the agent-memory frontmatter (title from filename, type/status
18
+ * tokens from the taxonomy, description through the SHARED YAML-safe
19
+ * serialiser, created from birthtime/mtime, author = agent);
20
+ * 4. per-file: backup → write target (atomic) → unlink source. Idempotent:
21
+ * an existing target file is skipped, never overwritten.
22
+ *
23
+ * ADAPTER SCOPE: the ENGINE is source-agnostic — the adapter supplies the
24
+ * source directory (claude: `~/.claude/agent-memory/<agent>/` for launchd
25
+ * peers, `~/.claude/projects/<slug>/memory/` for project sessions). The
26
+ * codex memories source location/format is NOT fact-checked yet — wiring
27
+ * it up is the codex-adapter's job once verified against a live codex
28
+ * (we never invent a format from the model's memory).
29
+ */
30
+
31
+ import fs from "node:fs";
32
+ import path from "node:path";
33
+ import type { TaxonomyPreset } from "./taxonomy.js";
34
+ import { yamlSafeScalar } from "./fm-update.js";
35
+
36
/**
 * Names of source files that receive a backup copy but are never converted
 * into vault notes.
 */
export const SKIP_FILES: ReadonlySet<string> = new Set<string>(["MEMORY.md"]);
38
+
39
/**
 * Minimal, flat frontmatter reader.
 *
 * Recognises a leading `---` … `---` fence and reads every `key: value` line
 * inside it (split on the FIRST colon, both sides trimmed). Blank lines,
 * lines starting with `#`, and lines without a colon are ignored; a repeated
 * key keeps its last value. Values are returned verbatim — no YAML unquoting.
 *
 * @param text Full file contents.
 * @returns `[fields, body]`, where `body` is everything after the closing
 *   fence. When no fence is found the result is `[{}, text]`.
 */
export function parseFlatFrontmatter(text: string): [Record<string, string>, string] {
  const fence = /^---[^\S\n]*\n([\s\S]*?\n)---[^\S\n]*(?:\n|$)/.exec(text);
  if (fence === null) return [{}, text];

  const fields: Record<string, string> = {};
  fence[1].split("\n").forEach((raw) => {
    const isBlank = raw.trim() === "";
    if (isBlank || raw.startsWith("#")) return;
    const colon = raw.indexOf(":");
    if (colon === -1) return;
    const key = raw.slice(0, colon).trim();
    fields[key] = raw.slice(colon + 1).trim();
  });

  return [fields, text.slice(fence[0].length)];
}
52
+
53
/**
 * Translate a harness auto-memory `type` into the vault subtype token taken
 * from the taxonomy preset.
 *
 * Matching is case-insensitive and ignores surrounding whitespace:
 * `user` → personProfile, `feedback` → feedback, `reference` → reference;
 * `project` and every other (or missing) value → context.
 */
export function mapTypeToSubtype(oldType: string, taxonomy: TaxonomyPreset): string {
  const tokens = taxonomy.subtypes;
  const key = oldType.trim().toLowerCase();
  if (key === "user") return tokens.personProfile;
  if (key === "feedback") return tokens.feedback;
  if (key === "reference") return tokens.reference;
  // "project" and anything unrecognised share the context bucket.
  return tokens.context;
}
69
+
70
/**
 * `YYYY-MM-DD` creation date of a file, rendered from the UTC ISO timestamp.
 * Uses the birth time when the stat result reports one (> 0) and falls back
 * to the modification time otherwise. Throws whatever `fs.statSync` throws.
 */
function fileCreatedDate(p: string): string {
  const { birthtimeMs, mtimeMs } = fs.statSync(p);
  const millis = birthtimeMs > 0 ? birthtimeMs : mtimeMs;
  const iso = new Date(millis).toISOString();
  return iso.slice(0, 10);
}
75
+
76
/**
 * Render the agent-memory frontmatter block, closing fence and trailing
 * newline included.
 *
 * Field order is fixed: title, type, subtype, status, description, created,
 * author. Only `description` goes through the shared `yamlSafeScalar`
 * serialiser (an empty description is written as `''`); every other value is
 * interpolated as-is.
 * NOTE(review): `title` and `author` are not YAML-escaped here — confirm the
 * callers only pass values that are safe as plain scalars.
 */
export function buildNewFrontmatter(opts: {
  title: string;
  subtype: string;
  description: string;
  created: string;
  author: string;
  taxonomy: TaxonomyPreset;
}): string {
  const { taxonomy } = opts;
  const description = opts.description ? yamlSafeScalar(opts.description) : "''";
  const fields: Array<[string, string]> = [
    ["title", opts.title],
    ["type", taxonomy.types.agentMemory],
    ["subtype", opts.subtype],
    ["status", taxonomy.statusTokens.current],
    ["description", description],
    ["created", opts.created],
    ["author", opts.author],
  ];
  const rendered = fields.map(([key, value]) => `${key}: ${value}`).join("\n");
  return `---\n${rendered}\n---\n`;
}
98
+
99
/** Dry-run report produced by `planMigration`. */
export type MigrationPlan = {
  /** Source directory that was scanned. */
  source: string;
  /** Agent-memory directory the notes would be written to. */
  target: string;
  /** One entry per candidate `.md` file; `error` is set when it could not be read. */
  files: { name: string; oldType?: string; subtype?: string; error?: string }[];
  /** Files listed in `SKIP_FILES`. */
  skippedSystem: string[];
  /** Files whose name already exists in the target directory. */
  skippedAlreadyInTarget: string[];
  /** Number of readable candidate files per mapped subtype. */
  subtypeCounts: Record<string, number>;
  /** Length of `files` (unreadable entries included). */
  totalToMigrate: number;
};
108
+
109
/**
 * Dry run: inspect the source directory and report what a migration would do,
 * without writing anything.
 *
 * Only regular `.md` files are considered, in sorted name order. Names in
 * `SKIP_FILES` and names already present in the target directory are reported
 * in their own lists. Files that cannot be read as strict UTF-8 are listed in
 * `files` with `error: "unreadable"` and still count towards `totalToMigrate`.
 *
 * Throws if the source directory itself cannot be listed.
 */
export function planMigration(opts: {
  sourceDir: string;
  agent: string;
  vault: string;
  taxonomy: TaxonomyPreset;
}): MigrationPlan {
  const { sourceDir, taxonomy } = opts;
  const targetDir = path.join(opts.vault, taxonomy.folders.agentMemory, opts.agent);

  const plan: MigrationPlan = {
    source: sourceDir,
    target: targetDir,
    files: [],
    skippedSystem: [],
    skippedAlreadyInTarget: [],
    subtypeCounts: {},
    totalToMigrate: 0,
  };

  const names: string[] = [];
  for (const entry of fs.readdirSync(sourceDir, { withFileTypes: true })) {
    if (entry.isFile() && entry.name.endsWith(".md")) names.push(entry.name);
  }
  names.sort();

  for (const name of names) {
    if (SKIP_FILES.has(name)) {
      plan.skippedSystem.push(name);
      continue;
    }
    if (fs.existsSync(path.join(targetDir, name))) {
      plan.skippedAlreadyInTarget.push(name);
      continue;
    }

    let text: string;
    try {
      const bytes = fs.readFileSync(path.join(sourceDir, name));
      // fatal: true — invalid UTF-8 is reported, not silently replaced.
      text = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
    } catch {
      plan.files.push({ name, error: "unreadable" });
      continue;
    }

    const [fm] = parseFlatFrontmatter(text);
    const oldType = (fm.type ?? "").trim().toLowerCase();
    const subtype = mapTypeToSubtype(oldType, taxonomy);
    plan.subtypeCounts[subtype] = (plan.subtypeCounts[subtype] ?? 0) + 1;
    plan.files.push({ name, oldType: oldType || "(none)", subtype });
  }

  plan.totalToMigrate = plan.files.length;
  return plan;
}
163
+
164
/** Outcome of `applyMigration`. */
export type MigrationResult = {
  /** Files written to the target (even if unlinking the source then failed). */
  migrated: string[];
  /** Markdown files whose name already existed in the target. */
  skipped: string[];
  /** Human-readable per-file failure messages. */
  errors: string[];
  /** Directory that holds the backup copies for this run. */
  backupDir: string;
  /** True when the (empty) source directory was removed at the end. */
  sourceRemoved: boolean;
};
171
+
172
/**
 * Apply the migration: for every regular file in the source directory,
 * backup → convert and write the target → unlink the source.
 *
 * - Every file is copied into `<backupRoot>/<agent>-<YYYYMMDD-HHMMSS>/` (local
 *   time) before anything else happens to it; a failed backup skips the file.
 * - Non-markdown files and `SKIP_FILES` are backup-only and are then removed
 *   from the source.
 * - A markdown file whose name already exists in the target is reported in
 *   `skipped` and removed from the source; the target is never overwritten.
 * - Any read, conversion or write failure is recorded in `errors` for that
 *   file and leaves the source file in place (its backup already exists).
 *   A failed write also removes its temp file again.
 * - The source directory is removed only when it ends up empty (`rmdir`,
 *   never a recursive delete).
 *
 * Throws only when the target/backup directories cannot be created or the
 * source directory cannot be listed.
 */
export function applyMigration(opts: {
  sourceDir: string;
  agent: string;
  vault: string;
  backupRoot: string;
  taxonomy: TaxonomyPreset;
  /** Injectable for tests. */
  now?: Date;
}): MigrationResult {
  const targetDir = path.join(opts.vault, opts.taxonomy.folders.agentMemory, opts.agent);
  fs.mkdirSync(targetDir, { recursive: true });

  const now = opts.now ?? new Date();
  const pad = (n: number): string => String(n).padStart(2, "0");
  const stamp = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}-${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
  const backupDir = path.join(opts.backupRoot, `${opts.agent}-${stamp}`);
  fs.mkdirSync(backupDir, { recursive: true });

  const migrated: string[] = [];
  const skipped: string[] = [];
  const errors: string[] = [];

  const entries = fs
    .readdirSync(opts.sourceDir, { withFileTypes: true })
    .filter((e) => e.isFile())
    .map((e) => e.name)
    .sort();

  for (const name of entries) {
    const srcPath = path.join(opts.sourceDir, name);

    // 1. Backup BEFORE any processing.
    try {
      fs.copyFileSync(srcPath, path.join(backupDir, name));
    } catch (err) {
      errors.push(`${name}: backup failed — ${String(err)}`);
      continue;
    }

    // 2a. Non-md and SKIP_FILES: backup-only, removed from the source.
    if (!name.endsWith(".md") || SKIP_FILES.has(name)) {
      try {
        fs.unlinkSync(srcPath);
      } catch (err) {
        errors.push(`${name}: unlink after backup failed — ${String(err)}`);
      }
      continue;
    }

    // 2b. Markdown auto-memory: convert + atomic write + unlink source.
    const targetFile = path.join(targetDir, name);
    if (fs.existsSync(targetFile)) {
      skipped.push(name);
      try {
        fs.unlinkSync(srcPath);
      } catch (err) {
        errors.push(`${name}: unlink (already migrated) failed — ${String(err)}`);
      }
      continue;
    }

    let text: string;
    try {
      text = new TextDecoder("utf-8", { fatal: true }).decode(fs.readFileSync(srcPath));
    } catch (err) {
      errors.push(`${name}: read failed — ${String(err)}`);
      continue;
    }

    // Conversion stats the source file; keep a failure there per-file instead
    // of letting it abort the whole run half-way through.
    let newText: string;
    try {
      const [fm, body] = parseFlatFrontmatter(text);
      const newFm = buildNewFrontmatter({
        title: name.slice(0, -3),
        subtype: mapTypeToSubtype(fm.type ?? "", opts.taxonomy),
        description: (fm.description ?? "").trim(),
        created: fileCreatedDate(srcPath),
        author: opts.agent,
        taxonomy: opts.taxonomy,
      });
      // Exactly one blank line between the frontmatter and a non-empty body.
      if (!body) newText = newFm;
      else if (body.startsWith("\n")) newText = newFm + body;
      else newText = newFm + "\n" + body;
    } catch (err) {
      errors.push(`${name}: convert failed — ${String(err)}`);
      continue; // source untouched
    }

    // Write to a sibling temp file, then rename into place.
    const tmp = `${targetFile}.tmp`;
    try {
      fs.writeFileSync(tmp, newText, "utf-8");
      fs.renameSync(tmp, targetFile);
    } catch (err) {
      errors.push(`${name}: write failed — ${String(err)}`);
      // Best effort: do not leave a stray temp file behind in the vault.
      try {
        fs.rmSync(tmp, { force: true });
      } catch {
        // nothing more we can do; the write failure is already reported
      }
      continue; // source untouched — write failed
    }

    // The note is in the vault now, so it counts as migrated even when the
    // source cannot be removed (that is reported separately).
    migrated.push(name);
    try {
      fs.unlinkSync(srcPath);
    } catch (err) {
      errors.push(`${name}: written to target but source unlink failed — ${String(err)}`);
    }
  }

  let sourceRemoved = false;
  try {
    fs.rmdirSync(opts.sourceDir);
    sourceRemoved = true;
  } catch {
    // not empty / no rights — left in place, surfaced via errors/remnants
  }

  return { migrated, skipped, errors, backupDir, sourceRemoved };
}
package/src/parser.ts ADDED
@@ -0,0 +1,269 @@
1
+ import matter from "gray-matter";
2
+ import { noteTitleFromPath } from "./utils.js";
3
+ import { linksSectionPattern, type TaxonomyPreset } from "./taxonomy.js";
4
+
5
/**
 * Matches `[[target]]` and `[[target|alias]]`. Capture group 1 is the target
 * (everything before the first `|`); the alias is matched but not captured.
 * Global flag — used with `matchAll`.
 */
const WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
6
+
7
/**
 * Reduce a raw wikilink target to its stored form, KEEPING the folder path.
 *
 * Supported shapes (the `|alias` part is already removed by WIKILINK_RE):
 *   [[Name]]
 *   [[Name|alias]]
 *   [[Folder/Subfolder/Name]]
 *   [[Folder/Subfolder/Name|alias]]
 *
 * Steps, in order:
 *   1. trim surrounding whitespace FIRST, so a padded `Name.md ` still loses
 *      its extension;
 *   2. drop one trailing backslash — inside Markdown tables the alias pipe is
 *      written escaped (`[[Name\|alias]]`), which leaves `Name\` as the
 *      captured target;
 *   3. drop a trailing `.md` (case-insensitive).
 *
 * The folder path is deliberately preserved so the resolver can do
 * path-aware resolution: an explicit `[[Projects/A/Phase]]` must NOT collapse
 * to the bare basename and risk resolving to a same-named note elsewhere.
 */
function stripWikilinkTarget(raw: string): string {
  return raw
    .trim()
    .replace(/\\$/, "")
    .trimEnd()
    .replace(/\.md$/i, "")
    .trim();
}
25
+
26
/**
 * Final `/`-separated segment of a (possibly path-qualified) wikilink target,
 * trimmed. A target without any `/` is returned trimmed as-is; a target that
 * ends in `/` yields an empty string.
 */
export function wikilinkBasename(target: string): string {
  const lastSlash = target.lastIndexOf("/");
  const tail = lastSlash === -1 ? target : target.slice(lastSlash + 1);
  return tail.trim();
}
34
+
35
/** One indexable slice of a note body. */
export type ParsedChunk = {
  /** Zero-based position of the chunk within its note. */
  chunkIndex: number;
  /** Chunk text. */
  text: string;
};
39
+
40
/** A wikilink found in a note body. */
export type ParsedWikilink = {
  /** Link target with the folder path kept and a trailing `.md` removed. */
  target: string;
  /** Whitespace-collapsed text surrounding the link in the original body. */
  contextSnippet: string;
};
44
+
45
/** Result of `parseMarkdown` for a single note. */
export type ParsedDocument = {
  /** Frontmatter `title` when it is a string, otherwise derived from the path. */
  title: string;
  /** Note content without frontmatter, trimmed. */
  body: string;
  /** The raw file content exactly as passed in. */
  text: string;
  /** Shallow copy of the parsed frontmatter (empty object when absent). */
  frontmatter: Record<string, unknown>;
  type: string | null;
  status: string | null;
  /** String entries of the frontmatter `tags` array; empty when not an array. */
  tags: string[];
  created: string | null;
  updated: string | null;
  /** Wikilinks extracted from the full body. */
  wikilinks: ParsedWikilink[];
  /** Chunks built from the body with the links section stripped. */
  chunks: ParsedChunk[];
};
58
+
59
/**
 * Parse one markdown note into the structure consumed by the indexer.
 *
 * Frontmatter is read with gray-matter. The title comes from a string
 * `title` field, falling back to `noteTitleFromPath(relativePath)`.
 *
 * Wikilinks are extracted from the FULL body (links section included) so the
 * graph stays complete, while chunks are built from the body WITHOUT the
 * links section so those links do not pollute keyword hits or the snippet
 * fallback.
 */
export function parseMarkdown(content: string, relativePath: string, chunkSize: number, chunkOverlap: number, taxonomy: TaxonomyPreset): ParsedDocument {
  const { data, content: rawBody } = matter(content);
  const frontmatter = normalizeFrontmatter(data);
  const body = rawBody.trim();

  const declaredTitle = frontmatter.title;
  const title = typeof declaredTitle === "string" ? declaredTitle : noteTitleFromPath(relativePath);

  const rawTags = frontmatter.tags;
  const tags = Array.isArray(rawTags) ? rawTags.filter((tag): tag is string => typeof tag === "string") : [];

  // Graph metadata is removed from the text that gets indexed/embedded.
  const indexableBody = stripLinksSection(body, taxonomy);

  return {
    title,
    body,
    text: content,
    frontmatter,
    type: asNullableString(frontmatter.type),
    status: asNullableString(frontmatter.status),
    tags,
    created: asNullableString(frontmatter.created),
    updated: asNullableString(frontmatter.updated),
    wikilinks: extractWikilinks(body),
    chunks: chunkText(indexableBody, chunkSize, chunkOverlap, title),
  };
}
85
+
86
/**
 * Remove the leading links-section block (heading taken from the taxonomy
 * preset) from a note body before chunking.
 *
 * Expected note layout: links heading with a list of [[wikilinks]], then a
 * `---` divider, then the real content. The links block is graph metadata;
 * indexing it produces false hits on popular link targets and makes the
 * first chunk nothing but links.
 *
 * The body is returned unchanged unless BOTH hold: `linksSectionPattern`
 * matches the body (presumably anchored at its start — defined in
 * taxonomy.ts) and a `---` divider line exists. Otherwise everything up to
 * and including the first divider is dropped and the rest is trimmed.
 */
export function stripLinksSection(body: string, taxonomy: TaxonomyPreset): string {
  const hasLinksHeading = linksSectionPattern(taxonomy).test(body);
  if (!hasLinksHeading) return body;

  const divider = /\n---\s*\n/.exec(body);
  if (divider === null) return body;

  const contentStart = divider.index + divider[0].length;
  return body.slice(contentStart).trim();
}
110
+
111
/**
 * Blank out markdown code regions with spaces of equal length.
 *
 * This keeps `[[X]]` placeholders written inside inline code or fenced
 * blocks from being picked up as real wikilinks.
 *
 * Regions are REPLACED BY SPACES rather than removed, so every offset in the
 * result lines up with the original text and callers can slice context from
 * the unmasked body.
 *
 * Order matters: fenced blocks (three backticks or three tildes, possibly
 * multi-line) first, since they may contain single backticks; then
 * double-backtick inline spans; then single-backtick inline spans. Inline
 * spans never cross a line break. Inline spans delimited by three or more
 * backticks are not handled.
 */
function maskCodeRegions(body: string): string {
  const blank = (hit: string): string => " ".repeat(hit.length);
  const passes: RegExp[] = [
    /```[\s\S]*?```|~~~[\s\S]*?~~~/g, // fenced blocks
    /``[^`\n]+``/g, // double-backtick inline
    /`[^`\n]+`/g, // single-backtick inline
  ];
  return passes.reduce((text, pattern) => text.replace(pattern, blank), body);
}
139
+
140
/**
 * Collect every wikilink in `body` that is not inside a code region.
 *
 * Matching runs on a code-masked copy of the body, while the context snippet
 * (up to 50 characters either side of the link, whitespace collapsed) is cut
 * from the ORIGINAL body so the reader sees the real surroundings.
 */
export function extractWikilinks(body: string): ParsedWikilink[] {
  const contextRadius = 50;
  const searchable = maskCodeRegions(body);
  const links: ParsedWikilink[] = [];

  for (const hit of searchable.matchAll(WIKILINK_RE)) {
    const rawTarget = hit[1]?.trim();
    if (!rawTarget) continue;

    const offset = hit.index ?? 0;
    const from = Math.max(0, offset - contextRadius);
    const to = Math.min(body.length, offset + hit[0].length + contextRadius);

    links.push({
      // Path-preserving: [[Name]] → "Name", [[Folder/Name.md]] → "Folder/Name".
      // The resolver must see the path the author actually wrote.
      target: stripWikilinkTarget(rawTarget),
      // Masking preserves offsets, so the same window is valid in `body`.
      contextSnippet: body.slice(from, to).replace(/\s+/g, " ").trim(),
    });
  }

  return links;
}
160
+
161
/**
 * Split a note body into chunks of roughly `chunkSize` characters.
 *
 * Paragraphs (separated by blank lines) are packed greedily; when the next
 * paragraph no longer fits, the current chunk is emitted and the next one
 * starts with the last `chunkOverlap` characters of the previous chunk. Any
 * text that is still longer than `chunkSize` — including an oversized first
 * or only paragraph — is cut further at the index chosen by `findSplitIndex`.
 *
 * The title, when given, is prepended to the FIRST chunk only so both keyword
 * and semantic search can see the note name without repeating it in every
 * chunk; that first chunk may therefore exceed `chunkSize`. An empty body
 * yields a single title-only chunk, or no chunks when there is no title.
 *
 * @returns Chunks with sequential `chunkIndex` values starting at 0.
 */
export function chunkText(
  body: string,
  chunkSize: number,
  chunkOverlap: number,
  title?: string,
): ParsedChunk[] {
  const normalized = body.replace(/\r\n/g, "\n").trim();
  const titlePrefix = title?.trim() ? `${title.trim()}\n\n` : "";

  if (!normalized) {
    // Empty body still indexes by title alone — otherwise a freshly created
    // note (frontmatter only) is unsearchable until its first content edit.
    return titlePrefix ? [{ chunkIndex: 0, text: titlePrefix.trim() }] : [];
  }

  const paragraphs = normalized
    .split(/\n{2,}/)
    .map((part) => part.trim())
    .filter(Boolean);

  // Text-only entries; chunkIndex is assigned in the final map.
  const chunks: { text: string }[] = [];
  let current = "";

  for (const paragraph of paragraphs) {
    if (current) {
      const candidate = `${current}\n\n${paragraph}`;
      if (candidate.length <= chunkSize) {
        current = candidate;
        continue;
      }
      pushChunk(chunks, current);
      current = mergeOverlap(current, paragraph, chunkOverlap);
    } else {
      current = paragraph;
    }
    // Covers both the overlap+paragraph merge and a lone oversized paragraph.
    current = splitOversized(chunks, current, chunkSize, chunkOverlap);
  }

  if (current) {
    pushChunk(chunks, current);
  }

  // Prepend the title to chunk[0] in place — keeps chunkIndex sequencing
  // intact and preserves the per-chunk size budget for the rest.
  if (titlePrefix && chunks.length > 0) {
    chunks[0] = { text: `${titlePrefix}${chunks[0].text}` };
  }

  return chunks.map((chunk, index) => ({ chunkIndex: index, text: chunk.text }));
}

/**
 * Emit leading pieces of `text` into `chunks` until the remainder fits into
 * `chunkSize`, and return that remainder.
 *
 * Every pass is guaranteed to shorten the remainder: when re-including the
 * overlap would not make progress, the consumed prefix is dropped outright
 * (no overlap) instead of being emitted a second time.
 */
function splitOversized(
  chunks: { text: string }[],
  text: string,
  chunkSize: number,
  chunkOverlap: number,
): string {
  const limit = Math.max(1, chunkSize); // a non-positive budget must not spin forever
  const overlap = Math.max(0, chunkOverlap); // a negative overlap would skip text
  let rest = text;

  while (rest.length > limit) {
    const cut = Math.max(1, findSplitIndex(rest, limit));
    pushChunk(chunks, rest.slice(0, cut));
    const withOverlap = rest.slice(Math.max(0, cut - overlap)).trim();
    rest = withOverlap.length < rest.length ? withOverlap : rest.slice(cut).trim();
  }

  return rest;
}
240
+
241
/** Append `text` (trimmed) to `chunks`; whitespace-only text is dropped. */
function pushChunk(chunks: { text: string }[], text: string): void {
  const trimmed = text.trim();
  if (trimmed.length === 0) return;
  chunks.push({ text: trimmed });
}
245
+
246
/**
 * Start of the next chunk: the last `overlap` characters of `previous`
 * (trimmed), a blank line, then `next`. An empty side is left out together
 * with the separator; the result is trimmed.
 */
function mergeOverlap(previous: string, next: string, overlap: number): string {
  const tailStart = Math.max(0, previous.length - overlap);
  const tail = previous.slice(tailStart).trim();

  let merged: string;
  if (tail && next) merged = `${tail}\n\n${next}`;
  else merged = tail || next;

  return merged.trim();
}
250
+
251
/**
 * Choose where to cut `input` so that the leading piece is at most `target`
 * characters long.
 *
 * Prefers the last newline or space at or before `target` (so words are not
 * cut in half) and falls back to a hard cut at `target` when no such
 * boundary exists after index 0. Input that already fits is not cut at all:
 * its full length is returned.
 *
 * The previous form, `Math.max(...candidates, Math.min(target, length))`,
 * always returned the hard cut, because every whitespace candidate is
 * `<= target`.
 */
function findSplitIndex(input: string, target: number): number {
  if (input.length <= target) return input.length;
  const boundary = Math.max(input.lastIndexOf("\n", target), input.lastIndexOf(" ", target));
  return boundary > 0 ? boundary : target;
}
255
+
256
/**
 * Shallow-copy parsed frontmatter into a plain record. Anything that is not
 * a non-null, non-array object (null, primitives, arrays) becomes `{}`.
 */
function normalizeFrontmatter(data: unknown): Record<string, unknown> {
  const isRecord = typeof data === "object" && data !== null && !Array.isArray(data);
  if (!isRecord) return {};
  return { ...(data as Record<string, unknown>) };
}
259
+
260
/**
 * Coerce a frontmatter value to `string | null`.
 *
 * Strings pass through unchanged. Valid `Date` objects — gray-matter turns
 * YAML date scalars such as `created: 2026-03-30` into Dates — are rendered
 * as `YYYY-MM-DD` from their UTC ISO form instead of being dropped.
 * Everything else, including an invalid Date, becomes `null`.
 */
function asNullableString(value: unknown): string | null {
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? null : value.toISOString().slice(0, 10);
  }
  return typeof value === "string" ? value : null;
}
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Permanent-change detector core — a memoryd subsystem (ADR-004).
3
+ *
4
+ * Carries over the detection SEMANTICS of the reference
5
+ * `mergemind-permanent-monitor.sh` (a bash poll loop; no direct unit
6
+ * fixtures existed — the load-bearing part, the smart hash, is ported and
7
+ * tested in `smart-hash.ts`):
8
+ *
9
+ * - watches the SIX permanent folders (five canonical + agent memory),
10
+ * recursively; the archive is deliberately ignored (frozen notes);
11
+ * - compares by the sha256 of the SEMANTIC part of each file (frontmatter
12
+ * minus service fields + body, `smart-hash.ts`) — NOT raw bytes, NOT
13
+ * mtime. This kills both noisy-event classes: iCloud mtime-only syncs
14
+ * and the hook-induced loop (service-field re-stamps are invisible);
15
+ * - deletions are ignored (an archive move by the Index, not a change);
16
+ * - events are COALESCED (ADR-004): one diff pass yields ONE event
17
+ * carrying the list of changed paths, not N wake-ups.
18
+ *
19
+ * The fs-watch/debounce shell and the stdout signal line belong to the
20
+ * memoryd daemon stage; this module is the pure snapshot/diff core.
21
+ */
22
+
23
+ import fs from "node:fs";
24
+ import path from "node:path";
25
+ import { hashFile } from "./smart-hash.js";
26
+ import type { TaxonomyPreset } from "./taxonomy.js";
27
+
28
/** Snapshot of the monitored folders: vault-relative path → smart hash. */
export type VaultSnapshot = Map<string, string>;
30
+
31
/**
 * Lazily yield the path of every regular `.md` file under `dir`, descending
 * into subdirectories. A directory that cannot be listed (missing, no
 * access) simply contributes nothing.
 */
function* walkMdFiles(dir: string): Generator<string> {
  let listing: fs.Dirent[];
  try {
    listing = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const entry of listing) {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      yield* walkMdFiles(entryPath);
      continue;
    }
    if (entry.isFile() && entry.name.endsWith(".md")) {
      yield entryPath;
    }
  }
}
44
+
45
/**
 * Folder names watched by the monitor, in fixed order: knowledge, decisions,
 * projects, ideas, lists, agent memory — all taken from the taxonomy preset.
 */
export function monitoredFolders(taxonomy: TaxonomyPreset): string[] {
  const { knowledge, decisions, projects, ideas, lists, agentMemory } = taxonomy.folders;
  return [knowledge, decisions, projects, ideas, lists, agentMemory];
}
50
+
51
/**
 * Build a snapshot of the monitored folders: vault-relative path → smart
 * hash, for every `.md` file found recursively.
 *
 * Files for which `hashFile` returns a falsy value are left out silently
 * (presumably unreadable files — see smart-hash.ts).
 */
export function snapshotVault(vault: string, taxonomy: TaxonomyPreset): VaultSnapshot {
  const hashes: VaultSnapshot = new Map();
  const roots = monitoredFolders(taxonomy).map((folder) => path.join(vault, folder));

  for (const root of roots) {
    for (const file of walkMdFiles(root)) {
      const digest = hashFile(file);
      if (!digest) continue;
      hashes.set(path.relative(vault, file), digest);
    }
  }

  return hashes;
}
65
+
66
/**
 * Paths that are new in `next` or whose hash differs from `prev`, sorted.
 * Paths present only in `prev` (deletions) are ignored by design.
 */
export function diffSnapshots(prev: VaultSnapshot, next: VaultSnapshot): string[] {
  return Array.from(next.entries())
    .filter(([rel, hash]) => prev.get(rel) !== hash)
    .map(([rel]) => rel)
    .sort();
}
77
+
78
/** Single coalesced notification produced by one detection pass. */
export type PermanentChangedEvent = {
  /** Event discriminator. */
  kind: "PERMANENT_CHANGED";
  /** Coalesced list of changed vault-relative paths (ADR-004). */
  paths: string[];
};
83
+
84
/**
 * Run one detection pass: snapshot the vault and diff it against `prev`.
 *
 * @returns `event` — one coalesced event listing all added/changed paths, or
 *   `null` when nothing changed; `next` — the fresh snapshot to pass as
 *   `prev` on the following pass.
 */
export function detectPermanentChanges(opts: {
  vault: string;
  taxonomy: TaxonomyPreset;
  prev: VaultSnapshot;
}): { event: PermanentChangedEvent | null; next: VaultSnapshot } {
  const { vault, taxonomy, prev } = opts;
  const next = snapshotVault(vault, taxonomy);
  const paths = diffSnapshots(prev, next);

  if (paths.length === 0) {
    return { event: null, next };
  }
  return { event: { kind: "PERMANENT_CHANGED", paths }, next };
}
101
+
102
/**
 * Render an event as signal lines, one `PERMANENT_CHANGED: <path>` line per
 * changed path, in the order the paths appear in the event.
 */
export function formatEventLines(event: PermanentChangedEvent): string[] {
  const lines: string[] = [];
  for (const changedPath of event.paths) {
    lines.push(`PERMANENT_CHANGED: ${changedPath}`);
  }
  return lines;
}