@agfpd/iapeer-memory-core 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +32 -0
- package/src/config.ts +257 -0
- package/src/context-render.ts +185 -0
- package/src/db.ts +550 -0
- package/src/embedding.ts +174 -0
- package/src/fm-update.ts +352 -0
- package/src/frontmatter-fill.ts +529 -0
- package/src/graph.ts +427 -0
- package/src/http-client.ts +129 -0
- package/src/human-edit-detect.ts +213 -0
- package/src/index-render.ts +876 -0
- package/src/index.ts +65 -0
- package/src/indexer.ts +323 -0
- package/src/log.ts +27 -0
- package/src/mcp-tools.ts +468 -0
- package/src/memoryd.ts +680 -0
- package/src/migrate-auto-memory.ts +289 -0
- package/src/parser.ts +269 -0
- package/src/permanent-detect.ts +110 -0
- package/src/render-doctrine.ts +113 -0
- package/src/reranker.ts +162 -0
- package/src/search.ts +806 -0
- package/src/smart-hash.ts +85 -0
- package/src/sqlite-loader.ts +151 -0
- package/src/tags-mirror.ts +47 -0
- package/src/taxonomy.ts +385 -0
- package/src/utils.ts +69 -0
- package/tsconfig.json +24 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration of a harness's built-in per-peer auto-memory into the vault's
|
|
3
|
+
* agent-memory zone (`06_Agent_Memory/<agent>/`).
|
|
4
|
+
*
|
|
5
|
+
* TS port of the reference `scripts/migrate-auto-memory.py` (behavioural
|
|
6
|
+
* parity against `tests/python/test_migrate_auto_memory.py`, 16 fixtures).
|
|
7
|
+
* Deterministic, no LLM:
|
|
8
|
+
*
|
|
9
|
+
* 1. parse each source `.md` frontmatter (flat parser — auto-memory is
|
|
10
|
+
* simple);
|
|
11
|
+
* 2. map the harness `type` → vault `subtype` (taxonomy tokens):
|
|
12
|
+
* user → person_profile, feedback → feedback, project → context,
|
|
13
|
+
* reference → reference, anything else → context. A `feedback` note
|
|
14
|
+
* that is semantically a pitfall cannot be told apart
|
|
15
|
+
* deterministically — re-filing to `pitfall` is the agent's manual step
|
|
16
|
+
* after migration (distill phase 5);
|
|
17
|
+
* 3. build the agent-memory frontmatter (title from filename, type/status
|
|
18
|
+
* tokens from the taxonomy, description through the SHARED YAML-safe
|
|
19
|
+
* serialiser, created from birthtime/mtime, author = agent);
|
|
20
|
+
* 4. per-file: backup → write target (atomic) → unlink source. Idempotent:
|
|
21
|
+
* an existing target file is skipped, never overwritten.
|
|
22
|
+
*
|
|
23
|
+
* ADAPTER SCOPE: the ENGINE is source-agnostic — the adapter supplies the
|
|
24
|
+
* source directory (claude: `~/.claude/agent-memory/<agent>/` for launchd
|
|
25
|
+
* peers, `~/.claude/projects/<slug>/memory/` for project sessions). The
|
|
26
|
+
* codex memories source location/format is NOT fact-checked yet — wiring
|
|
27
|
+
* it up is the codex-adapter's job once verified against a live codex
|
|
28
|
+
* (we never invent a format from the model's memory).
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import fs from "node:fs";
|
|
32
|
+
import path from "node:path";
|
|
33
|
+
import type { TaxonomyPreset } from "./taxonomy.js";
|
|
34
|
+
import { yamlSafeScalar } from "./fm-update.js";
|
|
35
|
+
|
|
36
|
+
/**
 * File names that the migration backs up but never converts into vault
 * notes. Currently only `MEMORY.md`.
 */
export const SKIP_FILES: ReadonlySet<string> = new Set<string>(["MEMORY.md"]);
|
|
38
|
+
|
|
39
|
+
/**
 * Minimal reader for flat `key: value` frontmatter.
 *
 * Looks for a leading `---` … `---` fence. Inside it, blank lines, lines
 * starting with `#`, and lines without a colon are ignored; every other line
 * is split at its FIRST colon into a trimmed key and a trimmed value. No YAML
 * features (quoting, nesting, multi-line scalars) are interpreted.
 *
 * @param text Full file contents.
 * @returns `[fields, body]` — the parsed fields and the text that follows the
 *   closing fence; `[{}, text]` when no fence is found.
 */
export function parseFlatFrontmatter(text: string): [Record<string, string>, string] {
  const fence = /^---[^\S\n]*\n([\s\S]*?\n)---[^\S\n]*(?:\n|$)/.exec(text);
  if (fence === null) {
    return [{}, text];
  }

  const fields: Record<string, string> = {};
  fence[1].split("\n").forEach((rawLine) => {
    const isBlank = rawLine.trim() === "";
    if (isBlank || rawLine.startsWith("#")) return;

    const colonAt = rawLine.indexOf(":");
    if (colonAt < 0) return;

    const key = rawLine.slice(0, colonAt).trim();
    fields[key] = rawLine.slice(colonAt + 1).trim();
  });

  return [fields, text.slice(fence[0].length)];
}
|
|
52
|
+
|
|
53
|
+
/**
 * Translate a harness auto-memory `type` into the vault subtype token taken
 * from the taxonomy preset.
 *
 * Matching is case-insensitive and ignores surrounding whitespace:
 * `user` → `personProfile`, `feedback` → `feedback`, `reference` →
 * `reference`; `project` and every other value (including the empty string)
 * map to `context`.
 *
 * @param oldType Raw `type` value from the source frontmatter.
 * @param taxonomy Preset supplying the subtype tokens.
 * @returns The subtype token to write into the migrated note.
 */
export function mapTypeToSubtype(oldType: string, taxonomy: TaxonomyPreset): string {
  const { subtypes } = taxonomy;
  const normalized = oldType.trim().toLowerCase();

  if (normalized === "user") return subtypes.personProfile;
  if (normalized === "feedback") return subtypes.feedback;
  if (normalized === "reference") return subtypes.reference;

  // "project" shares the catch-all bucket with unrecognised types.
  return subtypes.context;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Creation date of a file as `YYYY-MM-DD`.
 *
 * Uses the birth time when the platform reports a positive one and falls
 * back to the modification time otherwise. The date is cut from the ISO-8601
 * (UTC) rendering of that instant.
 *
 * @param p Path of the file to inspect.
 * @returns The first ten characters of the instant's ISO string.
 * @throws Whatever `fs.statSync` throws when `p` cannot be stat'ed.
 */
function fileCreatedDate(p: string): string {
  const { birthtimeMs, mtimeMs } = fs.statSync(p);
  const createdMs = birthtimeMs > 0 ? birthtimeMs : mtimeMs;
  const iso = new Date(createdMs).toISOString();
  return iso.slice(0, 10);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Render the frontmatter block of a migrated agent-memory note.
 *
 * `title` and `description` are free text, so both are passed through the
 * shared `yamlSafeScalar` serialiser before being written. The title is
 * derived from a file name by the caller and may contain characters (`: `,
 * ` #`, a leading `[` or `-`, …) that would otherwise produce unparsable
 * YAML. Type and status tokens come from the taxonomy preset.
 *
 * NOTE(review): assumes `yamlSafeScalar` leaves already-safe strings as plain
 * scalars, so ordinary titles keep their previous rendering — confirm against
 * `fm-update.ts`.
 *
 * @param opts.title Note title; an empty title is emitted as-is.
 * @param opts.subtype Vault subtype token.
 * @param opts.description Free-text description; empty becomes `''`.
 * @param opts.created Creation date string, written verbatim.
 * @param opts.author Author token, written verbatim.
 * @param opts.taxonomy Preset supplying the `type` and `status` tokens.
 * @returns The frontmatter text, fenced by `---` lines and ending in a newline.
 */
export function buildNewFrontmatter(opts: {
  title: string;
  subtype: string;
  description: string;
  created: string;
  author: string;
  taxonomy: TaxonomyPreset;
}): string {
  const description = opts.description ? yamlSafeScalar(opts.description) : "''";
  // Escape only non-empty titles so the empty-title rendering is unchanged.
  const title = opts.title ? yamlSafeScalar(opts.title) : opts.title;

  const lines = [
    "---",
    `title: ${title}`,
    `type: ${opts.taxonomy.types.agentMemory}`,
    `subtype: ${opts.subtype}`,
    `status: ${opts.taxonomy.statusTokens.current}`,
    `description: ${description}`,
    `created: ${opts.created}`,
    `author: ${opts.author}`,
    "---",
  ];
  return lines.join("\n") + "\n";
}
|
|
98
|
+
|
|
99
|
+
/** Dry-run description of what a migration would do. */
export type MigrationPlan = {
  /** Source directory that was scanned. */
  source: string;
  /** Target directory inside the vault. */
  target: string;
  /**
   * One entry per candidate file: either the detected `oldType` and mapped
   * `subtype`, or an `error` when the file could not be read.
   */
  files: Array<{ name: string; oldType?: string; subtype?: string; error?: string }>;
  /** Files skipped because their name is a system file. */
  skippedSystem: string[];
  /** Files skipped because a same-named file already exists in the target. */
  skippedAlreadyInTarget: string[];
  /** Number of readable candidates per mapped subtype. */
  subtypeCounts: Record<string, number>;
  /** Length of `files` (unreadable entries included). */
  totalToMigrate: number;
};
|
|
108
|
+
|
|
109
|
+
/**
 * Dry run: inspect the source directory and report what a migration would
 * do. Files are only read, never written.
 *
 * Only regular `*.md` files directly inside `sourceDir` are considered, in
 * sorted name order. System files and files whose name already exists in the
 * target directory are listed as skipped. Files that cannot be read as strict
 * UTF-8 are recorded with `error: "unreadable"`.
 *
 * @param opts.sourceDir Directory holding the harness auto-memory files.
 * @param opts.agent Agent name; used as the last target path segment.
 * @param opts.vault Vault root.
 * @param opts.taxonomy Preset supplying folder names and subtype tokens.
 * @returns The migration plan.
 * @throws Whatever `fs.readdirSync` throws when `sourceDir` cannot be listed.
 */
export function planMigration(opts: {
  sourceDir: string;
  agent: string;
  vault: string;
  taxonomy: TaxonomyPreset;
}): MigrationPlan {
  const { sourceDir, taxonomy } = opts;
  const targetDir = path.join(opts.vault, taxonomy.folders.agentMemory, opts.agent);

  const plan: MigrationPlan = {
    source: sourceDir,
    target: targetDir,
    files: [],
    skippedSystem: [],
    skippedAlreadyInTarget: [],
    subtypeCounts: {},
    totalToMigrate: 0,
  };

  const markdownNames = fs
    .readdirSync(sourceDir, { withFileTypes: true })
    .filter((entry) => entry.isFile() && entry.name.endsWith(".md"))
    .map((entry) => entry.name)
    .sort();

  for (const name of markdownNames) {
    if (SKIP_FILES.has(name)) {
      plan.skippedSystem.push(name);
      continue;
    }
    if (fs.existsSync(path.join(targetDir, name))) {
      plan.skippedAlreadyInTarget.push(name);
      continue;
    }

    let text: string;
    try {
      // `fatal: true` makes invalid UTF-8 throw instead of being replaced.
      const bytes = fs.readFileSync(path.join(sourceDir, name));
      text = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
    } catch {
      plan.files.push({ name, error: "unreadable" });
      continue;
    }

    const [fields] = parseFlatFrontmatter(text);
    const oldType = (fields.type ?? "").trim().toLowerCase();
    const subtype = mapTypeToSubtype(oldType, taxonomy);
    plan.subtypeCounts[subtype] = (plan.subtypeCounts[subtype] ?? 0) + 1;
    plan.files.push({ name, oldType: oldType || "(none)", subtype });
  }

  plan.totalToMigrate = plan.files.length;
  return plan;
}
|
|
163
|
+
|
|
164
|
+
/** Outcome of an applied migration. */
export type MigrationResult = {
  /** Names of notes written to the target directory. */
  migrated: string[];
  /** Names skipped because the target file already existed. */
  skipped: string[];
  /** Human-readable `"<name>: <what failed> — <error>"` messages. */
  errors: string[];
  /** Directory that received the per-file backups. */
  backupDir: string;
  /** Whether the source directory could be removed afterwards. */
  sourceRemoved: boolean;
};
|
|
171
|
+
|
|
172
|
+
/**
 * Apply the migration. For every regular file in `sourceDir`, in sorted name
 * order:
 *
 *   1. copy it into a fresh backup directory
 *      (`<backupRoot>/<agent>-<YYYYMMDD-HHMMSS>`, local time);
 *   2. non-markdown and system files are then just removed from the source;
 *   3. markdown whose target already exists is recorded as skipped and
 *      removed from the source (the target is never overwritten);
 *   4. other markdown is converted, written to `<target>.tmp`, renamed into
 *      place, and only then removed from the source.
 *
 * A failure at any step is recorded in `errors` and the loop moves on to the
 * next file. A failed read, conversion or write leaves the source file
 * untouched (its backup already exists), and a partially written `.tmp` file
 * is removed on a best-effort basis. The source directory itself is removed
 * only via `rmdir`, i.e. only when it ended up empty.
 *
 * @param opts.sourceDir Directory holding the harness auto-memory files.
 * @param opts.agent Agent name; used for the target folder, the backup folder
 *   name and the `author` field.
 * @param opts.vault Vault root.
 * @param opts.backupRoot Parent directory for the timestamped backup folder.
 * @param opts.taxonomy Preset supplying folder names and tokens.
 * @param opts.now Clock override for the backup stamp (tests).
 * @returns Lists of migrated/skipped names, error messages, the backup
 *   directory, and whether the source directory was removed.
 * @throws Whatever `mkdirSync` / `readdirSync` throw while setting up.
 */
export function applyMigration(opts: {
  sourceDir: string;
  agent: string;
  vault: string;
  backupRoot: string;
  taxonomy: TaxonomyPreset;
  /** Injectable for tests. */
  now?: Date;
}): MigrationResult {
  const targetDir = path.join(opts.vault, opts.taxonomy.folders.agentMemory, opts.agent);
  fs.mkdirSync(targetDir, { recursive: true });

  const now = opts.now ?? new Date();
  const pad = (n: number): string => String(n).padStart(2, "0");
  const stamp = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}-${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
  const backupDir = path.join(opts.backupRoot, `${opts.agent}-${stamp}`);
  fs.mkdirSync(backupDir, { recursive: true });

  const migrated: string[] = [];
  const skipped: string[] = [];
  const errors: string[] = [];

  const entries = fs
    .readdirSync(opts.sourceDir, { withFileTypes: true })
    .filter((e) => e.isFile())
    .map((e) => e.name)
    .sort();

  for (const name of entries) {
    const srcPath = path.join(opts.sourceDir, name);

    // 1. Backup BEFORE any processing; without a backup nothing is touched.
    try {
      fs.copyFileSync(srcPath, path.join(backupDir, name));
    } catch (err) {
      errors.push(`${name}: backup failed — ${String(err)}`);
      continue;
    }

    // 2a. Non-md and SKIP_FILES: backup-only, removed from the source.
    if (!name.endsWith(".md") || SKIP_FILES.has(name)) {
      try {
        fs.unlinkSync(srcPath);
      } catch (err) {
        errors.push(`${name}: unlink after backup failed — ${String(err)}`);
      }
      continue;
    }

    // 2b. Markdown auto-memory: convert + atomic write + unlink source.
    const targetFile = path.join(targetDir, name);
    if (fs.existsSync(targetFile)) {
      skipped.push(name);
      try {
        fs.unlinkSync(srcPath);
      } catch (err) {
        errors.push(`${name}: unlink (already migrated) failed — ${String(err)}`);
      }
      continue;
    }

    let text: string;
    try {
      // `fatal: true` rejects invalid UTF-8 instead of silently replacing it.
      text = new TextDecoder("utf-8", { fatal: true }).decode(fs.readFileSync(srcPath));
    } catch (err) {
      errors.push(`${name}: read failed — ${String(err)}`);
      continue;
    }

    // Conversion stats the source file; keep a failure per-file instead of
    // letting it abort the remaining entries.
    let newText: string;
    try {
      const [fm, body] = parseFlatFrontmatter(text);
      const newFm = buildNewFrontmatter({
        title: name.slice(0, -3),
        subtype: mapTypeToSubtype(fm.type ?? "", opts.taxonomy),
        description: (fm.description ?? "").trim(),
        created: fileCreatedDate(srcPath),
        author: opts.agent,
        taxonomy: opts.taxonomy,
      });
      // Keep exactly one blank line between the frontmatter and the body.
      if (!body) newText = newFm;
      else if (body.startsWith("\n")) newText = newFm + body;
      else newText = newFm + "\n" + body;
    } catch (err) {
      errors.push(`${name}: convert failed — ${String(err)}`);
      continue;
    }

    const tmp = `${targetFile}.tmp`;
    try {
      fs.writeFileSync(tmp, newText, "utf-8");
      fs.renameSync(tmp, targetFile);
    } catch (err) {
      errors.push(`${name}: write failed — ${String(err)}`);
      // Do not leave a half-written temp file behind in the vault.
      try {
        fs.unlinkSync(tmp);
      } catch {
        // temp file was never created or is already gone
      }
      continue; // source untouched — write failed
    }

    try {
      fs.unlinkSync(srcPath);
    } catch (err) {
      errors.push(`${name}: written to target but source unlink failed — ${String(err)}`);
    }
    // The note is in the vault either way, so it counts as migrated.
    migrated.push(name);
  }

  let sourceRemoved = false;
  try {
    fs.rmdirSync(opts.sourceDir);
    sourceRemoved = true;
  } catch {
    // not empty / no rights — left in place, surfaced via errors/remnants
  }

  return { migrated, skipped, errors, backupDir, sourceRemoved };
}
|
package/src/parser.ts
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import matter from "gray-matter";
|
|
2
|
+
import { noteTitleFromPath } from "./utils.js";
|
|
3
|
+
import { linksSectionPattern, type TaxonomyPreset } from "./taxonomy.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Matches `[[target]]` and `[[target|alias]]`. Capture group 1 is the target
 * (everything up to the first `|` or `]`); the alias is not captured. The
 * `g` flag lets the pattern drive `String.prototype.matchAll`.
 */
const WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
|
|
6
|
+
|
|
7
|
+
/**
 * Reduce a wikilink target to its stored form, KEEPING any folder path.
 *
 * The alias part (`|alias`) is already removed by the wikilink pattern, so
 * this only trims whitespace and drops one trailing `.md` (any case). The
 * folder path is deliberately preserved: the resolver needs the path the
 * author wrote, so that a path-qualified link is not collapsed to a bare
 * basename and resolved to a same-named note elsewhere.
 *
 * Whitespace is trimmed BEFORE the extension is removed, so a target such as
 * `"Name.md "` still loses its extension.
 *
 * @param raw Target text as captured from the link.
 * @returns The trimmed target without a trailing `.md`.
 */
function stripWikilinkTarget(raw: string): string {
  return raw.trim().replace(/\.md$/i, "").trim();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Final path segment of a wikilink target, trimmed.
 *
 * A target without `/` is returned (trimmed) as a whole; a target ending in
 * `/` yields the empty string.
 *
 * @param target Stored wikilink target, with or without a folder path.
 * @returns The text after the last `/`, without surrounding whitespace.
 */
export function wikilinkBasename(target: string): string {
  const lastSlash = target.lastIndexOf("/");
  const segment = lastSlash === -1 ? target : target.slice(lastSlash + 1);
  return segment.trim();
}
|
|
34
|
+
|
|
35
|
+
/** One piece of a note's text produced by the chunker. */
export type ParsedChunk = {
  /** Zero-based position of the chunk within its note. */
  chunkIndex: number;
  /** Chunk content. */
  text: string;
};
|
|
39
|
+
|
|
40
|
+
/** A wikilink found in a note body. */
export type ParsedWikilink = {
  /** Link target: alias and trailing `.md` removed, folder path kept. */
  target: string;
  /** Whitespace-collapsed text surrounding the link in the body. */
  contextSnippet: string;
};
|
|
44
|
+
|
|
45
|
+
/** Result of parsing one markdown note. */
export type ParsedDocument = {
  /** Frontmatter `title` when it is a string, else a title derived from the path. */
  title: string;
  /** Trimmed note content without the frontmatter block. */
  body: string;
  /** The raw file content exactly as passed to the parser. */
  text: string;
  /** Shallow copy of the parsed frontmatter (empty when there is none). */
  frontmatter: Record<string, unknown>;
  /** Frontmatter `type`, or null when absent or not string/date-like. */
  type: string | null;
  /** Frontmatter `status`, or null when absent or not string/date-like. */
  status: string | null;
  /** String entries of the frontmatter `tags` array; empty when not an array. */
  tags: string[];
  /** Frontmatter `created` as a string, or null. */
  created: string | null;
  /** Frontmatter `updated` as a string, or null. */
  updated: string | null;
  /** Wikilinks extracted from the full body. */
  wikilinks: ParsedWikilink[];
  /** Chunks of the indexable body. */
  chunks: ParsedChunk[];
};
|
|
58
|
+
|
|
59
|
+
/**
 * Parse a markdown note into frontmatter fields, wikilinks and chunks.
 *
 * Wikilinks are extracted from the FULL body (links-section block included)
 * so the graph sees every link. The chunks are built from the body with the
 * links section stripped, so that link lists do not end up in the indexed
 * text.
 *
 * @param content Raw file content, frontmatter included.
 * @param relativePath Path used to derive a title when the frontmatter has no
 *   string `title`.
 * @param chunkSize Chunk size passed to `chunkText`.
 * @param chunkOverlap Chunk overlap passed to `chunkText`.
 * @param taxonomy Preset that defines the links-section heading.
 * @returns The parsed document.
 */
export function parseMarkdown(content: string, relativePath: string, chunkSize: number, chunkOverlap: number, taxonomy: TaxonomyPreset): ParsedDocument {
  const { data, content: rawBody } = matter(content);
  const frontmatter = normalizeFrontmatter(data);
  const body = rawBody.trim();

  const title =
    typeof frontmatter.title === "string" ? frontmatter.title : noteTitleFromPath(relativePath);

  // Only the text handed to the chunker loses the links section.
  const indexableBody = stripLinksSection(body, taxonomy);

  const rawTags = frontmatter.tags;
  const tags = Array.isArray(rawTags)
    ? rawTags.filter((tag): tag is string => typeof tag === "string")
    : [];

  const document: ParsedDocument = {
    title,
    body,
    text: content,
    frontmatter,
    type: asNullableString(frontmatter.type),
    status: asNullableString(frontmatter.status),
    tags,
    created: asNullableString(frontmatter.created),
    updated: asNullableString(frontmatter.updated),
    wikilinks: extractWikilinks(body),
    chunks: chunkText(indexableBody, chunkSize, chunkOverlap, title),
  };
  return document;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Remove the leading links-section block from a note body before chunking.
 *
 * The links section is a list of wikilinks under the taxonomy's links
 * heading, followed by a `---` divider and then the actual content. It is
 * graph metadata rather than content, so it is kept out of the text that
 * gets indexed and embedded.
 *
 * The body is returned unchanged unless BOTH hold: the taxonomy's
 * links-section pattern matches the body, and a `---` divider line followed
 * by a newline exists. Otherwise everything up to and including the first
 * such divider is dropped and the rest is trimmed.
 *
 * @param body Note body without frontmatter.
 * @param taxonomy Preset that defines the links-section heading pattern.
 * @returns The body without its links section, or the body as given.
 */
export function stripLinksSection(body: string, taxonomy: TaxonomyPreset): string {
  // The heading pattern is locale-specific and supplied by the preset.
  const hasLinksHeading = linksSectionPattern(taxonomy).test(body);
  if (!hasLinksHeading) {
    return body;
  }

  const divider = /\n---\s*\n/.exec(body);
  if (divider === null) {
    return body;
  }

  const contentStart = divider.index + divider[0].length;
  return body.slice(contentStart).trim();
}
|
|
110
|
+
|
|
111
|
+
/**
 * Overwrite markdown code regions with spaces of equal length.
 *
 * This keeps `[[X]]` placeholders written inside inline code or fenced
 * blocks from being picked up as real wikilinks. The regions are REPLACED by
 * spaces rather than removed, so every offset in the result still lines up
 * with the original text.
 *
 * Passes run in this order:
 *   1. fenced regions delimited by three backticks or three tildes (may span
 *      lines and contain single backticks);
 *   2. double-backtick inline code on a single line;
 *   3. single-backtick inline code on a single line.
 *
 * Inline code delimited by three or more backticks is not handled.
 *
 * @param body Markdown text.
 * @returns Text of the same length with code regions blanked out.
 */
function maskCodeRegions(body: string): string {
  const blank = (region: string): string => " ".repeat(region.length);

  // Order matters: longer delimiters first, so their content is already
  // blanked when the shorter-delimiter passes run.
  const passes: RegExp[] = [
    /```[\s\S]*?```|~~~[\s\S]*?~~~/g,
    /``[^`\n]+``/g,
    /`[^`\n]+`/g,
  ];

  return passes.reduce((text, pattern) => text.replace(pattern, blank), body);
}
|
|
139
|
+
|
|
140
|
+
/**
 * Collect every wikilink in a note body, ignoring links inside code.
 *
 * Matching runs on a copy of the body whose code regions are blanked out.
 * The context snippet is cut from the ORIGINAL body (up to 50 characters on
 * either side of the link), so it shows the real surrounding text rather
 * than the blanked copy. Links whose target is empty after trimming are
 * skipped.
 *
 * @param body Note body without frontmatter.
 * @returns One entry per link, in document order.
 */
export function extractWikilinks(body: string): ParsedWikilink[] {
  const scannable = maskCodeRegions(body);
  const links: ParsedWikilink[] = [];

  for (const hit of scannable.matchAll(WIKILINK_RE)) {
    const rawTarget = hit[1]?.trim();
    if (!rawTarget) continue;

    // Offsets are valid in `body` too, because masking preserves length.
    const linkStart = hit.index ?? 0;
    const from = Math.max(0, linkStart - 50);
    const to = Math.min(body.length, linkStart + hit[0].length + 50);

    // The target keeps its folder path so the resolver sees what was written.
    const target = stripWikilinkTarget(rawTarget);
    const contextSnippet = body.slice(from, to).replace(/\s+/g, " ").trim();
    links.push({ target, contextSnippet });
  }

  return links;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Split a note body into chunks of roughly `chunkSize` characters.
 *
 * Paragraphs (separated by blank lines) are packed greedily: a paragraph is
 * appended to the current chunk while the result stays within `chunkSize`.
 * Otherwise the current chunk is emitted and the next one starts with the
 * last `chunkOverlap` characters of the previous chunk followed by the
 * paragraph. Any text that still exceeds `chunkSize` is cut into pieces, each
 * new piece starting `chunkOverlap` characters before the previous cut. This
 * applies to every paragraph, including the first one and bodies that contain
 * no blank line at all.
 *
 * The title, when given, is prepended to the first chunk only, so the note
 * name is part of the indexed text without being repeated in every chunk.
 * The first chunk may therefore exceed `chunkSize` by the title length. A
 * body that is empty after trimming yields a single title-only chunk, or no
 * chunk when there is no title either.
 *
 * @param body Text to split; CRLF line endings are normalised to LF.
 * @param chunkSize Character budget per chunk.
 * @param chunkOverlap Characters shared between consecutive chunks.
 * @param title Optional note title for the first chunk.
 * @returns Chunks with sequential `chunkIndex` values starting at 0.
 */
export function chunkText(
  body: string,
  chunkSize: number,
  chunkOverlap: number,
  title?: string,
): ParsedChunk[] {
  const normalized = body.replace(/\r\n/g, "\n").trim();
  const titlePrefix = title?.trim() ? `${title.trim()}\n\n` : "";

  if (!normalized) {
    // A note without body text is still findable by its title.
    return titlePrefix ? [{ chunkIndex: 0, text: titlePrefix.trim() }] : [];
  }

  const paragraphs = normalized
    .split(/\n{2,}/)
    .map((part) => part.trim())
    .filter(Boolean);

  // Text-only entries; chunkIndex is assigned once at the end.
  const chunks: { text: string }[] = [];

  // Emits leading pieces of `text` until the remainder fits the budget and
  // returns that remainder. Every pass strictly shortens the remainder, so
  // the loop always terminates.
  const emitOversized = (text: string): string => {
    let rest = text;
    while (rest.length > chunkSize) {
      const splitIndex = findSplitIndex(rest, chunkSize);
      // Degenerate budget (chunkSize <= 0): nothing sensible to cut.
      if (!(splitIndex > 0)) break;

      pushChunk(chunks, rest.slice(0, splitIndex));

      const withOverlap = rest.slice(Math.max(0, splitIndex - chunkOverlap)).trim();
      // When the overlap would swallow the whole piece (chunkOverlap >=
      // splitIndex) keep making progress by dropping the overlap instead.
      rest = withOverlap.length < rest.length ? withOverlap : rest.slice(splitIndex).trim();
    }
    return rest;
  };

  let current = "";
  for (const paragraph of paragraphs) {
    if (current) {
      const candidate = `${current}\n\n${paragraph}`;
      if (candidate.length <= chunkSize) {
        current = candidate;
        continue;
      }
      pushChunk(chunks, current);
      current = mergeOverlap(current, paragraph, chunkOverlap);
    } else {
      current = paragraph;
    }
    current = emitOversized(current);
  }

  if (current) {
    pushChunk(chunks, current);
  }

  // Prepend the title in place so chunk numbering is unaffected.
  if (titlePrefix && chunks.length > 0) {
    chunks[0] = { text: `${titlePrefix}${chunks[0].text}` };
  }

  return chunks.map((chunk, index) => ({ chunkIndex: index, text: chunk.text }));
}
|
|
240
|
+
|
|
241
|
+
/**
 * Append `text` to `chunks` after trimming it; text that is empty once
 * trimmed is dropped.
 *
 * @param chunks Accumulator, mutated in place.
 * @param text Candidate chunk text.
 */
function pushChunk(chunks: { text: string }[], text: string): void {
  const trimmed = text.trim();
  if (trimmed.length === 0) return;
  chunks.push({ text: trimmed });
}
|
|
245
|
+
|
|
246
|
+
/**
 * Start of the next chunk: the trimmed last `overlap` characters of the
 * previous chunk, a blank line, then `next`. An empty part is left out
 * together with the separator.
 *
 * @param previous Text of the chunk that was just emitted.
 * @param next Paragraph that opens the new chunk.
 * @param overlap Number of trailing characters of `previous` to carry over.
 * @returns The combined, trimmed text.
 */
function mergeOverlap(previous: string, next: string, overlap: number): string {
  const tailStart = Math.max(0, previous.length - overlap);
  const tail = previous.slice(tailStart).trim();
  const merged = tail && next ? `${tail}\n\n${next}` : tail || next;
  return merged.trim();
}
|
|
250
|
+
|
|
251
|
+
/**
 * Choose where to cut `input` so the leading piece is at most `target`
 * characters long.
 *
 * Prefers the last newline or space at or before `target`, so pieces end on a
 * word boundary. When there is no such break (other than at index 0) the cut
 * falls back to `target` itself. Input that already fits is not cut: its full
 * length is returned.
 *
 * @param input Text to cut.
 * @param target Maximum length of the leading piece.
 * @returns Index at which to slice `input`.
 */
function findSplitIndex(input: string, target: number): number {
  if (input.length <= target) return input.length;

  const lastBreak = Math.max(input.lastIndexOf("\n", target), input.lastIndexOf(" ", target));
  // A break at index 0 (or none at all) would yield an empty leading piece.
  return lastBreak > 0 ? lastBreak : target;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Coerce parsed frontmatter into a plain record.
 *
 * @param data Whatever the frontmatter parser produced.
 * @returns A shallow copy when `data` is a non-array object, otherwise `{}`.
 */
function normalizeFrontmatter(data: unknown): Record<string, unknown> {
  if (!data || typeof data !== "object" || Array.isArray(data)) {
    return {};
  }
  return { ...(data as Record<string, unknown>) };
}
|
|
259
|
+
|
|
260
|
+
/**
 * Read a frontmatter value as a string.
 *
 * Strings pass through unchanged. Valid `Date` objects — which is what the
 * frontmatter parser produces for YAML date scalars such as
 * `created: 2026-03-30` — are rendered as `YYYY-MM-DD` from their ISO (UTC)
 * form, so they are not lost. Everything else becomes `null`.
 *
 * @param value Raw frontmatter value.
 * @returns The string form, or `null`.
 */
function asNullableString(value: unknown): string | null {
  if (typeof value === "string") {
    return value;
  }
  const isValidDate = value instanceof Date && !Number.isNaN(value.getTime());
  return isValidDate ? (value as Date).toISOString().slice(0, 10) : null;
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Permanent-change detector core — a memoryd subsystem (ADR-004).
|
|
3
|
+
*
|
|
4
|
+
* Carries over the detection SEMANTICS of the reference
|
|
5
|
+
* `mergemind-permanent-monitor.sh` (a bash poll loop; no direct unit
|
|
6
|
+
* fixtures existed — the load-bearing part, the smart hash, is ported and
|
|
7
|
+
* tested in `smart-hash.ts`):
|
|
8
|
+
*
|
|
9
|
+
* - watches the SIX permanent folders (five canonical + agent memory),
|
|
10
|
+
* recursively; the archive is deliberately ignored (frozen notes);
|
|
11
|
+
* - compares by the sha256 of the SEMANTIC part of each file (frontmatter
|
|
12
|
+
* minus service fields + body, `smart-hash.ts`) — NOT raw bytes, NOT
|
|
13
|
+
* mtime. This kills both noisy-event classes: iCloud mtime-only syncs
|
|
14
|
+
* and the hook-induced loop (service-field re-stamps are invisible);
|
|
15
|
+
* - deletions are ignored (an archive move by the Index, not a change);
|
|
16
|
+
* - events are COALESCED (ADR-004): one diff pass yields ONE event
|
|
17
|
+
* carrying the list of changed paths, not N wake-ups.
|
|
18
|
+
*
|
|
19
|
+
* The fs-watch/debounce shell and the stdout signal line belong to the
|
|
20
|
+
* memoryd daemon stage; this module is the pure snapshot/diff core.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import fs from "node:fs";
|
|
24
|
+
import path from "node:path";
|
|
25
|
+
import { hashFile } from "./smart-hash.js";
|
|
26
|
+
import type { TaxonomyPreset } from "./taxonomy.js";
|
|
27
|
+
|
|
28
|
+
/** Snapshot of the monitored notes: vault-relative path → smart hash. */
export type VaultSnapshot = Map<string, string>;
|
|
30
|
+
|
|
31
|
+
/**
 * Recursively yield the paths of all regular `*.md` files below `dir`.
 *
 * A directory that cannot be listed (missing, no permission, …) contributes
 * nothing instead of raising. Entries are visited in the order `readdirSync`
 * returns them.
 *
 * @param dir Directory to walk.
 */
function* walkMdFiles(dir: string): Generator<string> {
  let children: fs.Dirent[];
  try {
    children = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return;
  }

  for (const child of children) {
    const childPath = path.join(dir, child.name);
    if (child.isDirectory()) {
      yield* walkMdFiles(childPath);
      continue;
    }
    if (child.isFile() && child.name.endsWith(".md")) {
      yield childPath;
    }
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Folder names watched for permanent changes: the five canonical folders
 * (knowledge, decisions, projects, ideas, lists) plus agent memory, in that
 * order.
 *
 * @param taxonomy Preset supplying the folder names.
 * @returns The six folder names.
 */
export function monitoredFolders(taxonomy: TaxonomyPreset): string[] {
  const { knowledge, decisions, projects, ideas, lists, agentMemory } = taxonomy.folders;
  return [knowledge, decisions, projects, ideas, lists, agentMemory];
}
|
|
50
|
+
|
|
51
|
+
/**
 * Hash every markdown file in the monitored folders.
 *
 * Files for which `hashFile` returns a falsy value are left out of the
 * snapshot without any error being reported.
 *
 * @param vault Vault root; snapshot keys are relative to it.
 * @param taxonomy Preset supplying the monitored folder names.
 * @returns Map from vault-relative path to smart hash.
 */
export function snapshotVault(vault: string, taxonomy: TaxonomyPreset): VaultSnapshot {
  const hashes: VaultSnapshot = new Map();
  const roots = monitoredFolders(taxonomy).map((folder) => path.join(vault, folder));

  for (const root of roots) {
    for (const absolutePath of walkMdFiles(root)) {
      const digest = hashFile(absolutePath);
      if (!digest) continue;
      hashes.set(path.relative(vault, absolutePath), digest);
    }
  }

  return hashes;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Paths that are new or whose hash differs in `next` compared with `prev`.
 * Paths present only in `prev` (deletions) are ignored by design.
 *
 * @param prev Snapshot from the previous pass.
 * @param next Snapshot from the current pass.
 * @returns Added or changed paths in sorted order.
 */
export function diffSnapshots(prev: VaultSnapshot, next: VaultSnapshot): string[] {
  return Array.from(next)
    .filter(([rel, hash]) => prev.get(rel) !== hash)
    .map(([rel]) => rel)
    .sort();
}
|
|
77
|
+
|
|
78
|
+
/** Single event describing every change found in one detection pass. */
export type PermanentChangedEvent = {
  /** Event discriminator. */
  kind: "PERMANENT_CHANGED";
  /** Coalesced list of changed vault-relative paths (ADR-004). */
  paths: string[];
};
|
|
83
|
+
|
|
84
|
+
/**
 * Run one detection pass: snapshot the vault and diff it against the
 * previous snapshot.
 *
 * @param opts.vault Vault root.
 * @param opts.taxonomy Preset supplying the monitored folder names.
 * @param opts.prev Snapshot returned by the previous pass.
 * @returns `event` — one coalesced event listing all added/changed paths, or
 *   `null` when there are none; `next` — the fresh snapshot to pass as `prev`
 *   next time.
 */
export function detectPermanentChanges(opts: {
  vault: string;
  taxonomy: TaxonomyPreset;
  prev: VaultSnapshot;
}): { event: PermanentChangedEvent | null; next: VaultSnapshot } {
  const next = snapshotVault(opts.vault, opts.taxonomy);
  const paths = diffSnapshots(opts.prev, next);

  let event: PermanentChangedEvent | null = null;
  if (paths.length) {
    event = { kind: "PERMANENT_CHANGED", paths };
  }
  return { event, next };
}
|
|
101
|
+
|
|
102
|
+
/**
 * Render an event as signal lines, one `PERMANENT_CHANGED: <path>` line per
 * changed path, in the order the paths appear in the event.
 *
 * @param event Coalesced change event.
 * @returns The signal lines.
 */
export function formatEventLines(event: PermanentChangedEvent): string[] {
  const lines: string[] = [];
  for (const changedPath of event.paths) {
    lines.push(`PERMANENT_CHANGED: ${changedPath}`);
  }
  return lines;
}
|