@agfpd/iapeer-memory-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Public surface of `@agfpd/iapeer-memory-core` — the modules the package
3
+ * facade (CLI) and the adapters consume. Deliberately EXPLICIT, not
4
+ * `export *`: the barrel is the contract boundary between core and the
5
+ * distribution layer (docs/10-distribution.md); deep imports stay possible
6
+ * inside the workspace but everything the facade needs must be listed here
7
+ * (also avoids real symbol collisions, e.g. the two internal `atomicWrite`s).
8
+ */
9
+
10
+ // config
11
+ export { configFromEnv, type CoreConfig } from "./config.js";
12
+
13
+ // taxonomy (ADR-002/011)
14
+ export {
15
+ getTaxonomy,
16
+ isLocaleId,
17
+ defaultExcludeFolders,
18
+ DEFAULT_CURATOR_SET,
19
+ DEFAULT_RANKING,
20
+ type LocaleId,
21
+ type RankingConfig,
22
+ type TaxonomyPreset,
23
+ } from "./taxonomy.js";
24
+
25
+ // frontmatter: post-write fill + structural fm-update (CLI contract in module header)
26
+ export { processFile, resolveAgentName, type ProcessOptions } from "./frontmatter-fill.js";
27
+ export { fmUpdate, collectOps, yamlSafeScalar, type FmUpdateOptions, type Op } from "./fm-update.js";
28
+
29
+ // author index rendering
30
+ export { regenerateVaultIndex, fullIndexPathFor, type RenderContext } from "./index-render.js";
31
+
32
+ // layer-5 fragments (ADR-001)
33
+ export {
34
+ FRAGMENT_STEM,
35
+ peerFragmentsDir,
36
+ renderPeerFragment,
37
+ writeHostWideGuideFragment,
38
+ type FragmentEnv,
39
+ } from "./context-render.js";
40
+
41
+ // role doctrines + version marker (ADR-009/010)
42
+ export {
43
+ renderDoctrine,
44
+ renderRoleDoctrines,
45
+ renderedVersion,
46
+ versionMarker,
47
+ type RenderOutcome,
48
+ } from "./render-doctrine.js";
49
+
50
+ // memoryd (ADR-004/012)
51
+ export {
52
+ startMemoryd,
53
+ MEMORYD_SERVER_NAME,
54
+ type MemorydHandle,
55
+ type MemorydOptions,
56
+ } from "./memoryd.js";
57
+
58
+ // auto-memory migration (engine; sources are adapter-scoped)
59
+ export { planMigration, applyMigration, type MigrationPlan, type MigrationResult } from "./migrate-auto-memory.js";
60
+
61
+ // sqlite runtime probe (vec availability — visible degradation, never silent)
62
+ export { prepareSqliteRuntime, type SqliteRuntime } from "./sqlite-loader.js";
63
+
64
+ // logging
65
+ export { makeLogger, type Logger } from "./log.js";
package/src/indexer.ts ADDED
@@ -0,0 +1,323 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import type { CoreConfig } from "./config.js";
4
+ import type { CoreDb } from "./db.js";
5
+ import { deleteMissingDocuments, getStoredHash, getDocumentMeta, documentExists, upsertDocument, getChunksWithoutEmbeddings, storeChunkEmbeddings } from "./db.js";
6
+ import { embedTexts } from "./embedding.js";
7
+ import { parseMarkdown, wikilinkBasename } from "./parser.js";
8
+ import { hashContent, normalizeRelativePath, nowIso } from "./utils.js";
9
+
10
/**
 * Run one full indexing pass over the vault.
 *
 * Steps, in order: scan `config.vaultPath` (indexing new/changed notes),
 * delete index rows for documents that were not seen, resolve wikilinks
 * against the collected titles, and — only when `config.embedding` is set —
 * embed chunks that have no embedding yet.
 *
 * @param params.db      Index database handle.
 * @param params.config  Core configuration (vault path, excluded folders, embedding settings).
 * @param params.logger  Sink for progress and warning messages.
 * @returns The title/basename → document paths map built during the scan.
 */
export async function indexAll(params: {
  db: CoreDb;
  config: CoreConfig;
  logger: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
}): Promise<Map<string, string[]>> {
  const { db, config, logger } = params;

  // Every document path encountered during this scan.
  const seenPaths = new Set<string>();
  // Values are lists on purpose: two notes may share a basename (e.g.
  // `Фаза — MVP` in two projects). The resolver treats more than one entry
  // as ambiguous rather than silently picking whichever was written last.
  const titleToPath = new Map<string, string[]>();

  const excludeFolders = new Set(config.excludeFolders);
  await scanRoot({
    db,
    basePath: config.vaultPath,
    excludeFolders,
    config,
    seenPaths,
    logger,
    titleToPath,
  });

  // NOTE(review): scanRoot only warns and returns when the root is missing, so
  // seenPaths stays empty in that case; presumably deleteMissingDocuments then
  // treats every indexed document as stale — confirm this is intended.
  const removedCount = deleteMissingDocuments(db, seenPaths);
  if (removedCount > 0) {
    logger.info(`MergeMind: removed ${removedCount} stale documents from index`);
  }

  // Turn raw wikilink targets into real document paths.
  resolveWikilinks(db, titleToPath);

  // Embedding is optional; skip entirely when it is not configured.
  if (config.embedding) {
    await embedMissingChunks({ db, config, logger });
  }

  return titleToPath;
}
47
+
48
/**
 * Record that `key` (a note title or basename) refers to `docPath`.
 *
 * The map is list-valued so that collisions — the same basename in different
 * folders — remain detectable instead of being last-writer-wins. A path that
 * is already listed under the key is not added twice. Exported because the
 * watcher maintains the same map incrementally.
 *
 * @param map      Title/basename → document paths map, mutated in place.
 * @param key      Title or basename to register.
 * @param docPath  Document path to associate with the key.
 */
export function addTitlePath(
  map: Map<string, string[]>,
  key: string,
  docPath: string,
): void {
  const bucket = map.get(key);
  if (bucket) {
    // Key already known: append only when this path is new for it.
    if (!bucket.includes(docPath)) {
      bucket.push(docPath);
    }
    return;
  }
  map.set(key, [docPath]);
}
65
+
66
/**
 * Rewrite raw wikilink targets stored in `edges` into real document paths.
 *
 * Resolution rules (path-aware, Audit #3):
 *  - a target containing `/` is first tried as an exact document path
 *    (with `.md` appended when absent);
 *  - otherwise, or when that exact path is not indexed, the link's basename
 *    is looked up in `titleToPath` and accepted only when exactly one
 *    document carries it.
 *
 * Links that cannot be resolved are not dropped (Audit #5): the edge row is
 * moved into `unresolved_links` together with a reason (`missing` or
 * `ambiguous`). The function also self-heals: every row in
 * `unresolved_links` is retried on each call and promoted back into `edges`
 * once its target resolves; if it still fails for a different reason, the
 * stored reason is updated.
 *
 * All writes happen inside a single database transaction.
 *
 * @param db           Index database handle.
 * @param titleToPath  Title/basename → document paths. Expected to cover all
 *                     indexed files, including ones unchanged this run
 *                     (Audit #1), so links to them still resolve.
 */
export function resolveWikilinks(
  db: CoreDb,
  titleToPath: Map<string, string[]>,
): void {
  type Outcome = { path: string } | { reason: "missing" | "ambiguous" };
  type EdgeRow = {
    rowid: number;
    source_path: string;
    target_path: string;
    context_snippet: string | null;
  };
  type UnresolvedRow = {
    rowid: number;
    source_path: string;
    raw_target: string;
    context_snippet: string | null;
    reason: string;
  };

  const lookup = (rawTarget: string): Outcome => {
    const target = rawTarget.normalize("NFC");

    if (target.includes("/")) {
      // The author wrote an explicit path — honour it exactly. Document paths
      // are stored NFD (normalizeRelativePath) while links in content are
      // usually NFC, hence the conversion before the existence check.
      const explicit = (/\.md$/i.test(target) ? target : `${target}.md`).normalize("NFD");
      if (documentExists(db, explicit)) {
        return { path: explicit };
      }
      // No exact hit: fall back to the strict basename lookup below.
    }

    const candidates = titleToPath.get(wikilinkBasename(target).normalize("NFC")) ?? [];
    switch (candidates.length) {
      case 0:
        return { reason: "missing" };
      case 1:
        return { path: candidates[0] };
      default:
        return { reason: "ambiguous" };
    }
  };

  const edgeRows = db
    .prepare(
      "SELECT rowid, source_path, target_path, context_snippet FROM edges",
    )
    .all() as EdgeRow[];

  const stmt = {
    // OR IGNORE: two different links in one note can resolve to the same note
    // ([[Foo]] and [[01_Знания/Foo]]); the second update would collide with
    // the (source,target) PK. An ignored update reports changes === 0, which
    // marks the row as a duplicate to be removed.
    retargetEdge: db.prepare(
      "UPDATE OR IGNORE edges SET target_path = ? WHERE rowid = ?",
    ),
    dropEdge: db.prepare("DELETE FROM edges WHERE rowid = ?"),
    addEdge: db.prepare(
      "INSERT OR IGNORE INTO edges (source_path, target_path, context_snippet) VALUES (?, ?, ?)",
    ),
    parkUnresolved: db.prepare(
      "INSERT INTO unresolved_links (source_path, raw_target, reason, context_snippet) VALUES (?, ?, ?, ?) ON CONFLICT(source_path, raw_target) DO UPDATE SET reason = excluded.reason, context_snippet = excluded.context_snippet",
    ),
    setUnresolvedReason: db.prepare(
      "UPDATE unresolved_links SET reason = ? WHERE rowid = ?",
    ),
    dropUnresolved: db.prepare(
      "DELETE FROM unresolved_links WHERE rowid = ?",
    ),
  };

  db.transaction(() => {
    for (const edge of edgeRows) {
      // A target ending in ".md" is treated as already resolved.
      if (edge.target_path.endsWith(".md")) continue;

      const outcome = lookup(edge.target_path);
      if (!("path" in outcome)) {
        // Unresolvable: move the link out of `edges`, keeping why it failed.
        stmt.dropEdge.run(edge.rowid);
        stmt.parkUnresolved.run(
          edge.source_path,
          edge.target_path,
          outcome.reason,
          edge.context_snippet,
        );
        continue;
      }

      const updated = stmt.retargetEdge.run(outcome.path, edge.rowid);
      if (updated.changes === 0) {
        // The resolved edge already exists — this row is a duplicate.
        stmt.dropEdge.run(edge.rowid);
      }
    }

    // Self-heal: retry every unresolved link — its target may exist now.
    const pendingRows = db
      .prepare(
        "SELECT rowid, source_path, raw_target, context_snippet, reason FROM unresolved_links",
      )
      .all() as UnresolvedRow[];

    for (const pending of pendingRows) {
      const outcome = lookup(pending.raw_target);
      if ("path" in outcome) {
        stmt.addEdge.run(pending.source_path, outcome.path, pending.context_snippet);
        stmt.dropUnresolved.run(pending.rowid);
        continue;
      }
      if (outcome.reason !== pending.reason) {
        stmt.setUnresolvedReason.run(outcome.reason, pending.rowid);
      }
    }
  })();
}
175
+
176
/**
 * Compute and store embeddings for every chunk that does not have one yet.
 *
 * Chunks are fetched in batches of `config.embedding.batchSize` until none
 * remain. The loop stops early (with a warning) when the embedding call
 * returns no vectors, or fewer vectors than chunks were sent. Does nothing
 * when `config.embedding` is not set.
 *
 * @param params.db      Index database handle.
 * @param params.config  Core configuration; `embedding` holds the embedding settings.
 * @param params.logger  Sink for the summary and warning messages.
 */
async function embedMissingChunks(params: {
  db: CoreDb;
  config: CoreConfig;
  logger: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
}): Promise<void> {
  const { db, config, logger } = params;
  // Narrow once so the rest of the function needs no non-null assertions.
  const embedding = config.embedding;
  if (!embedding) return;

  const batchSize = embedding.batchSize;
  let total = 0;

  while (true) {
    const missing = getChunksWithoutEmbeddings(db, batchSize);
    if (missing.length === 0) break;

    const texts = missing.map((c) => c.chunkText);
    const result = await embedTexts(texts, embedding);

    const vectors = result.vectors;
    if (!vectors) {
      logger.warn(
        `MergeMind: embedding endpoint unavailable (${result.status}), skipping embedding`,
      );
      break;
    }
    if (vectors.length < missing.length) {
      // A short response would otherwise crash on `undefined.buffer` below.
      // Stop instead: storing nothing and looping would refetch the same batch.
      logger.warn(
        `MergeMind: embedding endpoint returned ${vectors.length} vectors for ${missing.length} chunks, skipping embedding`,
      );
      break;
    }

    const updates = missing.map((chunk, i) => {
      const vec = vectors[i];
      return {
        id: chunk.id,
        // Respect the view's window: `vec.buffer` alone is the whole backing
        // ArrayBuffer, which is larger than the vector when `vec` is a
        // subarray or a slice of a shared batch buffer.
        embedding: Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength),
      };
    });

    storeChunkEmbeddings(db, updates);
    total += updates.length;
  }

  if (total > 0) {
    logger.info(`MergeMind: embedded ${total} chunks`);
  }
}
214
+
215
/** Shared state threaded through scanRoot → walkDirectory → indexFile. */
type ScanRootParams = {
  /** Index database handle. */
  db: CoreDb;
  /** Core configuration (chunking and taxonomy settings are read when parsing). */
  config: CoreConfig;
  /** Sink for progress and warning messages. */
  logger: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
  /** Root directory being scanned; document paths are made relative to it. */
  basePath: string;
  /** Directory names that are not descended into (compared against the entry name). */
  excludeFolders: Set<string>;
  /** Output: document paths of every file read during the scan. */
  seenPaths: Set<string>;
  /** Output: title/basename → document paths, filled in as files are indexed. */
  titleToPath: Map<string, string[]>;
};
224
+
225
/**
 * Validate the scan root and, if it is a directory, walk it.
 *
 * A root that cannot be stat'ed or is not a directory is reported with a
 * warning and skipped; no error is thrown for either case.
 *
 * @param params  Scan state; `basePath` is the root to validate and walk.
 */
async function scanRoot(params: ScanRootParams): Promise<void> {
  const { basePath, logger } = params;

  let walkable = false;
  try {
    walkable = (await fs.stat(basePath)).isDirectory();
    if (!walkable) {
      logger.warn(`MergeMind: skip non-directory path ${basePath}`);
    }
  } catch {
    // Any failure up to this point is reported as a missing path.
    logger.warn(`MergeMind: path does not exist, skipping ${basePath}`);
  }

  if (walkable) {
    await walkDirectory(params, basePath);
  }
}
240
+
241
/**
 * Recursively index every `.md` file under `currentPath`.
 *
 * Directories whose name is in `params.excludeFolders` are not descended
 * into. Entries that are neither directories nor regular `.md` files are
 * ignored. Files are processed one at a time, in directory-listing order.
 *
 * @param params       Scan state shared across the whole walk.
 * @param currentPath  Absolute path of the directory to list.
 */
async function walkDirectory(params: ScanRootParams, currentPath: string): Promise<void> {
  const listing = await fs.readdir(currentPath, { withFileTypes: true });

  for (const dirent of listing) {
    const target = path.join(currentPath, dirent.name);

    if (dirent.isDirectory()) {
      if (!params.excludeFolders.has(dirent.name)) {
        await walkDirectory(params, target);
      }
    } else if (dirent.isFile() && dirent.name.endsWith(".md")) {
      // One malformed frontmatter shouldn't kill the whole scan. Pre-split,
      // server.ts caught at the top of indexAll and the writer continued
      // with an empty titleToPath (degraded). Now that the writer is its own
      // daemon, a single bad note crashing the scan would force a launchd
      // restart loop — the bad note is still bad, so the daemon never
      // stabilises. Log, skip, move on.
      try {
        await indexFile(params, target);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        params.logger.warn(`MergeMind: skip ${target} — ${reason}`);
      }
    }
  }
}
272
+
273
/**
 * Index a single markdown file.
 *
 * The file is read, its vault-relative path is recorded in `seenPaths`, and
 * its content hash is compared with the stored one. A changed (or new) file
 * is parsed and upserted together with its chunks and wikilinks; an
 * unchanged file is not re-parsed. In both cases the file's basename — and
 * its title, when that differs — is registered in `titleToPath`.
 *
 * @param params    Scan state (database, config, output collections, logger).
 * @param fullPath  Absolute path of the file to index.
 */
async function indexFile(params: ScanRootParams, fullPath: string): Promise<void> {
  const { db, basePath, seenPaths, config, logger, titleToPath } = params;

  const text = await fs.readFile(fullPath, "utf8");
  const relPath = normalizeRelativePath(path.relative(basePath, fullPath));
  seenPaths.add(relPath);

  const digest = hashContent(text);
  // NFC-normalize keys: paths from iCloud are NFD, wikilinks in content are NFC.
  const basenameKey = path.basename(relPath, ".md").normalize("NFC");
  const changed = getStoredHash(db, relPath) !== digest;

  if (changed) {
    const { search, taxonomy } = config;
    const parsed = parseMarkdown(text, relPath, search.chunkSize, search.chunkOverlap, taxonomy);
    upsertDocument(
      db,
      {
        path: relPath,
        title: parsed.title,
        type: parsed.type,
        status: parsed.status,
        tags: parsed.tags,
        contentHash: digest,
        frontmatter: parsed.frontmatter,
        created: parsed.created,
        updated: parsed.updated,
        indexedAt: nowIso(),
      },
      parsed.chunks,
      parsed.wikilinks,
    );

    // Register basename and (if different) parsed title for wikilink resolution.
    addTitlePath(titleToPath, basenameKey, relPath);
    const parsedTitle = parsed.title?.normalize("NFC");
    if (parsedTitle && parsedTitle !== basenameKey) {
      addTitlePath(titleToPath, parsedTitle, relPath);
    }

    logger.info(`MergeMind: indexed ${relPath}`);
    return;
  }

  // Unchanged this run — but its title MUST still be registered. Otherwise a
  // changed file linking [[ThisNote]] would fail to resolve against the map.
  // The title comes from the stored row, so no re-parse is needed.
  addTitlePath(titleToPath, basenameKey, relPath);
  const knownTitle = getDocumentMeta(db, relPath)?.title?.normalize("NFC");
  if (knownTitle && knownTitle !== basenameKey) {
    addTitlePath(titleToPath, knownTitle, relPath);
  }
}
package/src/log.ts ADDED
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Shared stderr logger.
3
+ *
4
+ * stdout is reserved for MCP JSON-RPC (server.ts only writes JSON frames
5
+ * there); everything diagnostic — both the MCP frontend and the writer
6
+ * daemon — funnels through here so a single grep on stderr captures the
7
+ * full picture. The `[iapeer-memory <kind>]` prefix lets log aggregators
8
+ * distinguish reader and writer processes when they share a tmux pane.
9
+ */
10
+
11
/** Minimal three-level logger; each method takes one preformatted message. */
export type Logger = {
  info: (msg: string) => void;
  warn: (msg: string) => void;
  error: (msg: string) => void;
};

/**
 * Build a logger that writes one line per message to stderr.
 *
 * Line format is `[<tag>] msg`, `[<tag> WARN] msg` or `[<tag> ERROR] msg`,
 * each terminated by a newline.
 *
 * @param kind  Which process is logging; selects the tag.
 * @returns A logger bound to that tag.
 */
export function makeLogger(kind: "mcp" | "memoryd"): Logger {
  // `mcp` keeps the stable `[iapeer-memory]` prefix that downstream tooling
  // (status skill, monitor scripts) greps for. Any other kind is spelled out
  // in the tag so its logs can be filtered.
  const tag = kind === "mcp" ? "iapeer-memory" : `iapeer-memory ${kind}`;

  // Factory for one level: `suffix` is appended inside the brackets.
  const writerFor = (suffix: "" | " WARN" | " ERROR") =>
    (msg: string) => process.stderr.write(`[${tag}${suffix}] ${msg}\n`);

  return {
    info: writerFor(""),
    warn: writerFor(" WARN"),
    error: writerFor(" ERROR"),
  };
}