@agfpd/iapeer-memory-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,468 @@
1
+ /**
2
+ * MCP tool handlers — pure functions called from server.ts after Zod
3
+ * validation. Each returns the JSON payload that the server wraps into
4
+ * `content` + `structuredContent`.
5
+ */
6
+
7
+ import fs from "node:fs/promises";
8
+ import path from "node:path";
9
+ import type { CoreConfig } from "./config.js";
10
+ import type { CoreDb } from "./db.js";
11
+ import { getBacklinks, getDocumentMeta } from "./db.js";
12
+ import { parseMarkdown } from "./parser.js";
13
+ import { runVaultSearch } from "./search.js";
14
+ import { buildVaultMap } from "./graph.js";
15
+ import { normalizePath } from "./utils.js";
16
+ import { agentMemoryFolderMarker } from "./taxonomy.js";
17
+
18
// Guard against UF_DATALESS iCloud files: reading such a file triggers a
// synchronous fetch from iCloud, and without a timeout the Node event loop
// stays frozen for minutes. 30 seconds is a generous margin — on a normal
// network iCloud completes within 5-15 seconds. When the timeout fires we
// write a warning to stderr and return not-found, so the caller can fall back
// to vault_search.
const READ_TIMEOUT_MS = 30_000;
24
+
25
/**
 * Report whether a caught value looks like an abort error, i.e. an object
 * whose `name` is "AbortError" or whose `code` is "ABORT_ERR".
 *
 * @param err - Any caught value; nullish values and non-objects yield `false`.
 * @returns `true` only when one of the two abort markers is present.
 */
function isAbortError(err: unknown): boolean {
  if (typeof err !== "object" || err === null) {
    return false;
  }
  const candidate = err as { name?: string; code?: string };
  if (candidate.name === "AbortError") {
    return true;
  }
  return candidate.code === "ABORT_ERR";
}
30
+
31
+ // ---- vault_search ----
32
+
33
/**
 * vault_search handler: delegates to `runVaultSearch` and repackages its
 * output.
 *
 * @param db - Database handle, passed through to `runVaultSearch`.
 * @param config - Core configuration, passed through to `runVaultSearch`.
 * @param args - `query` is forwarded unchanged; `forCuration` falls back to
 *   `false` when omitted.
 * @returns `{ query, results, pipeline }` — the query that was asked plus the
 *   `results` and `pipeline` fields of the search output.
 */
export async function runSearch(
  db: CoreDb,
  config: CoreConfig,
  args: { query: string; forCuration?: boolean },
): Promise<unknown> {
  const forCuration = args.forCuration ?? false;
  const searchOutput = await runVaultSearch({
    db,
    config,
    query: args.query,
    forCuration,
  });

  const { results, pipeline } = searchOutput;
  return { query: args.query, results, pipeline };
}
46
+
47
/**
 * Names of the tools exposed over MCP (ADR-008): exactly three, all read-only.
 *
 * `vault_read` is intentionally absent from this list. Inside a harness
 * session, reading is done with the harness's native Read (the path is known
 * once vault_search has run), and backlinks are available through
 * vault_graph(depth=1, incoming). `runRead` below therefore remains a LIBRARY
 * function of core — for memoryd, the Index runtime, the CLI and programmatic
 * consumers outside harness sessions.
 */
export const MCP_TOOL_SURFACE = [
  "vault_search",
  "vault_graph",
  "vault_map",
] as const;
56
+
57
+ // ---- vault_read — library read function (NOT on the MCP surface, ADR-008) ----
58
+
59
+ /**
60
+ * Validate and resolve a user-supplied vault path before any disk access.
61
+ *
62
+ * vault_read is reachable from any agent that loads the MergeMind plugin,
63
+ * including ones whose context can be poisoned by inbox drafts (prompt
64
+ * injection). Without containment, an attacker-controlled `path` like
65
+ * `../../.ssh/id_rsa`, `../../.mergemind/env`, or `/etc/passwd` would
66
+ * exfiltrate arbitrary files the MCP process can read.
67
+ *
68
+ * Defence-in-depth here: enforce relative-only + .md suffix + no traversal
69
+ * segments, refuse paths inside `excludeFolders` (drafts/system), and finally
70
+ * resolve to an absolute path and assert it stays under the canonical vault
71
+ * root. Both sides are NFD-normalised because macOS stores filenames in NFD
72
+ * while user input is usually NFC, and a byte-wise startsWith would otherwise
73
+ * either fail-open (accept paths that look outside but aren't) or fail-closed
74
+ * (reject legitimate Cyrillic paths).
75
+ */
76
+ /**
77
+ * Validation outcome:
78
+ * - `{docPath, fullPath}` — path is well-formed and inside vault root.
79
+ * - `{notFound: true, reason}` — path is well-formed but lives in an
80
+ * excludeFolders area (drafts/system). The caller surfaces this as a
81
+ * payload-level not-found so excluded folders can't be probed for
82
+ * existence — same wording as the actual not-in-index case.
83
+ * - **throws** on malformed input (null byte, non-.md, absolute path,
84
+ * empty/./.. segments, vault-root escape). These are programmer/caller
85
+ * errors, not lookup misses, so they bubble up as MCP tool errors
86
+ * (`isError: true`) instead of leaking validation strings into the
87
+ * payload of an otherwise successful-looking response.
88
+ */
89
type ValidatedPath =
  | { docPath: string; fullPath: string }
  | { notFound: true; reason: string };

/**
 * Check a caller-supplied vault path and resolve it to an absolute path.
 *
 * The checks visible here are lexical (`path.isAbsolute`, `path.resolve`,
 * string comparisons); no file is opened by this function.
 *
 * @param rawPath - Path as received from the caller, expected to be relative
 *   to the vault root.
 * @param config - Supplies `vaultPath` (the vault root) and `excludeFolders`.
 * @returns `{ docPath, fullPath }` for an acceptable path, or
 *   `{ notFound: true, reason }` when the first path segment is one of
 *   `config.excludeFolders`.
 * @throws Error when the path is empty or contains a NUL byte, does not end
 *   in `.md` (case-insensitive), is absolute, contains an empty / `.` / `..`
 *   segment, or resolves outside the vault root.
 */
function validateVaultPath(
  rawPath: string,
  config: CoreConfig,
): ValidatedPath {
  if (!rawPath || rawPath.includes("\0")) {
    throw new Error("Path is required");
  }
  const docPath = normalizePath(rawPath);

  if (!docPath.toLowerCase().endsWith(".md")) {
    throw new Error("Only .md files can be read via vault_read");
  }
  if (path.isAbsolute(docPath)) {
    throw new Error("Path must be relative to the vault root");
  }
  const segments = docPath.split("/");
  if (segments.some((s) => s === "" || s === "." || s === "..")) {
    throw new Error("Path must not contain empty, '.' or '..' segments");
  }

  // Respect excludeFolders even for direct disk reads. Inbox drafts and
  // system folders are intentionally hidden from search — leaking them
  // through vault_read would defeat the privacy contract excludeFolders
  // is supposed to provide. Same "Document not found" wording as the
  // not-in-index branch so we don't reveal whether the path exists.
  //
  // BOTH sides of the comparison are brought to NFD here. Normalising only
  // the configured names would let a request spelled in the other Unicode
  // form (e.g. NFC "й" vs NFD "и" + combining breve) miss the exclusion and
  // reach the disk-read fallback. A trailing "/" on a configured entry is
  // tolerated for the same fail-closed reason.
  const firstSegment = (segments[0] ?? "").normalize("NFD");
  const excluded = new Set(
    config.excludeFolders.map((folder) =>
      folder.normalize("NFD").replace(/\/+$/, ""),
    ),
  );
  if (excluded.has(firstSegment)) {
    return { notFound: true, reason: `Document not found: ${docPath}` };
  }

  // NOTE(review): path.resolve is purely lexical — symlinks inside the vault
  // are not resolved, and the excluded-folder match above is case-sensitive.
  // Confirm whether either matters for the deployments this runs on.
  const vaultRoot = path.resolve(config.vaultPath).normalize("NFD");
  const fullPath = path.resolve(vaultRoot, docPath).normalize("NFD");
  if (fullPath !== vaultRoot && !fullPath.startsWith(vaultRoot + path.sep)) {
    throw new Error("Path escapes vault root");
  }

  return { docPath, fullPath };
}
132
+
133
/** Result of one timed document read: the text, or the failure and whether it was a timeout. */
type VaultReadOutcome =
  | { kind: "ok"; text: string }
  | { kind: "failed"; timedOut: boolean; cause: unknown };

/**
 * Read a document as UTF-8, aborting after READ_TIMEOUT_MS.
 *
 * Never rejects: every failure is returned as a `failed` outcome. When the
 * failure is an abort (the timeout fired) a warning is written via
 * `console.warn`.
 *
 * @param fullPath - Absolute path handed to `fs.readFile`.
 * @param docPath - Vault-relative path, used only in the warning text.
 */
async function readVaultText(
  fullPath: string,
  docPath: string,
): Promise<VaultReadOutcome> {
  try {
    const text = await fs.readFile(fullPath, {
      encoding: "utf8",
      signal: AbortSignal.timeout(READ_TIMEOUT_MS),
    });
    return { kind: "ok", text };
  } catch (err) {
    const timedOut = isAbortError(err);
    if (timedOut) {
      console.warn(`[mcp] vault_read timeout (${READ_TIMEOUT_MS}ms) on ${docPath} — likely iCloud UF_DATALESS`);
    }
    return { kind: "failed", timedOut, cause: err };
  }
}

/**
 * Library read function (not on the MCP surface): validate the path, read the
 * file from disk and return its text together with metadata and links.
 *
 * @param db - Database handle used for `getDocumentMeta` and `getBacklinks`.
 * @param config - Core configuration (vault root, excluded folders, chunking
 *   and taxonomy settings passed to `parseMarkdown`).
 * @param args - `path`: vault-relative path of the document.
 * @returns `{ found: false, error }` when the path is in an excluded folder,
 *   the read times out, or the read fails; otherwise
 *   `{ path, text, meta, wikilinks, backlinks }`. For a document that has no
 *   index entry, `meta` is derived from the parsed file, carries
 *   `notIndexed: true`, and `backlinks` is empty.
 * @throws Error for malformed paths — propagated from `validateVaultPath`.
 */
export async function runRead(
  db: CoreDb,
  config: CoreConfig,
  args: { path: string },
): Promise<unknown> {
  const guard = validateVaultPath(args.path, config);
  // excludeFolders branch — surface as payload-level not-found, not isError.
  // Agents legitimately probe paths; isError would imply caller error.
  if ("notFound" in guard) {
    return { found: false, error: guard.reason };
  }
  const { docPath, fullPath } = guard;
  const meta = getDocumentMeta(db, docPath);

  // One read serves both the indexed and the not-indexed case.
  const read = await readVaultText(fullPath, docPath);
  if (read.kind === "failed") {
    if (read.timedOut) {
      return { found: false, error: `Document read timeout (${READ_TIMEOUT_MS}ms): ${docPath}` };
    }
    // Indexed but unreadable: race with deletion — the file was indexed but is
    // now gone. Either way this is surfaced as not-found rather than a tool
    // error so the caller can fall back to search. Only the indexed case
    // appends the underlying error text.
    return {
      found: false,
      error: meta
        ? `Document not found: ${docPath} (${String(read.cause)})`
        : `Document not found: ${docPath}`,
    };
  }
  const { text } = read;

  const parsed = parseMarkdown(
    text,
    docPath,
    config.search.chunkSize,
    config.search.chunkOverlap,
    config.taxonomy,
  );

  // Fallback: the document is readable but has no index entry (for example
  // the watcher hasn't picked it up yet — paths whose first segment is an
  // excluded folder were already turned away above). Metadata comes from the
  // file parsed on the fly, without backlinks, since those depend on the index.
  if (!meta) {
    return {
      path: docPath,
      text,
      meta: {
        title:
          typeof parsed.frontmatter.title === "string"
            ? parsed.frontmatter.title
            : docPath,
        type: parsed.type,
        status: parsed.status,
        tags: parsed.tags,
        created: parsed.created,
        updated: parsed.updated,
        notIndexed: true,
      },
      wikilinks: parsed.wikilinks,
      backlinks: [],
    };
  }

  // We deliberately don't echo `parsed.frontmatter` here — `text` already
  // contains the YAML frontmatter verbatim, so an agent that needs custom
  // fields can read them from `text` without us paying for a duplicated
  // structured copy in every response. Standard fields stay in `meta` for
  // ergonomic access.
  return {
    path: docPath,
    text,
    meta: {
      title: meta.title,
      type: meta.type,
      status: meta.status,
      tags: meta.tags,
      created: meta.created,
      updated: meta.updated,
    },
    wikilinks: parsed.wikilinks,
    backlinks: getBacklinks(db, docPath),
  };
}
236
+
237
+ // ---- vault_graph ----
238
+
239
// One-way graph filter: backlinks coming from `06_Оперативка_агентов/` are
// **not** shown when the graph of a canonical note is requested — the
// MergeMind graph must not be cluttered with mentions from the working memory
// of various agents. Outgoing links from a working-memory note (to canonical
// notes) are shown as is — the author needs to see what their working-memory
// note refers to. The parser keeps all wikilinks in `edges` (for graph
// integrity); the filtering happens only here. Details: `docs/03-operatives.md`,
// section «Oneway-фильтр в графе» ("One-way filter in the graph").

/**
 * Tell whether a vault path belongs to agent working memory, i.e. contains
 * the marker returned by `agentMemoryFolderMarker(config.taxonomy)`.
 *
 * @param docPath - Vault-relative document path. (Named `docPath` so it does
 *   not shadow the `node:path` module imported by this file.)
 * @param config - Core configuration; only `taxonomy` is read.
 * @returns `true` when the marker is non-empty and occurs in `docPath`.
 */
function isAgentMemory(docPath: string, config: CoreConfig): boolean {
  const marker = agentMemoryFolderMarker(config.taxonomy);
  // `String.prototype.includes("")` is always true, so an empty marker would
  // classify every path as agent memory. Treat it as "no marker configured".
  return marker.length > 0 && docPath.includes(marker);
}
249
+
250
/**
 * vault_graph handler: breadth-first walk over the `edges` table around one
 * document.
 *
 * @param db - Database handle; used for `getDocumentMeta` lookups and for two
 *   prepared edge queries (outgoing and incoming).
 * @param config - Core configuration, consulted by the agent-memory filter.
 * @param args - `path` of the centre document and an optional `depth`
 *   (defaults to 1, clamped to the range 1..3).
 * @returns `{ found: false, error }` when the centre document has no
 *   metadata; otherwise `{ center, nodes, edges, stats }` where `edges` is
 *   de-duplicated by source/target pair.
 */
export function runGraph(
  db: CoreDb,
  config: CoreConfig,
  args: { path: string; depth?: number },
): unknown {
  const requestedDepth = args.depth ?? 1;
  const depth = Math.min(Math.max(requestedDepth, 1), 3);
  const docPath = normalizePath(args.path);

  const centerMeta = getDocumentMeta(db, docPath);
  if (!centerMeta) {
    return { found: false, error: `Document not found: ${docPath}` };
  }

  type GraphNode = {
    path: string;
    title: string;
    type: string | null;
    status: string | null;
    depth: number;
    direction: string;
  };
  type GraphEdge = { from: string; to: string };
  type PathRow = { path: string };

  const nodes: GraphNode[] = [];
  const edges: GraphEdge[] = [];
  const seen = new Set<string>([docPath]);

  const outgoingStmt = db.prepare(
    "SELECT target_path as path FROM edges WHERE source_path = ?",
  );
  const incomingStmt = db.prepare(
    "SELECT source_path as path FROM edges WHERE target_path = ?",
  );

  // Record a neighbour the first time it is met: mark it seen, queue it for
  // the next level and add a node entry (title falls back to the path when
  // the neighbour has no metadata).
  const discover = (
    neighbour: string,
    level: number,
    direction: string,
    queue: string[],
  ): void => {
    if (seen.has(neighbour)) {
      return;
    }
    seen.add(neighbour);
    queue.push(neighbour);
    const meta = getDocumentMeta(db, neighbour);
    nodes.push({
      path: neighbour,
      title: meta?.title ?? neighbour,
      type: meta?.type ?? null,
      status: meta?.status ?? null,
      depth: level,
      direction,
    });
  };

  // The walk stops early once a level discovers nothing new.
  let frontier: string[] = [docPath];
  for (let level = 1; level <= depth && frontier.length > 0; level++) {
    const upcoming: string[] = [];

    for (const current of frontier) {
      const outgoingRows = outgoingStmt.all(current) as PathRow[];
      for (const { path: target } of outgoingRows) {
        edges.push({ from: current, to: target });
        discover(target, level, "outgoing", upcoming);
      }

      // Backlinks from agent working memory are not shown for vault notes —
      // the vault graph is not cluttered with mentions from the memory of
      // various agents. For a working-memory note itself the filter is not
      // applied (its backlinks usually come from the Index or from the same
      // author, so they are relevant).
      const currentIsAgentMemory = isAgentMemory(current, config);
      const incomingRows = incomingStmt.all(current) as PathRow[];
      for (const { path: source } of incomingRows) {
        const hidden = !currentIsAgentMemory && isAgentMemory(source, config);
        if (hidden) {
          continue;
        }
        edges.push({ from: source, to: current });
        discover(source, level, "incoming", upcoming);
      }
    }

    frontier = upcoming;
  }

  // Collapse duplicate source/target pairs; the first occurrence fixes the
  // position in the output.
  const edgeByKey = new Map<string, GraphEdge>();
  for (const edge of edges) {
    edgeByKey.set(`${edge.from}→${edge.to}`, edge);
  }
  const uniqueEdges = Array.from(edgeByKey.values());

  const center = {
    path: docPath,
    title: centerMeta.title,
    type: centerMeta.type,
  };
  const stats = {
    totalNodes: nodes.length,
    totalEdges: uniqueEdges.length,
    depth,
  };
  return { center, nodes, edges: uniqueEdges, stats };
}
356
+
357
+ // ---- vault_map ----
358
+
359
// Caps used in summary mode. The full topology of a vault with 300+ notes
// exceeds 25KB of JSON before it reaches the agent, and most of that weight is
// the per-cluster node lists and the tail of the hubs list. Summary mode keeps
// the same shape but returns only what an agent needs to *navigate* the vault:
// cluster name + size + hub, the top hubs, and bridges/orphans in full (those
// are already small).
const SUMMARY_TOP_NODES_PER_CLUSTER = 5;
const SUMMARY_TOP_HUBS = 20;

/** Sections of the vault map that a caller may ask for. */
export type VaultMapPart = "clusters" | "hubs" | "bridges" | "orphans" | "orphan_wikilinks";

// Default selection. `orphan_wikilinks` is opt-in and therefore NOT listed
// here. Its count always rides in stats (data.stats.orphan_wikilinks), so a
// default call still signals "broken links exist, ask for the part".
const ALL_PARTS: VaultMapPart[] = [
  "clusters",
  "hubs",
  "bridges",
  "orphans",
];
377
+
378
/**
 * vault_map handler: shape the output of `buildVaultMap` for the caller.
 *
 * @param db - Database handle, passed through to `buildVaultMap`.
 * @param config - Core configuration, passed through to `buildVaultMap`.
 * @param args - `detail`: `"full"` for untruncated lists, anything else means
 *   `"summary"`. `parts`: sections to include; an absent or empty list selects
 *   the default set (`ALL_PARTS`).
 * @returns An object that always has `generated`, `stats`, `detail` and the
 *   sorted `parts`, plus one key per requested section. In summary mode
 *   clusters carry `top_nodes` instead of `nodes`, and `hubs_truncated` is
 *   added when the hubs list was cut.
 */
export function runMap(
  db: CoreDb,
  config: CoreConfig,
  args: { detail?: "summary" | "full"; parts?: VaultMapPart[] } = {},
): unknown {
  const detail = args.detail === "full" ? "full" : "summary";
  const summarize = detail === "summary";
  const requested = new Set<VaultMapPart>(
    args.parts?.length ? args.parts : ALL_PARTS,
  );
  const wants = (part: VaultMapPart): boolean => requested.has(part);

  const data = buildVaultMap(db, config);

  // Last path segment with a trailing ".md" removed.
  const noteName = (notePath: string): string =>
    notePath.split("/").pop()?.replace(/\.md$/, "") ?? notePath;

  // Degree lookup built once from the hubs list, so nodes inside each cluster
  // can be ranked without an N×getDocumentMeta sweep. Only needed for the
  // summary view of clusters; paths missing from the map count as degree 0.
  const degreeByPath = new Map<string, number>();
  if (summarize && wants("clusters")) {
    for (const hub of data.hubs) {
      degreeByPath.set(hub.path, hub.total);
    }
  }
  const degreeOf = (notePath: string): number => degreeByPath.get(notePath) ?? 0;

  // stats are always cheap and orient the agent — kept regardless of `parts`.
  const out: Record<string, unknown> = {
    generated: data.generated,
    stats: data.stats,
    detail,
    parts: [...requested].sort(),
  };

  if (wants("clusters")) {
    out.clusters = data.clusters.map((cluster) => {
      const hub = cluster.hub
        ? { title: cluster.hub.title, degree: cluster.hub.degree }
        : null;
      const base = { name: cluster.name, size: cluster.nodes.length, hub };

      if (!summarize) {
        return { ...base, nodes: cluster.nodes.map((n) => noteName(n)) };
      }

      // Summary: only the top N nodes by degree. The cluster hub is already
      // reported in `base.hub`, so it is left out of the list.
      const hubPath = cluster.hub?.path;
      const byDegreeDesc = [...cluster.nodes].sort(
        (a, b) => degreeOf(b) - degreeOf(a),
      );
      const topNodes = byDegreeDesc
        .filter((p) => p !== hubPath)
        .slice(0, SUMMARY_TOP_NODES_PER_CLUSTER)
        .map((n) => noteName(n));

      return { ...base, top_nodes: topNodes };
    });
  }

  if (wants("hubs")) {
    const overflow = data.hubs.length - SUMMARY_TOP_HUBS;
    const shown = summarize ? data.hubs.slice(0, SUMMARY_TOP_HUBS) : data.hubs;
    out.hubs = shown.map((hub) => ({
      title: hub.title,
      in: hub.inDegree,
      out: hub.outDegree,
      total: hub.total,
    }));
    if (summarize && overflow > 0) {
      out.hubs_truncated = overflow;
    }
  }

  if (wants("bridges")) {
    out.bridges = data.bridges.map((bridge) => ({
      title: bridge.title,
      connects: bridge.connects,
    }));
  }

  if (wants("orphans")) {
    out.orphans = data.orphans.map((orphan) => noteName(orphan));
  }

  if (wants("orphan_wikilinks")) {
    out.orphan_wikilinks = data.orphanWikilinks;
  }

  return out;
}