npm - @agfpd/iapeer-memory-core - Versions diffs - 0.1.1 - Mend

@agfpd/iapeer-memory-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/package.json +32 -0
package/src/config.ts +257 -0
package/src/context-render.ts +185 -0
package/src/db.ts +550 -0
package/src/embedding.ts +174 -0
package/src/fm-update.ts +352 -0
package/src/frontmatter-fill.ts +529 -0
package/src/graph.ts +427 -0
package/src/http-client.ts +129 -0
package/src/human-edit-detect.ts +213 -0
package/src/index-render.ts +876 -0
package/src/index.ts +65 -0
package/src/indexer.ts +323 -0
package/src/log.ts +27 -0
package/src/mcp-tools.ts +468 -0
package/src/memoryd.ts +680 -0
package/src/migrate-auto-memory.ts +289 -0
package/src/parser.ts +269 -0
package/src/permanent-detect.ts +110 -0
package/src/render-doctrine.ts +113 -0
package/src/reranker.ts +162 -0
package/src/search.ts +806 -0
package/src/smart-hash.ts +85 -0
package/src/sqlite-loader.ts +151 -0
package/src/tags-mirror.ts +47 -0
package/src/taxonomy.ts +385 -0
package/src/utils.ts +69 -0
package/tsconfig.json +24 -0

package/src/index.ts ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * Public surface of `@agfpd/iapeer-memory-core` — the modules the package
+ * facade (CLI) and the adapters consume. Deliberately EXPLICIT, not
+ * `export *`: the barrel is the contract boundary between core and the
+ * distribution layer (docs/10-distribution.md); deep imports stay possible
+ * inside the workspace but everything the facade needs must be listed here
+ * (also avoids real symbol collisions, e.g. the two internal `atomicWrite`s).
+ */
+// config
+export { configFromEnv, type CoreConfig } from "./config.js";
+// taxonomy (ADR-002/011)
+export {
+  getTaxonomy,
+  isLocaleId,
+  defaultExcludeFolders,
+  DEFAULT_CURATOR_SET,
+  DEFAULT_RANKING,
+  type LocaleId,
+  type RankingConfig,
+  type TaxonomyPreset,
+} from "./taxonomy.js";
+// frontmatter: post-write fill + structural fm-update (CLI contract in module header)
+export { processFile, resolveAgentName, type ProcessOptions } from "./frontmatter-fill.js";
+export { fmUpdate, collectOps, yamlSafeScalar, type FmUpdateOptions, type Op } from "./fm-update.js";
+// author index rendering
+export { regenerateVaultIndex, fullIndexPathFor, type RenderContext } from "./index-render.js";
+// layer-5 fragments (ADR-001)
+export {
+  FRAGMENT_STEM,
+  peerFragmentsDir,
+  renderPeerFragment,
+  writeHostWideGuideFragment,
+  type FragmentEnv,
+} from "./context-render.js";
+// role doctrines + version marker (ADR-009/010)
+export {
+  renderDoctrine,
+  renderRoleDoctrines,
+  renderedVersion,
+  versionMarker,
+  type RenderOutcome,
+} from "./render-doctrine.js";
+// memoryd (ADR-004/012)
+export {
+  startMemoryd,
+  MEMORYD_SERVER_NAME,
+  type MemorydHandle,
+  type MemorydOptions,
+} from "./memoryd.js";
+// auto-memory migration (engine; sources are adapter-scoped)
+export { planMigration, applyMigration, type MigrationPlan, type MigrationResult } from "./migrate-auto-memory.js";
+// sqlite runtime probe (vec availability — visible degradation, never silent)
+export { prepareSqliteRuntime, type SqliteRuntime } from "./sqlite-loader.js";
+// logging
+export { makeLogger, type Logger } from "./log.js";

package/src/indexer.ts ADDED Viewed

@@ -0,0 +1,323 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import type { CoreConfig } from "./config.js";
+import type { CoreDb } from "./db.js";
+import { deleteMissingDocuments, getStoredHash, getDocumentMeta, documentExists, upsertDocument, getChunksWithoutEmbeddings, storeChunkEmbeddings } from "./db.js";
+import { embedTexts } from "./embedding.js";
+import { parseMarkdown, wikilinkBasename } from "./parser.js";
+import { hashContent, normalizeRelativePath, nowIso } from "./utils.js";
+/**
+ * Run one full indexing pass over the vault rooted at `config.vaultPath`.
+ *
+ * Order of work: scan the vault (folders named in `config.excludeFolders`
+ * are skipped), hand the set of scanned paths to `deleteMissingDocuments`,
+ * resolve wikilinks against the collected title map, and — only when
+ * `config.embedding` is set — embed chunks that still lack an embedding.
+ *
+ * @param params.db      Index database handle.
+ * @param params.config  Core configuration (vault path, excludes, embedding).
+ * @param params.logger  Sink for progress and warning messages.
+ * @returns The title/basename → document paths map built during the scan.
+ */
+export async function indexAll(params: {
+  db: CoreDb;
+  config: CoreConfig;
+  logger: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
+}): Promise<Map<string, string[]>> {
+  const { db, config, logger } = params;
+  const visited = new Set<string>();
+  // Values are lists rather than single paths: two notes in different folders
+  // may share a basename, and the resolver must be able to see that as an
+  // ambiguity instead of silently using whichever was indexed last.
+  const titleToPath = new Map<string, string[]>();
+  const scanParams = {
+    db,
+    basePath: config.vaultPath,
+    excludeFolders: new Set(config.excludeFolders),
+    config,
+    seenPaths: visited,
+    logger,
+    titleToPath,
+  };
+  await scanRoot(scanParams);
+  const staleCount = deleteMissingDocuments(db, visited);
+  if (staleCount > 0) {
+    logger.info(`MergeMind: removed ${staleCount} stale documents from index`);
+  }
+  // Turn raw wikilink targets into concrete document paths.
+  resolveWikilinks(db, titleToPath);
+  // Embedding is optional; without a configured endpoint the pass ends here.
+  if (!config.embedding) {
+    return titleToPath;
+  }
+  await embedMissingChunks({ db, config, logger });
+  return titleToPath;
+}
+/**
+ * Record that `docPath` can be reached under the title/basename `key`.
+ *
+ * The map is list-valued so that a basename shared by notes in different
+ * folders stays visible as a collision instead of being overwritten by the
+ * last writer. Adding the same `docPath` under the same `key` twice is a
+ * no-op. Exported because the watcher maintains the same map incrementally.
+ *
+ * @param map      Title/basename → document paths map, mutated in place.
+ * @param key      Title or basename to register.
+ * @param docPath  Document path to associate with `key`.
+ */
+export function addTitlePath(
+  map: Map<string, string[]>,
+  key: string,
+  docPath: string,
+): void {
+  const known = map.get(key);
+  if (!known) {
+    // First document seen under this key.
+    map.set(key, [docPath]);
+    return;
+  }
+  if (known.includes(docPath)) {
+    return;
+  }
+  known.push(docPath);
+}
+/**
+ * Resolve raw wikilink targets stored in `edges` into real document paths.
+ *
+ * Resolution rules:
+ *  - a target containing "/" is treated as an author-written path and is
+ *    tried as an exact match first (with `.md` appended when absent);
+ *  - otherwise, or when the exact path misses, the basename is looked up in
+ *    `titleToPath` and resolves only when exactly one note carries it —
+ *    never "the last one indexed".
+ *
+ * Links that cannot be resolved are not dropped: they move to
+ * `unresolved_links` with a reason (`missing` | `ambiguous`) so vault rot
+ * stays visible. Every row already in `unresolved_links` is retried on each
+ * call, so a link whose target appears later is promoted back into `edges`.
+ *
+ * All writes happen inside a single `db.transaction`.
+ *
+ * @param db           Index database handle.
+ * @param titleToPath  Title/basename → document paths. It has to cover every
+ *                     indexed file, unchanged ones included; a note missing
+ *                     from the map makes links to it resolve as `missing`.
+ */
+export function resolveWikilinks(
+  db: CoreDb,
+  titleToPath: Map<string, string[]>,
+): void {
+  type Resolution = { path: string } | { reason: "missing" | "ambiguous" };
+  type EdgeRow = {
+    rowid: number;
+    source_path: string;
+    target_path: string;
+    context_snippet: string | null;
+  };
+  type UnresolvedRow = {
+    rowid: number;
+    source_path: string;
+    raw_target: string;
+    context_snippet: string | null;
+    reason: string;
+  };
+  const resolveTarget = (raw: string): Resolution => {
+    const target = raw.normalize("NFC");
+    if (target.includes("/")) {
+      // Explicit path: match it exactly. Per normalizeRelativePath, docPaths
+      // are stored NFD, while links taken from content are usually NFC.
+      const exact = (/\.md$/i.test(target) ? target : `${target}.md`).normalize("NFD");
+      if (documentExists(db, exact)) {
+        return { path: exact };
+      }
+      // No exact hit — continue with the strict basename lookup below.
+    }
+    const candidates = titleToPath.get(wikilinkBasename(target).normalize("NFC"));
+    if (!candidates || candidates.length === 0) {
+      return { reason: "missing" };
+    }
+    if (candidates.length > 1) {
+      return { reason: "ambiguous" };
+    }
+    return { path: candidates[0] };
+  };
+  // Snapshot of all edges, taken before any write.
+  const allEdges = db
+    .prepare(
+      "SELECT rowid, source_path, target_path, context_snippet FROM edges",
+    )
+    .all() as EdgeRow[];
+  // OR IGNORE: two differently-written links in one note (a bare [[Foo]] and
+  // a path-qualified [[Folder/Foo]]) can resolve to the same note; the second
+  // update would violate the (source,target) PK. An ignored update reports
+  // changes === 0, which marks the row as a duplicate to delete.
+  const retargetEdge = db.prepare(
+    "UPDATE OR IGNORE edges SET target_path = ? WHERE rowid = ?",
+  );
+  const deleteEdge = db.prepare("DELETE FROM edges WHERE rowid = ?");
+  const addEdge = db.prepare(
+    "INSERT OR IGNORE INTO edges (source_path, target_path, context_snippet) VALUES (?, ?, ?)",
+  );
+  const recordUnresolved = db.prepare(
+    "INSERT INTO unresolved_links (source_path, raw_target, reason, context_snippet) VALUES (?, ?, ?, ?) ON CONFLICT(source_path, raw_target) DO UPDATE SET reason = excluded.reason, context_snippet = excluded.context_snippet",
+  );
+  const setUnresolvedReason = db.prepare(
+    "UPDATE unresolved_links SET reason = ? WHERE rowid = ?",
+  );
+  const deleteUnresolved = db.prepare(
+    "DELETE FROM unresolved_links WHERE rowid = ?",
+  );
+  const applyResolution = db.transaction(() => {
+    for (const edge of allEdges) {
+      // A target ending in ".md" is taken to be resolved already.
+      if (edge.target_path.endsWith(".md")) {
+        continue;
+      }
+      const outcome = resolveTarget(edge.target_path);
+      if (!("path" in outcome)) {
+        // Keep the failure visible: move the link to unresolved_links.
+        deleteEdge.run(edge.rowid);
+        recordUnresolved.run(
+          edge.source_path,
+          edge.target_path,
+          outcome.reason,
+          edge.context_snippet,
+        );
+        continue;
+      }
+      const { changes } = retargetEdge.run(outcome.path, edge.rowid);
+      if (changes === 0) {
+        deleteEdge.run(edge.rowid); // duplicate of an existing edge
+      }
+    }
+    // Self-heal: retry every unresolved link — its target may exist now.
+    // Read here, inside the transaction, so rows recorded above are included.
+    const pending = db
+      .prepare(
+        "SELECT rowid, source_path, raw_target, context_snippet, reason FROM unresolved_links",
+      )
+      .all() as UnresolvedRow[];
+    for (const link of pending) {
+      const outcome = resolveTarget(link.raw_target);
+      if ("path" in outcome) {
+        addEdge.run(link.source_path, outcome.path, link.context_snippet);
+        deleteUnresolved.run(link.rowid);
+        continue;
+      }
+      // Still unresolved; only touch the row when the reason changed.
+      if (outcome.reason !== link.reason) {
+        setUnresolvedReason.run(outcome.reason, link.rowid);
+      }
+    }
+  });
+  applyResolution();
+}
+/**
+ * Embed every chunk that has no stored embedding yet, one batch at a time.
+ *
+ * Each iteration fetches up to `config.embedding.batchSize` chunks via
+ * `getChunksWithoutEmbeddings`, sends their text to `embedTexts`, and stores
+ * the returned vectors with `storeChunkEmbeddings`. The loop ends when no
+ * chunks are left, or early — with a warning, never an exception — when the
+ * endpoint returns no vectors or fewer vectors than chunks.
+ *
+ * Does nothing when `config.embedding` is not set.
+ *
+ * @param params.db      Index database handle.
+ * @param params.config  Core configuration; `embedding` selects the endpoint.
+ * @param params.logger  Sink for the summary line and warnings.
+ */
+async function embedMissingChunks(params: {
+  db: CoreDb;
+  config: CoreConfig;
+  logger: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
+}): Promise<void> {
+  const { db, config, logger } = params;
+  // Narrow once so the loop needs no non-null assertions.
+  const embedding = config.embedding;
+  if (!embedding) return;
+  const batchSize = embedding.batchSize;
+  let total = 0;
+  while (true) {
+    const missing = getChunksWithoutEmbeddings(db, batchSize);
+    if (missing.length === 0) break;
+    const texts = missing.map((c) => c.chunkText);
+    const result = await embedTexts(texts, embedding);
+    const vectors = result.vectors;
+    if (!vectors) {
+      logger.warn(
+        `MergeMind: embedding endpoint unavailable (${result.status}), skipping embedding`,
+      );
+      break;
+    }
+    if (vectors.length < missing.length) {
+      // A short response would otherwise dereference `undefined` below and
+      // throw out of the whole indexing pass; degrade visibly instead.
+      logger.warn(
+        `MergeMind: embedding endpoint returned ${vectors.length} vectors for ${missing.length} chunks, skipping embedding`,
+      );
+      break;
+    }
+    const updates = missing.map((chunk, i) => {
+      const vector = vectors[i];
+      return {
+        id: chunk.id,
+        // Respect the view's window: a vector that is a slice of a larger
+        // shared buffer must contribute only its own bytes, not the whole
+        // backing ArrayBuffer.
+        embedding: Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength),
+      };
+    });
+    storeChunkEmbeddings(db, updates);
+    total += updates.length;
+  }
+  if (total > 0) {
+    logger.info(`MergeMind: embedded ${total} chunks`);
+  }
+}
+/** State shared by every step of one vault scan. */
+type ScanRootParams = {
+  /** Index database handle. */
+  db: CoreDb;
+  /** Root directory of the scan; document paths are made relative to it. */
+  basePath: string;
+  /** Directory names that are not descended into. */
+  excludeFolders: Set<string>;
+  config: CoreConfig;
+  /** Document paths encountered during this scan (filled by the scan). */
+  seenPaths: Set<string>;
+  logger: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
+  /** Title/basename → document paths (filled by the scan). */
+  titleToPath: Map<string, string[]>;
+};
+/**
+ * Validate the scan root and, when it is a directory, walk it.
+ *
+ * A root that cannot be stat'ed or is not a directory is reported with a
+ * warning and skipped; neither case throws.
+ */
+async function scanRoot(params: ScanRootParams): Promise<void> {
+  const { basePath, logger } = params;
+  try {
+    const rootIsDirectory = (await fs.stat(basePath)).isDirectory();
+    if (!rootIsDirectory) {
+      logger.warn(`MergeMind: skip non-directory path ${basePath}`);
+      return;
+    }
+  } catch {
+    // Any stat failure is reported as a missing path.
+    logger.warn(`MergeMind: path does not exist, skipping ${basePath}`);
+    return;
+  }
+  await walkDirectory(params, basePath);
+}
+/**
+ * Recursively index every `.md` file below `currentPath`.
+ *
+ * Sub-directories whose name is in `params.excludeFolders` are skipped.
+ * Entries that are neither directories nor regular files ending in `.md`
+ * are ignored. A failure while indexing one file is logged and does not
+ * stop the walk; a failure to list a directory is not caught here.
+ */
+async function walkDirectory(params: ScanRootParams, currentPath: string): Promise<void> {
+  const entries = await fs.readdir(currentPath, { withFileTypes: true });
+  for (const entry of entries) {
+    const fullPath = path.join(currentPath, entry.name);
+    if (entry.isDirectory()) {
+      if (!params.excludeFolders.has(entry.name)) {
+        await walkDirectory(params, fullPath);
+      }
+      continue;
+    }
+    const isMarkdownFile = entry.isFile() && entry.name.endsWith(".md");
+    if (!isMarkdownFile) {
+      continue;
+    }
+    // A single malformed note must not abort the scan. The writer runs as
+    // its own daemon: if one bad note crashed the scan, the daemon would be
+    // restarted, hit the same note again, and never stabilise. So: log the
+    // failure, skip the file, keep going.
+    try {
+      await indexFile(params, fullPath);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      params.logger.warn(
+        `MergeMind: skip ${fullPath} — ${reason}`,
+      );
+    }
+  }
+}
+/**
+ * Index one markdown file: register its path and titles, and re-parse and
+ * upsert it when its content hash differs from the stored one.
+ *
+ * Side effects: adds the document path to `params.seenPaths`, registers the
+ * basename (and the document title, when different) in `params.titleToPath`,
+ * and, for new or changed content, writes the document, its chunks and its
+ * wikilinks through `upsertDocument`.
+ *
+ * Errors from reading or parsing propagate to the caller.
+ *
+ * @param params    Shared scan state.
+ * @param fullPath  Absolute path of the file to index.
+ */
+async function indexFile(params: ScanRootParams, fullPath: string): Promise<void> {
+  const { db, basePath, seenPaths, config, logger, titleToPath } = params;
+  const docPath = normalizeRelativePath(path.relative(basePath, fullPath));
+  // Mark the path as seen BEFORE reading the file. The directory listing
+  // already proved the file exists; if the read below fails, the caller logs
+  // and skips it, and an unseen path would then be purged from the index as
+  // "stale" even though the note is still on disk.
+  seenPaths.add(docPath);
+  const content = await fs.readFile(fullPath, "utf8");
+  const contentHash = hashContent(content);
+  // NFC-normalize keys: paths from iCloud are NFD, wikilinks in content are NFC
+  const titleKey = path.basename(docPath, ".md").normalize("NFC");
+  if (getStoredHash(db, docPath) === contentHash) {
+    // Unchanged this run — but its title MUST still be registered. Otherwise a
+    // changed file linking [[ThisNote]] would fail to resolve against the
+    // map. The title comes from the stored row, so an unchanged file needs
+    // no re-parse.
+    addTitlePath(titleToPath, titleKey, docPath);
+    const storedTitle = getDocumentMeta(db, docPath)?.title?.normalize("NFC");
+    if (storedTitle && storedTitle !== titleKey) {
+      addTitlePath(titleToPath, storedTitle, docPath);
+    }
+    return;
+  }
+  const parsed = parseMarkdown(content, docPath, config.search.chunkSize, config.search.chunkOverlap, config.taxonomy);
+  upsertDocument(
+    db,
+    {
+      path: docPath,
+      title: parsed.title,
+      type: parsed.type,
+      status: parsed.status,
+      tags: parsed.tags,
+      contentHash,
+      frontmatter: parsed.frontmatter,
+      created: parsed.created,
+      updated: parsed.updated,
+      indexedAt: nowIso(),
+    },
+    parsed.chunks,
+    parsed.wikilinks,
+  );
+  // Register title → path mapping for wikilink resolution: always under the
+  // basename, and additionally under the parsed title when it differs.
+  addTitlePath(titleToPath, titleKey, docPath);
+  const titleNfc = parsed.title?.normalize("NFC");
+  if (titleNfc && titleNfc !== titleKey) {
+    addTitlePath(titleToPath, titleNfc, docPath);
+  }
+  logger.info(`MergeMind: indexed ${docPath}`);
+}

package/src/log.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Shared stderr logger.
+ *
+ * stdout is reserved for MCP JSON-RPC (server.ts only writes JSON frames
+ * there); everything diagnostic — both the MCP frontend and the writer
+ * daemon — funnels through here so a single grep on stderr captures the
+ * full picture. The `[iapeer-memory <kind>]` prefix lets log aggregators
+ * distinguish reader and writer processes when they share a tmux pane.
+ */
+/** Diagnostic logger: one sink per severity, each taking a plain message. */
+export type Logger = {
+  info: (msg: string) => void;
+  warn: (msg: string) => void;
+  error: (msg: string) => void;
+};
+/**
+ * Build a logger that writes one newline-terminated line per message to
+ * stderr, prefixed with a bracketed tag.
+ *
+ * `mcp` keeps the bare `[iapeer-memory]` prefix that downstream tooling
+ * (status skill, monitor scripts) greps for; any other kind is appended to
+ * the tag so its lines can be filtered. `warn` and `error` add ` WARN` /
+ * ` ERROR` inside the brackets.
+ *
+ * @param kind  Which process the logger belongs to.
+ */
+export function makeLogger(kind: "mcp" | "memoryd"): Logger {
+  const tag = kind === "mcp" ? "iapeer-memory" : `iapeer-memory ${kind}`;
+  // One factory for all severities; `level` is the suffix inside the brackets.
+  const sink = (level: string) => (msg: string) =>
+    process.stderr.write(`[${tag}${level}] ${msg}\n`);
+  return {
+    info: sink(""),
+    warn: sink(" WARN"),
+    error: sink(" ERROR"),
+  };
+}