npm - nlm-memory - Versions diffs - 0.5.0 → 0.5.1 - Mend

nlm-memory 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (247) hide show

package/README.md +72 -34
package/dist/cli/nlm.js +2 -1
package/dist/cli/nlm.js.map +1 -1
package/dist/http/app.js +2 -1
package/dist/http/app.js.map +1 -1
package/dist/mcp/server.js +20 -1
package/dist/mcp/server.js.map +1 -1
package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
package/dist/ui/index.html +2 -2
package/package.json +26 -1
package/.agents/plugins/marketplace.json +0 -20
package/.github/workflows/ci.yml +0 -30
package/docs/methodology/re-derivation-rate.md +0 -112
package/docs/methodology/useful-hit-rate.md +0 -79
package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
package/docs/plans/desktop-product.md +0 -69
package/docs/plans/factstore-design.md +0 -236
package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
package/logs/CHANGELOG/CHANGELOG.md +0 -209
package/migrations/000_initial_schema.sql +0 -174
package/migrations/001_entity_type_rename.sql +0 -17
package/migrations/002_adapter_state_extend.sql +0 -12
package/migrations/003_session_embeddings.sql +0 -11
package/migrations/004_facts.sql +0 -46
package/migrations/005_sources.sql +0 -31
package/migrations/006_providers.sql +0 -33
package/migrations/007_source_tokens.sql +0 -17
package/migrations/008_fts_rebuild.sql +0 -9
package/migrations/009_session_embedding_chunks.sql +0 -46
package/migrations/010_sources_opencode.sql +0 -30
package/migrations/011_sources_hermes_agent.sql +0 -30
package/migrations/012_sources_aider.sql +0 -30
package/migrations/013_adapter_state_failure_count.sql +0 -12
package/migrations/014_sources_cursor.sql +0 -30
package/migrations/015_sources_windsurf.sql +0 -30
package/plugin-hermes-agent/README.md +0 -49
package/plugin-hermes-agent/__init__.py +0 -75
package/plugin-hermes-agent/plugin.yaml +0 -15
package/scripts/backfill-citations.mjs +0 -0
package/scripts/build-codex-plugin.mjs +0 -61
package/scripts/deepseek-probe.mjs +0 -67
package/scripts/extract-triples.mjs +0 -207
package/scripts/longmemeval/embedding-cache.ts +0 -77
package/scripts/longmemeval/fetch-dataset.sh +0 -25
package/scripts/longmemeval/run-harness.ts +0 -315
package/scripts/longmemeval/scorer.ts +0 -99
package/scripts/longmemeval/tsconfig.json +0 -9
package/scripts/longmemeval/types.ts +0 -35
package/scripts/nlm-daily-digest.py +0 -239
package/scripts/nlm-daily-digest.sh +0 -28
package/src/cli/classify-parity.ts +0 -257
package/src/cli/launchctl-helpers.ts +0 -49
package/src/cli/nlm.ts +0 -1078
package/src/core/actions/actions-log.ts +0 -118
package/src/core/actions/overlay.ts +0 -117
package/src/core/adapters/aider.ts +0 -205
package/src/core/adapters/claude-code.ts +0 -293
package/src/core/adapters/common.ts +0 -54
package/src/core/adapters/cursor.ts +0 -486
package/src/core/adapters/from-source.ts +0 -67
package/src/core/adapters/hermes-agent.ts +0 -240
package/src/core/adapters/hermes.ts +0 -277
package/src/core/adapters/jsonl-generic.ts +0 -208
package/src/core/adapters/opencode.ts +0 -281
package/src/core/adapters/pi.ts +0 -264
package/src/core/adapters/windsurf.ts +0 -386
package/src/core/classifier/prompt.ts +0 -200
package/src/core/dataset/build-dataset.ts +0 -463
package/src/core/embedding/chunk-body.ts +0 -76
package/src/core/embedding/embed-backfill.ts +0 -210
package/src/core/embedding/embed-normalize.ts +0 -135
package/src/core/facts/backfill-facts.ts +0 -254
package/src/core/facts/extract-facts.ts +0 -50
package/src/core/hook/citation-detect.ts +0 -124
package/src/core/hook/cite-memo.ts +0 -68
package/src/core/hook/claude-settings.ts +0 -187
package/src/core/hook/gate.ts +0 -25
package/src/core/hook/hook-log.ts +0 -41
package/src/core/hook/memo-sweep.ts +0 -164
package/src/core/hook/memo.ts +0 -67
package/src/core/hook/pointer-block.ts +0 -26
package/src/core/hook/select.ts +0 -32
package/src/core/hook/transcript.ts +0 -121
package/src/core/ingest/ingest-session.ts +0 -111
package/src/core/providers/provider-models.ts +0 -100
package/src/core/providers/provider-registry.ts +0 -196
package/src/core/recall/citation-log.ts +0 -108
package/src/core/recall/filter.ts +0 -27
package/src/core/recall/index.ts +0 -6
package/src/core/recall/match-fields.ts +0 -40
package/src/core/recall/query-log.ts +0 -149
package/src/core/recall/query-shape.ts +0 -66
package/src/core/recall/recall-service.ts +0 -320
package/src/core/recall/recent-log.ts +0 -59
package/src/core/recall/tokenize.ts +0 -18
package/src/core/recall/useful-scan.ts +0 -336
package/src/core/recall-facts/fact-query-log.ts +0 -150
package/src/core/recall-facts/fact-recall-service.ts +0 -327
package/src/core/scheduler/scan-once.ts +0 -142
package/src/core/scheduler/scheduler.ts +0 -225
package/src/core/sources/source-registry.ts +0 -278
package/src/core/storage/db-restore.ts +0 -133
package/src/core/storage/live-status.ts +0 -45
package/src/core/storage/migrate.ts +0 -72
package/src/core/storage/sqlite-fact-store.ts +0 -304
package/src/core/storage/sqlite-session-store.ts +0 -810
package/src/hook/hook-auth.ts +0 -18
package/src/hook/prompt-recall-hook.ts +0 -180
package/src/hook/session-end-hook.ts +0 -81
package/src/hook/session-start-hook.ts +0 -168
package/src/hook/stop-hook.ts +0 -239
package/src/http/app.ts +0 -1215
package/src/install/claude-code.ts +0 -128
package/src/install/codex.ts +0 -367
package/src/install/cursor.ts +0 -68
package/src/install/hermes-agent.ts +0 -76
package/src/install/hermes.ts +0 -78
package/src/install/nlm-dir-perms.ts +0 -55
package/src/install/ollama.ts +0 -284
package/src/install/setup.ts +0 -489
package/src/install/windsurf.ts +0 -68
package/src/llm/classifier-box.ts +0 -64
package/src/llm/deepseek-client.ts +0 -150
package/src/llm/env-autoload.ts +0 -55
package/src/llm/ollama-client.ts +0 -189
package/src/mcp/server.ts +0 -534
package/src/ports/fact-store.ts +0 -102
package/src/ports/llm-client.ts +0 -52
package/src/ports/logger.ts +0 -16
package/src/ports/session-store.ts +0 -45
package/src/ports/transcript-adapter.ts +0 -55
package/src/shared/types.ts +0 -149
package/src/ui/App.tsx +0 -58
package/src/ui/components/PromoteOpenButton.tsx +0 -65
package/src/ui/components/SessionDrawer.tsx +0 -199
package/src/ui/components/SideNav.tsx +0 -162
package/src/ui/components/Skeleton.tsx +0 -107
package/src/ui/index.html +0 -13
package/src/ui/lib/actions.ts +0 -30
package/src/ui/lib/api.ts +0 -92
package/src/ui/lib/dataset.ts +0 -141
package/src/ui/lib/registries.ts +0 -155
package/src/ui/lib/view-settings.ts +0 -41
package/src/ui/main.tsx +0 -15
package/src/ui/pages/Live.tsx +0 -229
package/src/ui/pages/Pulse.tsx +0 -415
package/src/ui/pages/Recall.tsx +0 -190
package/src/ui/pages/River.tsx +0 -354
package/src/ui/pages/Search.tsx +0 -386
package/src/ui/pages/Stub.tsx +0 -9
package/src/ui/pages/Thread.tsx +0 -473
package/src/ui/pages/settings/Classifier.tsx +0 -227
package/src/ui/pages/settings/Data.tsx +0 -190
package/src/ui/pages/settings/Index.tsx +0 -65
package/src/ui/pages/settings/Labels.tsx +0 -224
package/src/ui/pages/settings/Providers.tsx +0 -305
package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
package/src/ui/pages/settings/Sources.tsx +0 -326
package/src/ui/pages/settings/Views.tsx +0 -96
package/src/ui/styles.css +0 -1890
package/src/ui/tsconfig.json +0 -21
package/src/ui/vite.config.ts +0 -19
package/tests/fixtures/claude_code/short_session.jsonl +0 -2
package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
package/tests/fixtures/facts.ts +0 -17
package/tests/fixtures/golden-corpus.ts +0 -85
package/tests/fixtures/hermes/paired_request_dump.json +0 -24
package/tests/fixtures/hermes/paired_session.json +0 -23
package/tests/fixtures/hermes/request_dump.json +0 -28
package/tests/fixtures/hermes/session_iso.json +0 -38
package/tests/fixtures/hermes/session_unix.json +0 -38
package/tests/fixtures/hermes/system_only.json +0 -18
package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
package/tests/fixtures/pi/short-successful.jsonl +0 -5
package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
package/tests/fixtures/sessions.ts +0 -22
package/tests/integration/backfill-facts.test.ts +0 -362
package/tests/integration/citation-explicit.test.ts +0 -111
package/tests/integration/cite-event.test.ts +0 -169
package/tests/integration/cite-memo.test.ts +0 -87
package/tests/integration/db-restore.test.ts +0 -153
package/tests/integration/embed-backfill.test.ts +0 -176
package/tests/integration/fact-supersedence.test.ts +0 -313
package/tests/integration/fts-index.test.ts +0 -60
package/tests/integration/getbyids-sqlite.test.ts +0 -100
package/tests/integration/hermes-agent-hooks.test.ts +0 -248
package/tests/integration/hook-claude-settings.test.ts +0 -218
package/tests/integration/hook-log.test.ts +0 -54
package/tests/integration/hook-memo.test.ts +0 -68
package/tests/integration/hook-pre-compact.test.ts +0 -105
package/tests/integration/hook-subagent-start.test.ts +0 -102
package/tests/integration/http.test.ts +0 -401
package/tests/integration/keyword-search-fts.test.ts +0 -66
package/tests/integration/mcp-recall-logging.test.ts +0 -88
package/tests/integration/mcp.test.ts +0 -260
package/tests/integration/memo-sweep.test.ts +0 -91
package/tests/integration/prompt-recall-hook.test.ts +0 -88
package/tests/integration/provider-registry.test.ts +0 -107
package/tests/integration/recall-golden.test.ts +0 -59
package/tests/integration/recall-sqlite.test.ts +0 -169
package/tests/integration/scheduler.test.ts +0 -391
package/tests/integration/session-end-hook.test.ts +0 -48
package/tests/integration/session-start-hook.test.ts +0 -126
package/tests/integration/source-registry.test.ts +0 -122
package/tests/integration/sqlite-fact-store.test.ts +0 -346
package/tests/integration/stop-hook.test.ts +0 -560
package/tests/integration/wal-checkpoint.test.ts +0 -49
package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
package/tests/unit/core/adapters/aider.test.ts +0 -230
package/tests/unit/core/adapters/claude-code.test.ts +0 -118
package/tests/unit/core/adapters/cursor.test.ts +0 -485
package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
package/tests/unit/core/adapters/hermes.test.ts +0 -81
package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
package/tests/unit/core/adapters/opencode.test.ts +0 -354
package/tests/unit/core/adapters/pi.test.ts +0 -110
package/tests/unit/core/adapters/windsurf.test.ts +0 -416
package/tests/unit/core/classifier/prompt.test.ts +0 -126
package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
package/tests/unit/core/facts/extract-facts.test.ts +0 -117
package/tests/unit/core/filter.test.ts +0 -40
package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
package/tests/unit/core/hook/citation-detect.test.ts +0 -124
package/tests/unit/core/hook/gate.test.ts +0 -29
package/tests/unit/core/hook/pointer-block.test.ts +0 -22
package/tests/unit/core/hook/select.test.ts +0 -66
package/tests/unit/core/match-fields.test.ts +0 -39
package/tests/unit/core/mcp-cite-session.test.ts +0 -51
package/tests/unit/core/providers/provider-models.test.ts +0 -101
package/tests/unit/core/query-shape.test.ts +0 -92
package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
package/tests/unit/core/recall-service.test.ts +0 -200
package/tests/unit/core/storage/live-status.test.ts +0 -54
package/tests/unit/core/tokenize.test.ts +0 -32
package/tests/unit/core/useful-scan.test.ts +0 -537
package/tests/unit/llm/embed.test.ts +0 -93
package/tests/unit/llm/ollama-client.test.ts +0 -124
package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
package/tsconfig.json +0 -31
package/tsconfig.test.json +0 -11
package/vitest.config.ts +0 -22

package/src/core/embedding/chunk-body.ts DELETED Viewed

@@ -1,76 +0,0 @@
-/**
- * chunk-body — split a session body into ≤MAX_CHUNK_CHARS-char windows
- * for the chunk + max-pool semantic index. Header (label + summary) is
- * prepended to the first chunk so it's always part of the index without
- * inflating later chunk sizes.
- *
- * MAX_CHUNK_CHARS sized for nomic-embed-text's 2048-token context. Char
- * density varies by content: prose ~4 chars/token, code/tool-output ~3
- * chars/token. The 2026-05-26 backfill bisect found the cliff at ~6,388
- * chars for token-dense Claude Code session bodies — 5,500 holds a safe
- * margin and eliminates the "input exceeds context length" 500s that
- * drove ~76% per-chunk rejection at 7,500. See 2026-05-26 CHANGELOG.
- *
- * OVERLAP_CHARS preserves context across boundaries so a phrase split
- * mid-chunk still appears intact in one neighboring chunk.
- *
- * Pure function. No I/O, no allocations beyond the returned array.
- */
-/** Default maximum chunk length in characters; used when ChunkOptions.maxChars is omitted. */
-export const MAX_CHUNK_CHARS = 5_500;
-/** Default overlap in characters between consecutive body windows; used when ChunkOptions.overlap is omitted. */
-export const OVERLAP_CHARS = 500;
-/** Session text to be chunked. Missing or null fields are treated as empty strings. */
-export interface ChunkInput {
-  /** Joined with `summary` to form the header that is prepended to the first chunk. */
-  readonly label?: string | null;
-  /** Joined with `label` to form the header that is prepended to the first chunk. */
-  readonly summary?: string | null;
-  /** Main text that is split into windows. */
-  readonly body?: string | null;
-}
-/** Optional overrides for the chunking defaults. */
-export interface ChunkOptions {
-  /** Window size in characters; must be > 0. Defaults to MAX_CHUNK_CHARS. */
-  readonly maxChars?: number;
-  /** Overlap in characters; must satisfy 0 <= overlap < maxChars. Defaults to OVERLAP_CHARS. */
-  readonly overlap?: number;
-}
-/**
- * Split a session's text into chunks of at most `maxChars` characters.
- *
- * The header (trimmed label and summary joined by a space) is prepended to
- * the first chunk only. The body is then emitted as windows of up to
- * `maxChars` characters that advance by `maxChars - overlap`, so consecutive
- * windows share `overlap` characters.
- *
- * When the header alone leaves no room for body text in the first chunk,
- * header and body are windowed together as one string, so no chunk exceeds
- * `maxChars`.
- *
- * @param input Label, summary and body; null/undefined fields count as empty.
- * @param opts  Optional overrides for the chunk size and overlap.
- * @returns The chunks in order; empty when there is neither header nor body.
- * @throws Error when `maxChars <= 0` or `overlap` is outside `[0, maxChars)`.
- */
-export function chunkSessionText(
-  input: ChunkInput,
-  opts: ChunkOptions = {},
-): string[] {
-  const maxChars = opts.maxChars ?? MAX_CHUNK_CHARS;
-  const overlap = opts.overlap ?? OVERLAP_CHARS;
-  if (maxChars <= 0) throw new Error("chunkSessionText: maxChars must be > 0");
-  if (overlap < 0 || overlap >= maxChars) {
-    throw new Error("chunkSessionText: overlap must satisfy 0 <= overlap < maxChars");
-  }
-  const header = [input.label ?? "", input.summary ?? ""]
-    .map((s) => s.trim())
-    .filter((s) => s.length > 0)
-    .join(" ");
-  const body = (input.body ?? "").trim();
-  if (!header && !body) return [];
-  const chunks: string[] = [];
-  const step = maxChars - overlap;
-  // Append overlapping windows of `text`, starting at `start`, to `chunks`.
-  const pushWindows = (text: string, start: number): void => {
-    let pos = start;
-    while (pos < text.length) {
-      const end = Math.min(pos + maxChars, text.length);
-      const slice = text.slice(pos, end).trim();
-      if (slice.length > 0) chunks.push(slice);
-      if (end >= text.length) break;
-      pos += step;
-    }
-  };
-  if (!body) {
-    if (header.length <= maxChars) return [header];
-    // Header-only input that is itself too long: window it instead of
-    // returning a single oversized chunk.
-    pushWindows(header, 0);
-    return chunks;
-  }
-  // First chunk: header + as much body as fits.
-  const headerPrefix = header ? header + " " : "";
-  const firstBodyBudget = maxChars - headerPrefix.length;
-  if (firstBodyBudget < 1) {
-    // The header fills (or overflows) a whole chunk. Forcing body text in
-    // after it would push the first chunk past maxChars, so treat header
-    // and body as one stream.
-    pushWindows(headerPrefix + body, 0);
-    return chunks;
-  }
-  if (body.length <= firstBodyBudget) {
-    return [(headerPrefix + body).trim()];
-  }
-  chunks.push((headerPrefix + body.slice(0, firstBodyBudget)).trim());
-  // Subsequent chunks: body windows with overlap.
-  pushWindows(body, Math.max(0, firstBodyBudget - overlap));
-  return chunks;
-}

package/src/core/embedding/embed-backfill.ts DELETED Viewed

@@ -1,210 +0,0 @@
-/**
- * embed-backfill — re-embed every session in canonical.sqlite into the
- * chunk + max-pool index (session_embedding_chunks). Replaces the prior
- * one-vector-per-session backfill that wrote to session_embeddings.
- *
- * For each session: chunk (label + summary + body) via chunkSessionText,
- * embed each chunk with kind="document", and write to the chunk table +
- * session_chunk_map via the same INSERT pair used by the live ingest path.
- *
- * Resumable via a JSON state file at $NLM_EMBED_STATE (default
- * ~/.nlm/embed_reembed.state). Interrupting + rerunning skips already-done
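- * session ids. A session is considered "done" as soon as at least one of
- * its chunks embeds and is written — chunks that still fail after one
- * retry are dropped. Sessions with no embedded chunk, or whose database
- * write fails, are retried on the next run.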
- *
- * Layering: depends on the LLMClient port. SQLite touched directly via
- * better-sqlite3 because this is a one-shot operational tool, not a hot
- * path. Lives under core/ but is invoked from the CLI composition root.
- */
-import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
-import { dirname, join } from "node:path";
-import { homedir } from "node:os";
-import Database from "better-sqlite3";
-import * as sqliteVec from "sqlite-vec";
-import type { LLMClient } from "@ports/llm-client.js";
-import { LLMUnreachableError } from "@ports/llm-client.js";
-import { chunkSessionText } from "@core/embedding/chunk-body.js";
-/** Resume-state file used when BackfillOptions.statePath is omitted. */
-const DEFAULT_STATE_PATH = join(homedir(), ".nlm", "embed_reembed.state");
-/** The state file is saved each time the count of succeeded sessions reaches a multiple of this value. */
-const SAVE_EVERY = 25;
-/** Inputs to reembedCorpus. */
-export interface BackfillOptions {
-  /** Path of the SQLite database; when the file does not exist the run returns dbMissing: true. */
-  readonly dbPath: string;
-  /** Client whose embed(chunk, "document") is called for each chunk. */
-  readonly embedder: LLMClient;
-  /** Resume-state file; defaults to DEFAULT_STATE_PATH. */
-  readonly statePath?: string;
-  /** When truthy, truncated to an integer and appended to the session query as a SQL LIMIT. */
-  readonly limit?: number;
-  /**
-   * Progress callback: (1-based row index, total rows, session id, status text).
-   * Not invoked for sessions skipped because they are in the state file.
-   */
-  readonly onProgress?: (i: number, total: number, sid: string, status: string) => void;
-}
-/** Counters returned by reembedCorpus. */
-export interface BackfillReport {
-  /** Number of session rows selected by the query. */
-  readonly total: number;
-  /** succeeded + failed + skippedAlreadyDone. */
-  readonly processed: number;
-  /** Sessions for which at least one chunk vector was written. */
-  readonly succeeded: number;
-  /** Sessions with no text, no embedded chunk, or a failed database write. */
-  readonly failed: number;
-  /** Sessions skipped because their id was already in the state file. */
-  readonly skippedAlreadyDone: number;
-  /** True when the database file did not exist; all counters are then 0. */
-  readonly dbMissing: boolean;
-}
-/** Row shape selected from the `sessions` table. */
-interface SessionRow {
-  id: string;
-  label: string | null;
-  summary: string | null;
-  body: string | null;
-}
-/**
- * Read the set of already-processed session ids from the state file.
- * A missing, unreadable or malformed file yields an empty set (best effort:
- * the worst case is that sessions are embedded again).
- */
-function loadState(path: string): Set<string> {
-  const completed = new Set<string>();
-  if (!existsSync(path)) return completed;
-  try {
-    const parsed = JSON.parse(readFileSync(path, "utf8")) as { done?: string[] };
-    for (const id of parsed.done ?? []) completed.add(id);
-  } catch {
-    // Deliberately ignored: fall through with whatever was collected (nothing).
-  }
-  return completed;
-}
-/**
- * Persist the processed-id set as `{ "done": [...] }` with ids sorted,
- * creating the parent directory when needed.
- */
-function saveState(path: string, done: Set<string>): void {
-  const parentDir = dirname(path);
-  mkdirSync(parentDir, { recursive: true });
-  const sortedIds = Array.from(done).sort();
-  const payload = JSON.stringify({ done: sortedIds });
-  writeFileSync(path, payload);
-}
-/**
- * Re-embed sessions from the SQLite database at `opts.dbPath` into
- * session_embedding_chunks / session_chunk_map.
- *
- * Sessions whose ids are in the state file are skipped. Every other session
- * is chunked with chunkSessionText and each chunk is embedded with kind
- * "document" (one retry after 200 ms on LLMUnreachableError). The session's
- * existing chunk rows are then replaced by the vectors that succeeded. The
- * delete + inserts for one session run inside a single transaction, so a
- * failed write rolls back instead of leaving the session with deleted or
- * partially written chunks.
- *
- * @param opts Database path, embedder, and optional state path, limit and
- *   progress callback.
- * @returns Counters for the run; `dbMissing` is true (all counters zero) when
- *   the database file does not exist.
- * @throws Any embedder error that is not an LLMUnreachableError.
- */
-export async function reembedCorpus(opts: BackfillOptions): Promise<BackfillReport> {
-  type EmbeddedChunk = { idx: number; vec: Float32Array };
-  const statePath = opts.statePath ?? DEFAULT_STATE_PATH;
-  if (!existsSync(opts.dbPath)) {
-    return { total: 0, processed: 0, succeeded: 0, failed: 0, skippedAlreadyDone: 0, dbMissing: true };
-  }
-  const db = new Database(opts.dbPath);
-  let total = 0;
-  let succeeded = 0;
-  let failed = 0;
-  let skipped = 0;
-  // Everything after the open runs under try/finally so the handle is closed
-  // even when loading the extension or preparing a statement throws.
-  try {
-    sqliteVec.load(db);
-    // Backfill every session with content; live ingest covers ongoing writes.
-    // The state file dedupes across runs so partial completion resumes cleanly.
-    const sql =
-      "SELECT s.id, s.label, s.summary, s.body FROM sessions s " +
-      "WHERE s.body IS NOT NULL OR s.summary IS NOT NULL OR s.label IS NOT NULL " +
-      "ORDER BY s.started_at" +
-      (opts.limit ? ` LIMIT ${Math.trunc(opts.limit)}` : "");
-    const rows = db.prepare<[], SessionRow>(sql).all();
-    total = rows.length;
-    const done = loadState(statePath);
-    const selectChunks = db.prepare<[string], { chunk_id: number }>(
-      "SELECT chunk_id FROM session_chunk_map WHERE session_id = ?",
-    );
-    const delMap = db.prepare("DELETE FROM session_chunk_map WHERE session_id = ?");
-    const insChunk = db.prepare(
-      "INSERT INTO session_embedding_chunks (embedding, session_id, chunk_idx) VALUES (?, ?, ?)",
-    );
-    const insMap = db.prepare(
-      "INSERT INTO session_chunk_map (chunk_id, session_id, chunk_idx) VALUES (?, ?, ?)",
-    );
-    // Atomically swap a session's chunk rows: if any statement throws, the
-    // transaction rolls back and the previous rows stay intact.
-    const replaceChunks = db.transaction(
-      (sessionId: string, vectors: EmbeddedChunk[]): void => {
-        const existing = selectChunks.all(sessionId);
-        if (existing.length > 0) {
-          const placeholders = existing.map(() => "?").join(",");
-          db.prepare(
-            `DELETE FROM session_embedding_chunks WHERE chunk_id IN (${placeholders})`,
-          ).run(...existing.map((r) => r.chunk_id));
-          delMap.run(sessionId);
-        }
-        for (const { idx: cidx, vec } of vectors) {
-          const blob = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
-          // BigInt cast so vec0's aux chunk_idx column receives an INTEGER.
-          const info = insChunk.run(blob, sessionId, BigInt(cidx));
-          insMap.run(Number(info.lastInsertRowid), sessionId, cidx);
-        }
-      },
-    );
-    for (let i = 0; i < rows.length; i++) {
-      const row = rows[i]!;
-      const idx = i + 1;
-      if (done.has(row.id)) {
-        skipped += 1;
-        continue;
-      }
-      const chunks = chunkSessionText({
-        label: row.label,
-        summary: row.summary,
-        body: row.body,
-      });
-      if (chunks.length === 0) {
-        failed += 1;
-        opts.onProgress?.(idx, total, row.id, "SKIP (no text)");
-        continue;
-      }
-      // Per-chunk failure tolerance matches live ingest: one chunk hitting
-      // the Ollama edge-cliff 500 must not zero out an entire session's
-      // coverage. Single retry on LLMUnreachableError catches transient
-      // failures; persistent ones are dropped. Session is "done" if any
-      // chunk landed — partial max-pool coverage beats none.
-      const vectors: EmbeddedChunk[] = [];
-      let chunkSkipped = 0;
-      for (let c = 0; c < chunks.length; c++) {
-        const chunk = chunks[c]!;
-        let lastErr: unknown;
-        for (let attempt = 0; attempt < 2; attempt++) {
-          try {
-            const out = await opts.embedder.embed(chunk, "document");
-            vectors.push({ idx: c, vec: out.vector });
-            lastErr = undefined;
-            break;
-          } catch (e) {
-            lastErr = e;
-            if (!(e instanceof LLMUnreachableError)) throw e;
-            if (attempt === 0) await new Promise((r) => setTimeout(r, 200));
-          }
-        }
-        if (lastErr !== undefined) chunkSkipped += 1;
-      }
-      if (vectors.length === 0) {
-        failed += 1;
-        opts.onProgress?.(idx, total, row.id, `FAIL (embedder, ${chunkSkipped}/${chunks.length} chunks)`);
-        continue;
-      }
-      try {
-        replaceChunks(row.id, vectors);
-      } catch (e) {
-        failed += 1;
-        opts.onProgress?.(idx, total, row.id, `FAIL (db): ${(e as Error).message}`);
-        continue;
-      }
-      done.add(row.id);
-      succeeded += 1;
-      const status =
-        chunkSkipped === 0
-          ? `OK (${vectors.length} chunks)`
-          : `PARTIAL (${vectors.length}/${chunks.length} chunks, ${chunkSkipped} skipped)`;
-      opts.onProgress?.(idx, total, row.id, status);
-      if (succeeded % SAVE_EVERY === 0) saveState(statePath, done);
-    }
-    saveState(statePath, done);
-  } finally {
-    db.close();
-  }
-  return {
-    total,
-    processed: succeeded + failed + skipped,
-    succeeded,
-    failed,
-    skippedAlreadyDone: skipped,
-    dbMissing: false,
-  };
-}
-/**
- * Delete the resume-state file. A missing file is not an error.
- *
- * @param statePath File to remove; defaults to DEFAULT_STATE_PATH.
- */
-export function clearBackfillState(statePath: string = DEFAULT_STATE_PATH): void {
-  // `force: true` turns "file does not exist" into a no-op. This replaces an
-  // existsSync check followed by a CommonJS `require("node:fs")`, which is
-  // not defined when this file runs as an ES module and left a window
-  // between the check and the unlink.
-  rmSync(statePath, { force: true });
-}

package/src/core/embedding/embed-normalize.ts DELETED Viewed

@@ -1,135 +0,0 @@
-/**
- * embed-normalize — one-shot migration: L2-normalize every row in
- * session_embeddings. Ports `embed_normalize.py`.
- *
- * vec0 with implicit L2 distance ranks correctly by cosine similarity
- * only when stored vectors are unit-length. New writes (post-this-fix)
- * are normalized at source by OllamaClient.embed; this module brings
- * existing rows to the same invariant.
- *
- * Idempotent: re-running on already-normalized vectors is a no-op
- * within float tolerance (EPS = 1e-3). Each row is rewritten in its
- * own transaction so interrupts are safe.
- */
-import { existsSync } from "node:fs";
-import Database from "better-sqlite3";
-import * as sqliteVec from "sqlite-vec";
-/** Tolerance on |norm - 1| within which a stored vector counts as already normalized. */
-const EPS = 1e-3;
-/** Number of float32 components expected per stored embedding when NormalizeOptions.dim is omitted. */
-const DEFAULT_DIM = 768;
-/** Number of session ids taken per batch slice when NormalizeOptions.batchSize is omitted. */
-const DEFAULT_BATCH = 100;
-/** Inputs to normalizeEmbeddings. */
-export interface NormalizeOptions {
-  /** Path of the SQLite database; when the file does not exist the run returns dbMissing: true. */
-  readonly dbPath: string;
-  /** Expected vector length; each stored blob must be exactly dim * 4 bytes. Defaults to DEFAULT_DIM. */
-  readonly dim?: number;
-  /** Number of session ids handled per batch slice. Defaults to DEFAULT_BATCH. */
-  readonly batchSize?: number;
-  /** When true, rows that need rewriting are counted but not written. Defaults to false. */
-  readonly dryRun?: boolean;
-}
-/** Counters returned by normalizeEmbeddings. */
-export interface NormalizeReport {
-  /** Number of rows found in session_embeddings. */
-  readonly total: number;
-  /** Rows whose norm was within EPS of 1 and were left untouched. */
-  readonly alreadyNormalized: number;
-  /** Rows rewritten with a unit-length vector (or that would be, in a dry run). */
-  readonly rewritten: number;
-  /** Rows skipped because every component was zero. */
-  readonly zeroVector: number;
-  /** True when the database file did not exist; all counters are then 0. */
-  readonly dbMissing: boolean;
-  /** Echo of the effective dryRun setting. */
-  readonly dryRun: boolean;
-}
-/** Row shape read from session_embeddings. */
-interface EmbeddingRow {
-  session_id: string;
-  embedding: Buffer;
-}
-/** Row shape of the id-only listing query. */
-interface IdRow {
-  session_id: string;
-}
-/**
- * Interpret a raw embedding blob as `dim` 32-bit floats.
- *
- * Returns a view over the Buffer's memory when its offset is 4-byte aligned,
- * otherwise a copy. Callers must therefore not rely on writes to the result
- * reaching `buf`.
- *
- * @param buf Blob holding exactly `dim * 4` bytes.
- * @param dim Number of float32 components expected.
- * @throws Error when the blob length is not `dim * 4` bytes.
- */
-function bytesToFloats(buf: Buffer, dim: number): Float32Array {
-  if (buf.byteLength !== dim * 4) {
-    throw new Error(`expected ${dim * 4} bytes, got ${buf.byteLength}`);
-  }
-  if (buf.byteOffset % Float32Array.BYTES_PER_ELEMENT !== 0) {
-    // A Float32Array view requires an offset that is a multiple of 4 and
-    // throws RangeError otherwise. Buffers that are slices of a larger
-    // allocation can sit at any offset, so copy into a fresh aligned buffer.
-    const aligned = new Uint8Array(buf.byteLength);
-    aligned.set(buf);
-    return new Float32Array(aligned.buffer, 0, dim);
-  }
-  return new Float32Array(buf.buffer, buf.byteOffset, dim);
-}
-/** Expose exactly the bytes covered by `vec` (respecting its offset and length) as a Buffer. */
-function floatsToBytes(vec: Float32Array): Buffer {
-  const { buffer, byteOffset, byteLength } = vec;
-  return Buffer.from(buffer, byteOffset, byteLength);
-}
-/**
- * L2-normalize every vector stored in session_embeddings.
- *
- * Rows whose norm is already within EPS of 1 are left alone and all-zero
- * rows are skipped. Every other row is replaced (delete + insert) by its
- * unit-length version. Each replacement runs inside its own transaction, so
- * an interruption cannot leave a session with its embedding deleted but not
- * re-inserted.
- *
- * @param opts Database path plus optional dim, batchSize and dryRun.
- * @returns Counters for the run; `dbMissing` is true (all counters zero) when
- *   the database file does not exist.
- * @throws Error when `batchSize` is not greater than 0, or when a stored
- *   blob is not `dim * 4` bytes long.
- */
-export function normalizeEmbeddings(opts: NormalizeOptions): NormalizeReport {
-  const dim = opts.dim ?? DEFAULT_DIM;
-  const batchSize = opts.batchSize ?? DEFAULT_BATCH;
-  const dryRun = opts.dryRun ?? false;
-  if (!existsSync(opts.dbPath)) {
-    return { total: 0, alreadyNormalized: 0, rewritten: 0, zeroVector: 0, dbMissing: true, dryRun };
-  }
-  // A zero or negative batch size would never advance the batch loop, and
-  // NaN would end it after one empty slice; reject all three up front.
-  if (!(batchSize > 0)) {
-    throw new Error("normalizeEmbeddings: batchSize must be > 0");
-  }
-  const db = new Database(opts.dbPath);
-  let total = 0;
-  let alreadyNormalized = 0;
-  let rewritten = 0;
-  let zeroVector = 0;
-  // Extension loading and statement preparation sit inside the try so the
-  // handle is closed even when they throw.
-  try {
-    sqliteVec.load(db);
-    const ids = db
-      .prepare<[], IdRow>("SELECT session_id FROM session_embeddings")
-      .all()
-      .map((r) => r.session_id);
-    total = ids.length;
-    const sel = db.prepare<[string], EmbeddingRow>(
-      "SELECT session_id, embedding FROM session_embeddings WHERE session_id = ?",
-    );
-    const del = db.prepare("DELETE FROM session_embeddings WHERE session_id = ?");
-    const ins = db.prepare("INSERT INTO session_embeddings (session_id, embedding) VALUES (?, ?)");
-    // Delete + insert as one unit: either both happen or neither does.
-    const rewriteRow = db.transaction((sid: string, blob: Buffer): void => {
-      del.run(sid);
-      ins.run(sid, blob);
-    });
-    for (let start = 0; start < total; start += batchSize) {
-      const batch = ids.slice(start, start + batchSize);
-      for (const sid of batch) {
-        const row = sel.get(sid);
-        if (!row) continue;
-        const vec = bytesToFloats(row.embedding, dim);
-        let sumSq = 0;
-        for (let i = 0; i < dim; i++) {
-          const v = vec[i] ?? 0;
-          sumSq += v * v;
-        }
-        if (sumSq === 0) {
-          // A zero vector has no direction to normalize to.
-          zeroVector += 1;
-          continue;
-        }
-        const norm = Math.sqrt(sumSq);
-        if (Math.abs(norm - 1) <= EPS) {
-          alreadyNormalized += 1;
-          continue;
-        }
-        if (dryRun) {
-          rewritten += 1;
-          continue;
-        }
-        const normalized = new Float32Array(dim);
-        for (let i = 0; i < dim; i++) {
-          normalized[i] = (vec[i] ?? 0) / norm;
-        }
-        rewriteRow(sid, floatsToBytes(normalized));
-        rewritten += 1;
-      }
-    }
-  } finally {
-    db.close();
-  }
-  return {
-    total,
-    alreadyNormalized,
-    rewritten,
-    zeroVector,
-    dbMissing: false,
-    dryRun,
-  };
-}

package/src/core/facts/backfill-facts.ts DELETED Viewed

@@ -1,254 +0,0 @@
-/**
- * backfill-facts — one-shot population of the FactStore from the existing
- * session corpus. Phase B.5, see docs/plans/factstore-design.md Section 7.
- *
- * For each session in `sessions` that has no facts yet (and was started
- * before the script's start timestamp, to avoid racing with live ingest),
- * runs the classifier over its body, extracts facts, and writes them via
- * SqliteSessionStore.insertFactsForSession.
- *
- * Resumable via a JSON state file (mirrors core/embedding/embed-backfill).
- * Interrupting and rerunning skips already-processed sessions. State path
- * defaults to ~/.nlm/backfill_facts.state.
- *
- * Layering: depends on the LLMClient + FactStore ports through the
- * SqliteSessionStore + SqliteFactStore composition. Lives under core/ but
- * is invoked from the CLI composition root, like embed-backfill.
- */
-import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
-import { dirname, join } from "node:path";
-import { homedir } from "node:os";
-import { extractFacts } from "@core/facts/extract-facts.js";
-import type { SqliteFactStore } from "@core/storage/sqlite-fact-store.js";
-import type { SqliteSessionStore } from "@core/storage/sqlite-session-store.js";
-import type { LLMClient } from "@ports/llm-client.js";
-import { LLMUnreachableError } from "@ports/llm-client.js";
-/** Resume-state file used when BackfillFactsOptions.statePath is omitted. */
-const DEFAULT_STATE_PATH = join(homedir(), ".nlm", "backfill_facts.state");
-/** Interval used for the periodic state-file saves during a run. */
-const SAVE_EVERY = 25;
-/** Inputs to backfillFacts. */
-export interface BackfillFactsOptions {
-  /** Session store; its rawDb() handle is queried for candidates and insertFactsForSession performs the write. */
-  readonly store: SqliteSessionStore;
-  /** Fact store passed through to insertFactsForSession. */
-  readonly factStore: SqliteFactStore;
-  /** Client whose classify(body) is called once per candidate session. */
-  readonly classifier: LLMClient;
-  /** Optional embedder. When omitted, facts are written without semantic vectors. */
-  readonly embedder?: LLMClient | null;
-  /** Resume-state file; defaults to DEFAULT_STATE_PATH. Not read or written in a dry run. */
-  readonly statePath?: string;
-  /** Cap on sessions processed this run. Default: all eligible. */
-  readonly limit?: number;
-  /**
-   * Resume from a specific session id. When set, sessions with id
-   * lexicographically <= this value are skipped on top of the state file's
-   * done set. Useful when the state file is lost but the operator
-   * remembers the last successful id.
-   */
-  readonly from?: string;
-  /** Don't write — just count what would happen. */
-  readonly dryRun?: boolean;
-  /**
-   * Re-process sessions that already have facts. Default: false (skip).
-   * Use when iterating the classifier prompt to refresh the corpus.
-   */
-  readonly reprocess?: boolean;
-  /**
-   * Progress callback: (1-based index within this run's work list, size of
-   * the work list, session id, status, optional detail text).
-   */
-  readonly onProgress?: (
-    i: number,
-    total: number,
-    sessionId: string,
-    status: BackfillStatus,
-    details?: string,
-  ) => void;
-}
-/** Per-session outcome label passed to onProgress. */
-export type BackfillStatus =
-  | "ok"
-  | "skipped_done"
-  | "skipped_existing_facts"
-  | "skipped_no_body"
-  | "skipped_low_confidence"
-  | "classify_failed"
-  | "storage_failed";
-/** Counters returned by backfillFacts. */
-export interface BackfillFactsReport {
-  /** Sessions selected for this run, after state-file filtering and the limit. */
-  readonly total: number;
-  /** Sessions whose facts were written (or would have been, in a dry run). */
-  readonly processed: number;
-  /** Number of facts written (or that would have been, in a dry run). */
-  readonly factsWritten: number;
-  /** Query rows dropped because their id was already in the state file. */
-  readonly skippedAlreadyDone: number;
-  /** Not incremented by backfillFacts in this file; reported as 0. */
-  readonly skippedExistingFacts: number;
-  /** Sessions in the work list whose body was null or empty. */
-  readonly skippedNoBody: number;
-  /** Sessions for which extractFacts returned no facts. */
-  readonly skippedLowConfidence: number;
-  /** Sessions whose classify call threw. */
-  readonly classifyFailures: number;
-  /** Sessions whose insertFactsForSession call threw. */
-  readonly storageFailures: number;
-}
-/** Row shape selected from the `sessions` table. */
-interface CandidateRow {
-  id: string;
-  started_at: string;
-  body: string | null;
-}
-/**
- * Load the ids recorded as done in the state file. Any problem — file
- * absent, unreadable, or not valid JSON — results in an empty set.
- */
-function loadState(path: string): Set<string> {
-  const doneIds = new Set<string>();
-  if (existsSync(path)) {
-    try {
-      const raw = readFileSync(path, "utf8");
-      const state = JSON.parse(raw) as { done?: string[] };
-      for (const id of state.done ?? []) doneIds.add(id);
-    } catch {
-      // Best effort: an unusable state file is treated as "nothing done yet".
-    }
-  }
-  return doneIds;
-}
-/**
- * Write the done-id set to the state file as compact JSON
- * (`{ "done": [...] }`, insertion order), creating the parent directory
- * first when it does not exist.
- */
-function saveState(path: string, done: Set<string>): void {
-  const parentDir = dirname(path);
-  const serialized = JSON.stringify({ done: [...done] });
-  if (!existsSync(parentDir)) {
-    mkdirSync(parentDir, { recursive: true });
-  }
-  writeFileSync(path, serialized);
-}
-/**
- * Classify eligible sessions and write the extracted facts via
- * `opts.store.insertFactsForSession`.
- *
- * Candidates are sessions started before this call, with a non-empty body,
- * that are not listed in the state file (and, unless `reprocess` is set,
- * have no facts yet). Sessions that yield no facts are marked done so they
- * are not classified again. An LLMUnreachableError from the classifier stops
- * the run; other classifier or storage errors are counted and skipped.
- *
- * The state file is saved after every SAVE_EVERY newly marked sessions and
- * once more at the end. Nothing is read from or written to it in a dry run.
- *
- * NOTE(review): ids already in the state file are filtered out even when
- * `reprocess` is true — confirm operators are expected to clear the state
- * file before a reprocess run.
- *
- * @param opts Stores, classifier and run options.
- * @returns Counters describing what was processed, skipped and failed.
- */
-export async function backfillFacts(
-  opts: BackfillFactsOptions,
-): Promise<BackfillFactsReport> {
-  const startedAtCutoff = new Date().toISOString();
-  const statePath = opts.statePath ?? DEFAULT_STATE_PATH;
-  const done = opts.dryRun ? new Set<string>() : loadState(statePath);
-  const db = opts.store.rawDb();
-  // Eligible sessions: started strictly before this run's cutoff (don't
-  // race with live ingest), with a non-empty body (the classifier needs
-  // transcript text). When reprocess=false, exclude sessions that already
-  // have facts attributed to them.
-  const sql = opts.reprocess
-    ? `
-      SELECT id, started_at, body
-      FROM sessions
-      WHERE started_at < ?
-        AND body IS NOT NULL AND length(body) > 0
-        ${opts.from ? "AND id > ?" : ""}
-      ORDER BY started_at ASC, id ASC
-    `
-    : `
-      SELECT s.id, s.started_at, s.body
-      FROM sessions s
-      WHERE s.started_at < ?
-        AND s.body IS NOT NULL AND length(s.body) > 0
-        AND NOT EXISTS (
-          SELECT 1 FROM facts f WHERE f.source_session_id = s.id
-        )
-        ${opts.from ? "AND s.id > ?" : ""}
-      ORDER BY s.started_at ASC, s.id ASC
-    `;
-  const rows: CandidateRow[] = opts.from
-    ? db.prepare<[string, string], CandidateRow>(sql).all(startedAtCutoff, opts.from)
-    : db.prepare<[string], CandidateRow>(sql).all(startedAtCutoff);
-  // Filter state-file-known done ids BEFORE applying limit. Without this,
-  // a dense cluster of previously-skipped (low-confidence) sessions would
-  // burn the batch's --limit on no-op skips. With it, --limit N means
-  // "N actually-processable sessions" — much more useful UX for repeated
-  // small batches that walk forward through the corpus. The pre-filter
-  // count gets reported as `skippedAlreadyDone` so the operator still sees
-  // how big the skip region was.
-  const skippedAlreadyDone = rows.filter((r) => done.has(r.id)).length;
-  const candidates = rows.filter((r) => !done.has(r.id));
-  const limit = opts.limit ?? candidates.length;
-  const work = candidates.slice(0, limit);
-  const total = work.length;
-  let processed = 0;
-  let factsWritten = 0;
-  // Sessions with facts are excluded by the query rather than counted here.
-  const skippedExistingFacts = 0;
-  let skippedNoBody = 0;
-  let skippedLowConfidence = 0;
-  let classifyFailures = 0;
-  let storageFailures = 0;
-  // Number of ids added to `done` since the last save. Keying the periodic
-  // save off this counter (instead of `processed`, which low-confidence
-  // skips never advance) gives one write per SAVE_EVERY marks on every path.
-  let unsavedMarks = 0;
-  const markDone = (sid: string): void => {
-    done.add(sid);
-    if (opts.dryRun) return;
-    unsavedMarks += 1;
-    if (unsavedMarks >= SAVE_EVERY) {
-      saveState(statePath, done);
-      unsavedMarks = 0;
-    }
-  };
-  for (let i = 0; i < work.length; i++) {
-    const row = work[i]!;
-    const sid = row.id;
-    // No per-iteration `done` check needed — `work` is already filtered
-    // against the state file above.
-    if (!row.body || row.body.length === 0) {
-      skippedNoBody += 1;
-      opts.onProgress?.(i + 1, total, sid, "skipped_no_body");
-      continue;
-    }
-    let classification;
-    try {
-      classification = await opts.classifier.classify(row.body);
-    } catch (err) {
-      classifyFailures += 1;
-      const detail =
-        err instanceof LLMUnreachableError
-          ? "ollama unreachable — stopping run"
-          : err instanceof Error
-            ? err.message
-            : String(err);
-      opts.onProgress?.(i + 1, total, sid, "classify_failed", detail);
-      // Ollama-down is fatal: every subsequent classify will fail. Stop
-      // here so the operator can fix and resume.
-      if (err instanceof LLMUnreachableError) break;
-      continue;
-    }
-    const facts = extractFacts(classification, sid, row.started_at);
-    if (facts.length === 0) {
-      skippedLowConfidence += 1;
-      opts.onProgress?.(
-        i + 1,
-        total,
-        sid,
-        "skipped_low_confidence",
-        `confidence=${classification.confidence}`,
-      );
-      // Mark done so a re-run doesn't keep paying the classifier cost on
-      // sessions the model can't extract anything from.
-      markDone(sid);
-      continue;
-    }
-    if (opts.dryRun) {
-      factsWritten += facts.length;
-      processed += 1;
-      opts.onProgress?.(i + 1, total, sid, "ok", `would-write=${facts.length}`);
-      continue;
-    }
-    try {
-      await opts.store.insertFactsForSession(
-        sid,
-        opts.factStore,
-        facts,
-        opts.embedder ?? null,
-      );
-    } catch (err) {
-      storageFailures += 1;
-      const detail = err instanceof Error ? err.message : String(err);
-      opts.onProgress?.(i + 1, total, sid, "storage_failed", detail);
-      continue;
-    }
-    factsWritten += facts.length;
-    processed += 1;
-    opts.onProgress?.(i + 1, total, sid, "ok", `wrote=${facts.length}`);
-    markDone(sid);
-  }
-  if (!opts.dryRun) saveState(statePath, done);
-  return {
-    total,
-    processed,
-    factsWritten,
-    skippedAlreadyDone,
-    skippedExistingFacts,
-    skippedNoBody,
-    skippedLowConfidence,
-    classifyFailures,
-    storageFailures,
-  };
-}