npm - nlm-memory - Versions diffs - 0.5.0 → 0.5.2 - Mend

nlm-memory 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (257) hide show

package/README.md +89 -34
package/dist/cli/digest.d.ts +20 -0
package/dist/cli/digest.js +142 -0
package/dist/cli/digest.js.map +1 -0
package/dist/cli/nlm.d.ts +1 -0
package/dist/cli/nlm.js +25 -1
package/dist/cli/nlm.js.map +1 -1
package/dist/core/digest/compose.d.ts +38 -0
package/dist/core/digest/compose.js +93 -0
package/dist/core/digest/compose.js.map +1 -0
package/dist/core/digest/hook-liveness.d.ts +32 -0
package/dist/core/digest/hook-liveness.js +54 -0
package/dist/core/digest/hook-liveness.js.map +1 -0
package/dist/http/app.js +2 -1
package/dist/http/app.js.map +1 -1
package/dist/mcp/server.js +20 -1
package/dist/mcp/server.js.map +1 -1
package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
package/dist/ui/index.html +2 -2
package/package.json +26 -1
package/.agents/plugins/marketplace.json +0 -20
package/.github/workflows/ci.yml +0 -30
package/docs/methodology/re-derivation-rate.md +0 -112
package/docs/methodology/useful-hit-rate.md +0 -79
package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
package/docs/plans/desktop-product.md +0 -69
package/docs/plans/factstore-design.md +0 -236
package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
package/logs/CHANGELOG/CHANGELOG.md +0 -209
package/migrations/000_initial_schema.sql +0 -174
package/migrations/001_entity_type_rename.sql +0 -17
package/migrations/002_adapter_state_extend.sql +0 -12
package/migrations/003_session_embeddings.sql +0 -11
package/migrations/004_facts.sql +0 -46
package/migrations/005_sources.sql +0 -31
package/migrations/006_providers.sql +0 -33
package/migrations/007_source_tokens.sql +0 -17
package/migrations/008_fts_rebuild.sql +0 -9
package/migrations/009_session_embedding_chunks.sql +0 -46
package/migrations/010_sources_opencode.sql +0 -30
package/migrations/011_sources_hermes_agent.sql +0 -30
package/migrations/012_sources_aider.sql +0 -30
package/migrations/013_adapter_state_failure_count.sql +0 -12
package/migrations/014_sources_cursor.sql +0 -30
package/migrations/015_sources_windsurf.sql +0 -30
package/plugin-hermes-agent/README.md +0 -49
package/plugin-hermes-agent/__init__.py +0 -75
package/plugin-hermes-agent/plugin.yaml +0 -15
package/scripts/backfill-citations.mjs +0 -0
package/scripts/build-codex-plugin.mjs +0 -61
package/scripts/deepseek-probe.mjs +0 -67
package/scripts/extract-triples.mjs +0 -207
package/scripts/longmemeval/embedding-cache.ts +0 -77
package/scripts/longmemeval/fetch-dataset.sh +0 -25
package/scripts/longmemeval/run-harness.ts +0 -315
package/scripts/longmemeval/scorer.ts +0 -99
package/scripts/longmemeval/tsconfig.json +0 -9
package/scripts/longmemeval/types.ts +0 -35
package/scripts/nlm-daily-digest.py +0 -239
package/scripts/nlm-daily-digest.sh +0 -28
package/src/cli/classify-parity.ts +0 -257
package/src/cli/launchctl-helpers.ts +0 -49
package/src/cli/nlm.ts +0 -1078
package/src/core/actions/actions-log.ts +0 -118
package/src/core/actions/overlay.ts +0 -117
package/src/core/adapters/aider.ts +0 -205
package/src/core/adapters/claude-code.ts +0 -293
package/src/core/adapters/common.ts +0 -54
package/src/core/adapters/cursor.ts +0 -486
package/src/core/adapters/from-source.ts +0 -67
package/src/core/adapters/hermes-agent.ts +0 -240
package/src/core/adapters/hermes.ts +0 -277
package/src/core/adapters/jsonl-generic.ts +0 -208
package/src/core/adapters/opencode.ts +0 -281
package/src/core/adapters/pi.ts +0 -264
package/src/core/adapters/windsurf.ts +0 -386
package/src/core/classifier/prompt.ts +0 -200
package/src/core/dataset/build-dataset.ts +0 -463
package/src/core/embedding/chunk-body.ts +0 -76
package/src/core/embedding/embed-backfill.ts +0 -210
package/src/core/embedding/embed-normalize.ts +0 -135
package/src/core/facts/backfill-facts.ts +0 -254
package/src/core/facts/extract-facts.ts +0 -50
package/src/core/hook/citation-detect.ts +0 -124
package/src/core/hook/cite-memo.ts +0 -68
package/src/core/hook/claude-settings.ts +0 -187
package/src/core/hook/gate.ts +0 -25
package/src/core/hook/hook-log.ts +0 -41
package/src/core/hook/memo-sweep.ts +0 -164
package/src/core/hook/memo.ts +0 -67
package/src/core/hook/pointer-block.ts +0 -26
package/src/core/hook/select.ts +0 -32
package/src/core/hook/transcript.ts +0 -121
package/src/core/ingest/ingest-session.ts +0 -111
package/src/core/providers/provider-models.ts +0 -100
package/src/core/providers/provider-registry.ts +0 -196
package/src/core/recall/citation-log.ts +0 -108
package/src/core/recall/filter.ts +0 -27
package/src/core/recall/index.ts +0 -6
package/src/core/recall/match-fields.ts +0 -40
package/src/core/recall/query-log.ts +0 -149
package/src/core/recall/query-shape.ts +0 -66
package/src/core/recall/recall-service.ts +0 -320
package/src/core/recall/recent-log.ts +0 -59
package/src/core/recall/tokenize.ts +0 -18
package/src/core/recall/useful-scan.ts +0 -336
package/src/core/recall-facts/fact-query-log.ts +0 -150
package/src/core/recall-facts/fact-recall-service.ts +0 -327
package/src/core/scheduler/scan-once.ts +0 -142
package/src/core/scheduler/scheduler.ts +0 -225
package/src/core/sources/source-registry.ts +0 -278
package/src/core/storage/db-restore.ts +0 -133
package/src/core/storage/live-status.ts +0 -45
package/src/core/storage/migrate.ts +0 -72
package/src/core/storage/sqlite-fact-store.ts +0 -304
package/src/core/storage/sqlite-session-store.ts +0 -810
package/src/hook/hook-auth.ts +0 -18
package/src/hook/prompt-recall-hook.ts +0 -180
package/src/hook/session-end-hook.ts +0 -81
package/src/hook/session-start-hook.ts +0 -168
package/src/hook/stop-hook.ts +0 -239
package/src/http/app.ts +0 -1215
package/src/install/claude-code.ts +0 -128
package/src/install/codex.ts +0 -367
package/src/install/cursor.ts +0 -68
package/src/install/hermes-agent.ts +0 -76
package/src/install/hermes.ts +0 -78
package/src/install/nlm-dir-perms.ts +0 -55
package/src/install/ollama.ts +0 -284
package/src/install/setup.ts +0 -489
package/src/install/windsurf.ts +0 -68
package/src/llm/classifier-box.ts +0 -64
package/src/llm/deepseek-client.ts +0 -150
package/src/llm/env-autoload.ts +0 -55
package/src/llm/ollama-client.ts +0 -189
package/src/mcp/server.ts +0 -534
package/src/ports/fact-store.ts +0 -102
package/src/ports/llm-client.ts +0 -52
package/src/ports/logger.ts +0 -16
package/src/ports/session-store.ts +0 -45
package/src/ports/transcript-adapter.ts +0 -55
package/src/shared/types.ts +0 -149
package/src/ui/App.tsx +0 -58
package/src/ui/components/PromoteOpenButton.tsx +0 -65
package/src/ui/components/SessionDrawer.tsx +0 -199
package/src/ui/components/SideNav.tsx +0 -162
package/src/ui/components/Skeleton.tsx +0 -107
package/src/ui/index.html +0 -13
package/src/ui/lib/actions.ts +0 -30
package/src/ui/lib/api.ts +0 -92
package/src/ui/lib/dataset.ts +0 -141
package/src/ui/lib/registries.ts +0 -155
package/src/ui/lib/view-settings.ts +0 -41
package/src/ui/main.tsx +0 -15
package/src/ui/pages/Live.tsx +0 -229
package/src/ui/pages/Pulse.tsx +0 -415
package/src/ui/pages/Recall.tsx +0 -190
package/src/ui/pages/River.tsx +0 -354
package/src/ui/pages/Search.tsx +0 -386
package/src/ui/pages/Stub.tsx +0 -9
package/src/ui/pages/Thread.tsx +0 -473
package/src/ui/pages/settings/Classifier.tsx +0 -227
package/src/ui/pages/settings/Data.tsx +0 -190
package/src/ui/pages/settings/Index.tsx +0 -65
package/src/ui/pages/settings/Labels.tsx +0 -224
package/src/ui/pages/settings/Providers.tsx +0 -305
package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
package/src/ui/pages/settings/Sources.tsx +0 -326
package/src/ui/pages/settings/Views.tsx +0 -96
package/src/ui/styles.css +0 -1890
package/src/ui/tsconfig.json +0 -21
package/src/ui/vite.config.ts +0 -19
package/tests/fixtures/claude_code/short_session.jsonl +0 -2
package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
package/tests/fixtures/facts.ts +0 -17
package/tests/fixtures/golden-corpus.ts +0 -85
package/tests/fixtures/hermes/paired_request_dump.json +0 -24
package/tests/fixtures/hermes/paired_session.json +0 -23
package/tests/fixtures/hermes/request_dump.json +0 -28
package/tests/fixtures/hermes/session_iso.json +0 -38
package/tests/fixtures/hermes/session_unix.json +0 -38
package/tests/fixtures/hermes/system_only.json +0 -18
package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
package/tests/fixtures/pi/short-successful.jsonl +0 -5
package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
package/tests/fixtures/sessions.ts +0 -22
package/tests/integration/backfill-facts.test.ts +0 -362
package/tests/integration/citation-explicit.test.ts +0 -111
package/tests/integration/cite-event.test.ts +0 -169
package/tests/integration/cite-memo.test.ts +0 -87
package/tests/integration/db-restore.test.ts +0 -153
package/tests/integration/embed-backfill.test.ts +0 -176
package/tests/integration/fact-supersedence.test.ts +0 -313
package/tests/integration/fts-index.test.ts +0 -60
package/tests/integration/getbyids-sqlite.test.ts +0 -100
package/tests/integration/hermes-agent-hooks.test.ts +0 -248
package/tests/integration/hook-claude-settings.test.ts +0 -218
package/tests/integration/hook-log.test.ts +0 -54
package/tests/integration/hook-memo.test.ts +0 -68
package/tests/integration/hook-pre-compact.test.ts +0 -105
package/tests/integration/hook-subagent-start.test.ts +0 -102
package/tests/integration/http.test.ts +0 -401
package/tests/integration/keyword-search-fts.test.ts +0 -66
package/tests/integration/mcp-recall-logging.test.ts +0 -88
package/tests/integration/mcp.test.ts +0 -260
package/tests/integration/memo-sweep.test.ts +0 -91
package/tests/integration/prompt-recall-hook.test.ts +0 -88
package/tests/integration/provider-registry.test.ts +0 -107
package/tests/integration/recall-golden.test.ts +0 -59
package/tests/integration/recall-sqlite.test.ts +0 -169
package/tests/integration/scheduler.test.ts +0 -391
package/tests/integration/session-end-hook.test.ts +0 -48
package/tests/integration/session-start-hook.test.ts +0 -126
package/tests/integration/source-registry.test.ts +0 -122
package/tests/integration/sqlite-fact-store.test.ts +0 -346
package/tests/integration/stop-hook.test.ts +0 -560
package/tests/integration/wal-checkpoint.test.ts +0 -49
package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
package/tests/unit/core/adapters/aider.test.ts +0 -230
package/tests/unit/core/adapters/claude-code.test.ts +0 -118
package/tests/unit/core/adapters/cursor.test.ts +0 -485
package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
package/tests/unit/core/adapters/hermes.test.ts +0 -81
package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
package/tests/unit/core/adapters/opencode.test.ts +0 -354
package/tests/unit/core/adapters/pi.test.ts +0 -110
package/tests/unit/core/adapters/windsurf.test.ts +0 -416
package/tests/unit/core/classifier/prompt.test.ts +0 -126
package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
package/tests/unit/core/facts/extract-facts.test.ts +0 -117
package/tests/unit/core/filter.test.ts +0 -40
package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
package/tests/unit/core/hook/citation-detect.test.ts +0 -124
package/tests/unit/core/hook/gate.test.ts +0 -29
package/tests/unit/core/hook/pointer-block.test.ts +0 -22
package/tests/unit/core/hook/select.test.ts +0 -66
package/tests/unit/core/match-fields.test.ts +0 -39
package/tests/unit/core/mcp-cite-session.test.ts +0 -51
package/tests/unit/core/providers/provider-models.test.ts +0 -101
package/tests/unit/core/query-shape.test.ts +0 -92
package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
package/tests/unit/core/recall-service.test.ts +0 -200
package/tests/unit/core/storage/live-status.test.ts +0 -54
package/tests/unit/core/tokenize.test.ts +0 -32
package/tests/unit/core/useful-scan.test.ts +0 -537
package/tests/unit/llm/embed.test.ts +0 -93
package/tests/unit/llm/ollama-client.test.ts +0 -124
package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
package/tsconfig.json +0 -31
package/tsconfig.test.json +0 -11
package/vitest.config.ts +0 -22

package/src/core/classifier/prompt.ts DELETED Viewed

@@ -1,200 +0,0 @@
-/**
- * Classifier prompt + transcript helpers. Centralized so every LLMClient
- * implementation hits the same prompt (parity with the Python daemon).
- *
- * Hard cap at 15K chars matches `classifier.py` MAX_TRANSCRIPT_CHARS:
- * smaller models (phi4-mini, qwen) pattern-match JSON from the transcript
- * above that size. Long sessions get first-half + last-half with a
- * separator to preserve opening intent + closing decisions.
- *
- * Phase B.2: prompt now also asks for a `facts` array of normalized
- * (subject, predicate, value) triples for the FactStore. The closed
- * predicate vocabulary is embedded in the prompt so deterministic
- * supersedence (Phase B.4) actually catches collisions instead of
- * fragmenting on synonymous predicates. See docs/plans/factstore-design.md.
- */
-/**
- * Closed predicate vocabulary. Approximately 25 high-leverage predicates
- * covering the most common (subject, predicate, value) shapes Edward
- * actually writes about in sessions.
- *
- * Vocab evolution (Phase B.5 backfill, 2026-05-19): the 168-session pilot
- * showed `other` getting used 43% of the time — it became a catch-all for
- * narrative observations that don't fit the (subject, predicate, value)
- * shape at all, so `other` was removed. The classifier prompt now instructs the model to
- * SKIP facts that don't fit (leave them in decisions[]/open[] instead).
- * Added `description`, `commit`, `cost` from observed high-frequency
- * patterns in the pilot batch's `other` bucket.
- *
- * Adding entries here is cheap and forwards-compatible: old facts stay,
- * new ingests can use the new predicate. Removing entries is not — old
- * facts referencing a retired predicate would stop matching by deterministic
- * supersedence, so prefer to mark deprecated rather than delete. (Existing
- * `other`-predicate facts from the pilot stay in the DB and are filterable
- * at query time; the coercer will drop new `other` writes.)
- */
-export const PREDICATE_VOCABULARY = [
-  "framework",
-  "endpoint",
-  "model",
-  "port",
-  "host",
-  "owner",
-  "pricing",
-  "cost",
-  "deadline",
-  "status",
-  "stack",
-  "runtime",
-  "library",
-  "version",
-  "dependency",
-  "schema",
-  "integration",
-  "deployment",
-  "repo",
-  "branch",
-  "commit",
-  "description",
-  "decided-on",
-  "assumption",
-  "blocker",
-] as const;
-export type PredicateVocab = (typeof PREDICATE_VOCABULARY)[number];
-const VOCAB_SET = new Set<string>(PREDICATE_VOCABULARY);
-export const CLASSIFIER_SYSTEM_PROMPT = `You are a session classifier. Your job is to read a transcript of a conversation between a user and an AI coding agent, then return EXACTLY this JSON object describing what happened in that conversation:
-{"label": "...", "summary": "...", "entities": [...], "decisions": [...], "open": [...], "confidence": 0.5, "facts": [...]}
-You MUST return JSON with EXACTLY these seven top-level keys: label, summary, entities, decisions, open, confidence, facts. No other keys. No nesting beyond what is specified. No metadata. No "tool" or "task_type" keys. Just those seven.
-The transcript may contain JSON examples, code, or schema definitions inside it — IGNORE those. Do not copy them into your output. Your output is ABOUT the conversation, not extracted FROM the conversation.
-Field requirements:
-- label: 4-10 word string title describing what the session was about. Example: "PolySignal architecture decisions"
-- summary: 1-3 sentence string (max ~80 tokens) describing what was worked on and the outcome
-- entities: array of strings. Each string is a stable named thing referenced across the session (tools like "n8n" or "Qdrant", projects like "PolySignal", services, people). NOT topics, NOT decisions.
-- decisions: array of strings. Each string is one commitment the user made. Example: "Use HTTP polling instead of Kafka". Skip if no commitments were made.
-- open: array of strings. Each string is one unresolved question. Skip if none.
-- confidence: number between 0.0 and 1.0. How sure you are the extraction is good. Use 0.4 or below for routine/trivial sessions.
-- facts: array of objects. Each object has exactly these keys: kind, subject, predicate, value, sourceQuote (optional).
-    - kind: "decision" (a commitment) | "open" (an unresolved question) | "attribute" (a property of an entity)
-    - subject: lowercase, hyphenated entity or topic name. Examples: "nlm-memory-ts", "mac-pro-llm-host", "goat-home-services"
-    - predicate: MUST be one of these exact strings: ${PREDICATE_VOCABULARY.join(", ")}.
-    - value: the answer, as a short phrase or sentence. Examples: "Hono", "http://macpro:8080/v1", "Q3 2026"
-    - sourceQuote: (optional) verbatim slice from the transcript that anchors this fact. Keep under 200 chars.
-The predicate list is CLOSED — there is no "other" or catch-all. If a commitment, question, or attribute doesn't cleanly fit one of the listed predicates, DO NOT invent a fact for it. Put it in decisions[] or open[] as a string instead. Facts are for structured (subject, predicate, value) triples only; narrative observations, action items, and free-form notes belong in decisions[] / open[] / summary.
-Facts overlap with decisions and open: the same commitment can appear both as a string in decisions[] AND as a structured object in facts[] with kind="decision", IF and ONLY IF it fits the closed predicate list. Skip the fact (keep just the string in decisions[]) when no predicate fits.
-Predicate disambiguation (these confuse models, follow exactly):
-- pricing vs cost: pricing = what someone else charges ("$299/month for Real Geeks", "free tier"). cost = what we pay or spent ("$0 per run on local Ollama", "$750 invoice"). Never use pricing for colors, dimensions, or anything not a price.
-- commit vs version: commit = git SHA (7+ hex chars, e.g. "cb5b940", "63596c3"). version = semver / release tag ("v4", "DSM 7.2.2", "Postgres 15", "0.3.6"). Use commit for any explicit git reference even if short-form.
-- description vs status: description = what a thing IS ("rich text editor framework by Meta"). status = what state it's in right now ("running via pm2", "not yet started", "blocked on review").
-Return ONLY the JSON object. No markdown code fences. No prose before or after.`;
-export const MAX_TRANSCRIPT_CHARS = 15_000;
-export function truncateTranscript(text: string, maxChars: number = MAX_TRANSCRIPT_CHARS): string {
-  if (text.length <= maxChars) return text;
-  const half = Math.floor((maxChars - 80) / 2);
-  return (
-    text.slice(0, half) +
-    "\n\n[... transcript truncated; below is the closing portion ...]\n\n" +
-    text.slice(text.length - half)
-  );
-}
-const FENCE_RE = /^```(?:json)?\s*|\s*```$/gm;
-export function stripJsonFences(text: string): string {
-  return text.replace(FENCE_RE, "").trim();
-}
-const REQUIRED_KEYS = ["label", "summary", "entities", "decisions", "open", "confidence"] as const;
-export function validateClassifierJson(data: unknown): data is Record<string, unknown> {
-  if (!data || typeof data !== "object" || Array.isArray(data)) return false;
-  const obj = data as Record<string, unknown>;
-  // `facts` is not in REQUIRED_KEYS — Phase B.2 accepts classifier output
-  // without it (older models, fixtures from Phase E parity tests). Coerced
-  // to [] when absent.
-  return REQUIRED_KEYS.every((k) => k in obj);
-}
-export function buildUserPrompt(transcript: string, priorContext: string): string {
-  const truncated = truncateTranscript(transcript);
-  const parts: string[] = [];
-  if (priorContext) parts.push(`PRIOR CONTEXT (already filed):\n${priorContext}\n`);
-  parts.push(`TRANSCRIPT TO CLASSIFY:\n${truncated}`);
-  return parts.join("\n");
-}
-interface CoercedFact {
-  kind: "decision" | "open" | "attribute";
-  subject: string;
-  predicate: string;
-  value: string;
-  sourceQuote?: string;
-}
-function coerceFacts(raw: unknown): CoercedFact[] {
-  if (!Array.isArray(raw)) return [];
-  const out: CoercedFact[] = [];
-  for (const item of raw) {
-    if (!item || typeof item !== "object" || Array.isArray(item)) continue;
-    const o = item as Record<string, unknown>;
-    const kindRaw = String(o["kind"] ?? "").toLowerCase().trim();
-    if (kindRaw !== "decision" && kindRaw !== "open" && kindRaw !== "attribute") continue;
-    const subject = String(o["subject"] ?? "").toLowerCase().trim();
-    const predicateRaw = String(o["predicate"] ?? "").toLowerCase().trim();
-    const value = String(o["value"] ?? "").trim();
-    if (!subject || !predicateRaw || !value) continue;
-    // Closed vocab — drop the fact entirely if the predicate isn't recognized.
-    // Pilot data (Phase B.5) showed `other` was 43% of writes and almost all
-    // slop; the prompt now instructs the model to leave such observations in
-    // decisions[]/open[] strings. This coercer enforces the policy
-    // defensively in case the model emits an off-vocab predicate anyway.
-    if (!VOCAB_SET.has(predicateRaw)) continue;
-    const predicate = predicateRaw;
-    const sourceQuoteRaw = o["sourceQuote"];
-    const sourceQuote =
-      typeof sourceQuoteRaw === "string" && sourceQuoteRaw.trim().length > 0
-        ? sourceQuoteRaw.trim().slice(0, 500)
-        : undefined;
-    const fact: CoercedFact = { kind: kindRaw, subject, predicate, value };
-    if (sourceQuote !== undefined) fact.sourceQuote = sourceQuote;
-    out.push(fact);
-  }
-  return out;
-}
-export function coerceClassifyResult(data: Record<string, unknown>): {
-  label: string;
-  summary: string;
-  entities: string[];
-  decisions: string[];
-  open: string[];
-  confidence: number;
-  facts: CoercedFact[];
-} {
-  const strArray = (v: unknown): string[] => {
-    if (!Array.isArray(v)) return [];
-    return v.map((x) => String(x).trim()).filter((s) => s.length > 0);
-  };
-  const label = String(data["label"] ?? "").trim().slice(0, 120) || "Untitled";
-  const summary = String(data["summary"] ?? "").trim();
-  const entities = strArray(data["entities"]);
-  const decisions = strArray(data["decisions"]);
-  const open = strArray(data["open"]);
-  const conf = Number(data["confidence"] ?? 0.5);
-  const confidence = Number.isFinite(conf) ? conf : 0.5;
-  const facts = coerceFacts(data["facts"]);
-  return { label, summary, entities, decisions, open, confidence, facts };
-}

package/src/core/dataset/build-dataset.ts DELETED Viewed

@@ -1,463 +0,0 @@
-/**
- * buildDataset — read projection over canonical.sqlite that hydrates every
- * UI page (pulse, river, search, thread).
- *
- * Ports the read paths of `dataset.py`. Persisted state is adjusted by the
- * action overlay (`loadActionOverlay`) before it is returned: retired,
- * snoozed, and re-typed entities are applied to the catalog, and resolved
- * or promoted open questions are removed from each session's open list.
- */
-import { existsSync } from "node:fs";
-import Database from "better-sqlite3";
-import * as sqliteVec from "sqlite-vec";
-import { liveSessionStatus } from "@core/storage/live-status.js";
-import { loadActionOverlay, openQuestionId } from "@core/actions/overlay.js";
-import type { ActionOverlay } from "@core/actions/overlay.js";
-import type { SessionStatus } from "@shared/types.js";
-export interface DatasetSession {
-  readonly id: string;
-  readonly date: string;
-  readonly started_at: string | null;
-  readonly ended_at: string | null;
-  readonly label: string;
-  readonly summary: string;
-  readonly entities: ReadonlyArray<string>;
-  readonly decisions: ReadonlyArray<string>;
-  readonly open: ReadonlyArray<string>;
-  readonly open_questions: ReadonlyArray<{ id: string; text: string; resolved: false }>;
-  readonly status: SessionStatus;
-  readonly duration_min: number;
-  readonly runtime: string;
-  readonly supersedes?: string;
-  readonly superseded_by?: string;
-}
-export interface DatasetEntity {
-  readonly canonical: string;
-  readonly type: string;
-  readonly status: string;
-  readonly session_count: number;
-  readonly last_seen_session: string | null;
-}
-export interface DatasetResponse {
-  readonly meta: {
-    readonly last_sync: string;
-    readonly sessions_total: number;
-    readonly entities_total: number;
-    readonly db_present: boolean;
-    readonly db_path: string;
-  };
-  readonly sessions: ReadonlyArray<DatasetSession>;
-  readonly entities: ReadonlyArray<DatasetEntity>;
-  readonly entity_colors: Record<string, string>;
-  readonly entity_type: Record<string, string>;
-  readonly entity_status: Record<string, string>;
-  readonly metrics: {
-    readonly this_week: number;
-    readonly last_week: number;
-    readonly sparkline: ReadonlyArray<number>;
-    readonly healthy: number;
-    readonly sparse: number;
-    readonly stale: number;
-    readonly closed_decisions: number;
-  };
-  readonly alerts: ReadonlyArray<{
-    readonly id: string;
-    readonly type: "stale";
-    readonly severity: "high" | "medium";
-    readonly entity: string;
-    readonly summary: string;
-    readonly sessions: ReadonlyArray<string>;
-    readonly age_days: number;
-    readonly last_touch_at: string | null;
-  }>;
-  readonly runtimes: ReadonlyArray<DatasetRuntime>;
-}
-export interface DatasetRuntime {
-  readonly name: string;
-  readonly status: "active" | "idle" | "dormant";
-  readonly sessions_total: number;
-  readonly this_week: number;
-  readonly last_week: number;
-  readonly last_session_at: string | null;
-}
-interface SessionRow {
-  id: string;
-  started_at: string | null;
-  ended_at: string | null;
-  duration_min: number | null;
-  label: string;
-  summary: string;
-  status: "active" | "closed" | "superseded";
-  transcript_path: string | null;
-  runtime: string;
-}
-interface EntityRow {
-  session_id: string;
-  entity_canonical: string;
-}
-interface MarkerRow {
-  session_id: string;
-  kind: "decision" | "open";
-  text: string;
-  position: number;
-}
-interface EdgeRow {
-  from_session: string;
-  to_session: string;
-  kind: "supersedes" | "continues";
-}
-interface EntityCatalogRow {
-  canonical: string;
-  type: string;
-  status: string;
-  session_count: number;
-  last_seen_session: string | null;
-}
-const EMPTY_DATASET = (dbPath: string, present: boolean): DatasetResponse => ({
-  meta: {
-    last_sync: new Date().toISOString(),
-    sessions_total: 0,
-    entities_total: 0,
-    db_present: present,
-    db_path: dbPath,
-  },
-  sessions: [],
-  entities: [],
-  entity_colors: {},
-  entity_type: {},
-  entity_status: {},
-  metrics: { this_week: 0, last_week: 0, sparkline: [0, 0, 0, 0, 0, 0, 0], healthy: 0, sparse: 0, stale: 0, closed_decisions: 0 },
-  alerts: [],
-  runtimes: [],
-});
-export interface BuildDatasetOptions {
-  /** Include path-shaped entities (filesystem leaks from the classifier).
-   *  Default false — they pollute the catalog without adding signal. */
-  readonly includePaths?: boolean;
-}
-export function buildDataset(dbPath: string, options: BuildDatasetOptions = {}): DatasetResponse {
-  if (!existsSync(dbPath)) return EMPTY_DATASET(dbPath, false);
-  const db = new Database(dbPath, { readonly: true });
-  try {
-    sqliteVec.load(db);
-  } catch {
-    // vec extension only required for semantic search; tolerable here.
-  }
-  try {
-    return projectFromDb(db, dbPath, options.includePaths ?? false);
-  } finally {
-    db.close();
-  }
-}
-/**
- * Heuristic for "this entity is actually a filesystem path the classifier
- * leaked into the catalog". Catches things like ".claude/agents/",
- * "bridge/server.js", "deploy.sh", "nlm-memory-spec.md" while leaving
- * real entities like "n8n", "Node.js", "NocoDB", "Whtnxt Agent" alone.
- */
-const CODE_FILE_EXT_RE =
-  /\.(?:md|markdown|txt|ts|tsx|js|jsx|mjs|cjs|py|pyi|json|yaml|yml|toml|sh|bash|zsh|css|html|sql|xml|env|ini|cfg|conf|lock)$/i;
-export function isPathShapedEntity(canonical: string): boolean {
-  if (!canonical) return false;
-  // Any slash → looks like a path (forward or back).
-  if (canonical.includes("/") || canonical.includes("\\")) return true;
-  // Leading dot → treated as a dotfile (e.g. ".env", ".mcp.json"); only a bare "." and "..." are exempt.
-  if (canonical.startsWith(".") && canonical.length > 1 && canonical !== "...") return true;
-  // Common source-code file extensions.
-  if (CODE_FILE_EXT_RE.test(canonical)) return true;
-  return false;
-}
-function projectFromDb(db: Database.Database, dbPath: string, includePaths: boolean): DatasetResponse {
-  const sessionRows = db
-    .prepare<[], SessionRow>(`
-      SELECT id, started_at, ended_at, duration_min, label, summary,
-             status, transcript_path, runtime
-      FROM sessions
-      ORDER BY started_at ASC
-    `)
-    .all();
-  if (sessionRows.length === 0) return EMPTY_DATASET(dbPath, true);
-  const entitiesBySession = new Map<string, string[]>();
-  for (const r of db
-    .prepare<[], EntityRow>("SELECT session_id, entity_canonical FROM session_entities ORDER BY session_id")
-    .all()) {
-    const list = entitiesBySession.get(r.session_id);
-    if (list) list.push(r.entity_canonical);
-    else entitiesBySession.set(r.session_id, [r.entity_canonical]);
-  }
-  const decisionsBySession = new Map<string, string[]>();
-  const openBySession = new Map<string, { id: string; text: string }[]>();
-  for (const r of db
-    .prepare<[], MarkerRow>("SELECT session_id, kind, text, position FROM markers ORDER BY session_id, position")
-    .all()) {
-    if (r.kind === "decision") {
-      const list = decisionsBySession.get(r.session_id);
-      if (list) list.push(r.text);
-      else decisionsBySession.set(r.session_id, [r.text]);
-    } else {
-      const id = openQuestionId(r.session_id, r.text);
-      const list = openBySession.get(r.session_id);
-      if (list) list.push({ id, text: r.text });
-      else openBySession.set(r.session_id, [{ id, text: r.text }]);
-    }
-  }
-  const supersedesBy = new Map<string, string>();
-  const supersededByBy = new Map<string, string>();
-  const continuesBy = new Map<string, string>();
-  for (const r of db
-    .prepare<[], EdgeRow>("SELECT from_session, to_session, kind FROM session_edges")
-    .all()) {
-    if (r.kind === "supersedes") {
-      supersedesBy.set(r.from_session, r.to_session);
-      supersededByBy.set(r.to_session, r.from_session);
-    } else if (r.kind === "continues") {
-      continuesBy.set(r.from_session, r.to_session);
-    }
-  }
-  const allEntityRows = db
-    .prepare<[], EntityCatalogRow>(`
-      SELECT canonical, type, status, session_count, last_seen_session
-      FROM entities ORDER BY session_count DESC
-    `)
-    .all();
-  const overlay = loadActionOverlay(db);
-  for (const e of allEntityRows) {
-    if (overlay.retiredEntities.has(e.canonical)) e.status = "retired";
-    else if (overlay.snoozedEntities.has(e.canonical)) e.status = "snoozed";
-    const newType = overlay.labeledEntities.get(e.canonical);
-    if (newType) e.type = newType;
-  }
-  const entityRows = includePaths
-    ? allEntityRows
-    : allEntityRows.filter((e) => !isPathShapedEntity(e.canonical));
-  const keptEntities = new Set(entityRows.map((e) => e.canonical));
-  const sessions: DatasetSession[] = sessionRows.map((s) => {
-    const status = liveSessionStatus(s.transcript_path, s.status);
-    const rawOpen = openBySession.get(s.id) ?? [];
-    const supersedes = supersedesBy.get(s.id);
-    const supersededBy = supersededByBy.get(s.id);
-    const rawEntities = entitiesBySession.get(s.id) ?? [];
-    const activeOpen = rawOpen.filter(
-      (o) => !overlay.resolvedOpens.has(o.id) && !overlay.promotedOpens.has(o.id),
-    );
-    const promotedDecisions = rawOpen
-      .filter((o) => overlay.promotedOpens.has(o.id))
-      .map((o) => overlay.promotedOpens.get(o.id)!);
-    return {
-      id: s.id,
-      date: (s.started_at ?? "").slice(0, 10),
-      started_at: s.started_at,
-      ended_at: s.ended_at,
-      label: s.label,
-      summary: s.summary,
-      entities: includePaths ? rawEntities : rawEntities.filter((name) => keptEntities.has(name)),
-      decisions: [...(decisionsBySession.get(s.id) ?? []), ...promotedDecisions],
-      open: activeOpen.map((o) => o.text),
-      open_questions: activeOpen.map((o) => ({ id: o.id, text: o.text, resolved: false as const })),
-      status,
-      duration_min: s.duration_min ?? 0,
-      runtime: s.runtime,
-      ...(supersedes !== undefined ? { supersedes } : {}),
-      ...(supersededBy !== undefined ? { superseded_by: supersededBy } : {}),
-    };
-  });
-  // continuesBy is in the dataset shape but unused by current UI; reserved for thread view.
-  void continuesBy;
-  const entityColors: Record<string, string> = {};
-  const entityType: Record<string, string> = {};
-  const entityStatus: Record<string, string> = {};
-  for (const e of entityRows) {
-    entityColors[e.canonical] = stableColor(e.canonical);
-    entityType[e.canonical] = e.type;
-    entityStatus[e.canonical] = e.status;
-  }
-  const metrics = computeMetrics(sessions, entityRows);
-  const alerts = computeStaleAlerts(sessions, entityRows, overlay);
-  const runtimes = computeRuntimes(sessions);
-  return {
-    meta: {
-      last_sync: new Date().toISOString(),
-      sessions_total: sessions.length,
-      entities_total: entityRows.length,
-      db_present: true,
-      db_path: dbPath,
-    },
-    sessions,
-    entities: entityRows,
-    entity_colors: entityColors,
-    entity_type: entityType,
-    entity_status: entityStatus,
-    metrics,
-    alerts,
-    runtimes,
-  };
-}
/**
 * Aggregates sessions into one activity summary per runtime.
 *
 * Sessions are grouped by their trimmed `runtime` value; a missing or blank
 * runtime is grouped under "unknown". Each group reports its total session
 * count, how many sessions started within the last 7 days and within the
 * 7 days before that, and the `started_at` string of its most recent session.
 *
 * Status is derived from the age of the most recent parseable `started_at`:
 * "active" within the last hour, "idle" within the last day, "dormant"
 * otherwise (including groups with no parseable timestamp at all).
 *
 * @param sessions Sessions to summarise; the array is not mutated.
 * @returns One entry per runtime, most recently used first. Runtimes without
 *   any parseable timestamp sort last.
 */
function computeRuntimes(sessions: ReadonlyArray<DatasetSession>): DatasetRuntime[] {
  const now = Date.now();
  const hour = 3_600_000;
  const day = 86_400_000;
  const week = now - 7 * day;
  const prev = now - 14 * day;

  const groups = new Map<string, {
    total: number;
    thisWeek: number;
    lastWeek: number;
    lastAt: number;
    lastAtIso: string | null;
  }>();

  for (const s of sessions) {
    // `||` is deliberate: an empty string after trimming also means "unknown".
    const name = (s.runtime ?? "").trim() || "unknown";
    let g = groups.get(name);
    if (g === undefined) {
      g = { total: 0, thisWeek: 0, lastWeek: 0, lastAt: 0, lastAtIso: null };
      groups.set(name, g);
    }
    g.total += 1;

    if (!s.started_at) continue;
    const t = Date.parse(s.started_at);
    if (!Number.isFinite(t)) continue;

    if (t >= week) g.thisWeek += 1;
    else if (t >= prev) g.lastWeek += 1;
    if (t > g.lastAt) {
      g.lastAt = t;
      g.lastAtIso = s.started_at;
    }
  }

  // Sort on the numeric timestamp tracked above instead of re-parsing
  // `last_session_at` with a "0" placeholder: parsing the string "0" is
  // implementation-defined and can yield a real date, which would rank
  // timestamp-less runtimes above ones with genuinely old sessions.
  return [...groups]
    .sort(([, a], [, b]) => b.lastAt - a.lastAt)
    .map(([name, g]) => {
      // lastAt stays 0 when no timestamp could be parsed; treat that as infinitely old.
      const age = g.lastAt ? now - g.lastAt : Infinity;
      const status: DatasetRuntime["status"] =
        age <= hour ? "active" : age <= day ? "idle" : "dormant";
      return {
        name,
        status,
        sessions_total: g.total,
        this_week: g.thisWeek,
        last_week: g.lastWeek,
        last_session_at: g.lastAtIso,
      };
    });
}
/**
 * Computes the headline dashboard metrics for a set of sessions and entities.
 *
 * - `this_week` / `last_week`: sessions started 0–7 and 7–14 days ago.
 *   Sessions with a missing, unparseable or future `started_at` are ignored.
 * - `sparkline`: seven daily counts for the last week, oldest day first
 *   (index 6 is the most recent 24 hours).
 * - `healthy` / `sparse` / `stale`: entities with at least one session,
 *   classified by the age of their last-seen session (more than 30 days, or
 *   an unknown age, is stale) and then by session count (3 or more is healthy).
 * - `closed_decisions`: total decisions across sessions whose status is not
 *   "superseded".
 *
 * @param sessions Sessions to measure.
 * @param entityRows Entity catalog rows to classify.
 */
function computeMetrics(
  sessions: ReadonlyArray<DatasetSession>,
  entityRows: ReadonlyArray<EntityCatalogRow>,
) {
  const MS_PER_DAY = 86_400_000;
  const now = Date.now();

  const sparkline = [0, 0, 0, 0, 0, 0, 0];
  let thisWeek = 0;
  let lastWeek = 0;
  sessions.forEach((session) => {
    if (!session.started_at) return;
    const startedMs = Date.parse(session.started_at);
    if (!Number.isFinite(startedMs)) return;

    const ageDays = (now - startedMs) / MS_PER_DAY;
    if (ageDays < 0) return; // starts in the future: not counted anywhere
    if (ageDays < 7) {
      thisWeek += 1;
      // Whole days of age map onto the sparkline from right (today) to left.
      const slot = 6 - Math.min(6, Math.floor(ageDays));
      sparkline[slot] = (sparkline[slot] ?? 0) + 1;
    } else if (ageDays < 14) {
      lastWeek += 1;
    }
  });

  const sessionsById = new Map(sessions.map((session) => [session.id, session]));
  const tally = { healthy: 0, sparse: 0, stale: 0 };
  for (const entity of entityRows) {
    if (entity.session_count === 0) continue;

    const lastSeen = sessionsById.get(entity.last_seen_session ?? "");
    const lastSeenMs = lastSeen?.started_at ? Date.parse(lastSeen.started_at) : NaN;
    // An entity whose last session cannot be dated is treated as very old.
    const ageDays = Number.isFinite(lastSeenMs) ? (now - lastSeenMs) / MS_PER_DAY : 999;

    if (ageDays > 30) {
      tally.stale += 1;
    } else if (entity.session_count >= 3) {
      tally.healthy += 1;
    } else {
      tally.sparse += 1;
    }
  }

  let closedDecisions = 0;
  for (const session of sessions) {
    if (session.status !== "superseded") closedDecisions += session.decisions.length;
  }

  return {
    this_week: thisWeek,
    last_week: lastWeek,
    sparkline,
    healthy: tally.healthy,
    sparse: tally.sparse,
    stale: tally.stale,
    closed_decisions: closedDecisions,
  };
}
/**
 * Builds "stale" alerts for entities that have not been touched in over 30 days.
 *
 * An entity is skipped when it has no sessions, is retired or snoozed, its
 * last-seen session is unknown or has no parseable `started_at`, or its alert
 * id is listed in the overlay's dismissed or snoozed alerts.
 *
 * The alert id is `stale_` followed by the canonical name with every
 * non-alphanumeric character replaced by `_`. Severity is "high" past 60 days,
 * "medium" otherwise. When sessions mentioning the entity still carry open
 * questions, the summary reports how many there are and quotes the first one
 * (truncated to 80 characters).
 *
 * @param sessions All sessions; used both to date entities and to collect open questions.
 * @param entityRows Entity catalog rows to inspect.
 * @param overlay User action overlay; only `dismissedAlerts` and `snoozedAlerts` are read here.
 * @returns Alerts with "high" severity first; order is otherwise that of `entityRows`.
 */
function computeStaleAlerts(
  sessions: ReadonlyArray<DatasetSession>,
  entityRows: ReadonlyArray<EntityCatalogRow>,
  overlay: ActionOverlay,
): DatasetResponse["alerts"] {
  const now = Date.now();
  const msPerDay = 86_400_000;
  const sessionsById = new Map(sessions.map((s) => [s.id, s]));
  const alerts: DatasetResponse["alerts"][number][] = [];

  for (const e of entityRows) {
    if (e.session_count === 0 || e.status === "retired" || e.status === "snoozed") continue;

    const last = sessionsById.get(e.last_seen_session ?? "");
    if (!last?.started_at) continue;
    const lastT = Date.parse(last.started_at);
    if (!Number.isFinite(lastT)) continue;

    const ageDays = Math.floor((now - lastT) / msPerDay);
    if (ageDays <= 30) continue;

    const alertId = `stale_${e.canonical.replace(/[^A-Za-z0-9]/g, "_")}`;
    if (overlay.dismissedAlerts.has(alertId) || overlay.snoozedAlerts.has(alertId)) continue;

    // Collect every open question on sessions mentioning this entity. The
    // list is intentionally not truncated before counting, so the summary
    // reports the real number of unresolved questions.
    const openOnEntity = sessions
      .filter((s) => s.entities.includes(e.canonical))
      .flatMap((s) => s.open);

    let summary = `Last touch ${ageDays} days ago`;
    const firstOpen = openOnEntity[0];
    if (firstOpen !== undefined) {
      const n = openOnEntity.length;
      const label = n === 1 ? "question" : "questions";
      summary += ` · ${n} unresolved open ${label}: "${firstOpen.slice(0, 80)}"`;
    }

    alerts.push({
      id: alertId,
      type: "stale",
      severity: ageDays > 60 ? "high" : "medium",
      entity: e.canonical,
      summary,
      sessions: [last.id],
      age_days: ageDays,
      last_touch_at: last.started_at,
    });
  }

  // Stable sort: high-severity alerts first, catalog order preserved within each group.
  alerts.sort((a, b) => (a.severity === "high" ? 0 : 1) - (b.severity === "high" ? 0 : 1));
  return alerts;
}
// Fixed palette of hue angles (degrees) that entity colors are drawn from.
const HUES = [200, 270, 320, 30, 90, 150, 220, 290, 340, 50, 110, 170] as const;

/**
 * Maps an entity name to a deterministic HSL color string.
 *
 * The name's UTF-16 code units are folded into a 32-bit integer hash
 * (multiply by 31, add the code unit, wrap to int32), and the hash's absolute
 * value selects one of the entries in `HUES`. Saturation and lightness are
 * fixed, so the same name always yields the same color.
 *
 * @param canonical Canonical entity name.
 * @returns A CSS color of the form `hsl(<hue>, 60%, 55%)`.
 */
function stableColor(canonical: string): string {
  let hash = 0;
  let index = 0;
  while (index < canonical.length) {
    // Math.imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
    hash = (Math.imul(hash, 31) + canonical.charCodeAt(index)) | 0;
    index += 1;
  }
  const slot = Math.abs(hash) % HUES.length;
  // The fallback only satisfies strict indexed-access typing; slot is always in range.
  const hue = HUES[slot] ?? 200;
  return `hsl(${hue}, 60%, 55%)`;
}