nlm-memory 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. package/README.md +89 -34
  2. package/dist/cli/digest.d.ts +20 -0
  3. package/dist/cli/digest.js +142 -0
  4. package/dist/cli/digest.js.map +1 -0
  5. package/dist/cli/nlm.d.ts +1 -0
  6. package/dist/cli/nlm.js +25 -1
  7. package/dist/cli/nlm.js.map +1 -1
  8. package/dist/core/digest/compose.d.ts +38 -0
  9. package/dist/core/digest/compose.js +93 -0
  10. package/dist/core/digest/compose.js.map +1 -0
  11. package/dist/core/digest/hook-liveness.d.ts +32 -0
  12. package/dist/core/digest/hook-liveness.js +54 -0
  13. package/dist/core/digest/hook-liveness.js.map +1 -0
  14. package/dist/http/app.js +2 -1
  15. package/dist/http/app.js.map +1 -1
  16. package/dist/mcp/server.js +20 -1
  17. package/dist/mcp/server.js.map +1 -1
  18. package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
  19. package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
  20. package/dist/ui/index.html +2 -2
  21. package/package.json +26 -1
  22. package/.agents/plugins/marketplace.json +0 -20
  23. package/.github/workflows/ci.yml +0 -30
  24. package/docs/methodology/re-derivation-rate.md +0 -112
  25. package/docs/methodology/useful-hit-rate.md +0 -79
  26. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  27. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  28. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  29. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  30. package/docs/plans/desktop-product.md +0 -69
  31. package/docs/plans/factstore-design.md +0 -236
  32. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
  33. package/logs/CHANGELOG/CHANGELOG.md +0 -209
  34. package/migrations/000_initial_schema.sql +0 -174
  35. package/migrations/001_entity_type_rename.sql +0 -17
  36. package/migrations/002_adapter_state_extend.sql +0 -12
  37. package/migrations/003_session_embeddings.sql +0 -11
  38. package/migrations/004_facts.sql +0 -46
  39. package/migrations/005_sources.sql +0 -31
  40. package/migrations/006_providers.sql +0 -33
  41. package/migrations/007_source_tokens.sql +0 -17
  42. package/migrations/008_fts_rebuild.sql +0 -9
  43. package/migrations/009_session_embedding_chunks.sql +0 -46
  44. package/migrations/010_sources_opencode.sql +0 -30
  45. package/migrations/011_sources_hermes_agent.sql +0 -30
  46. package/migrations/012_sources_aider.sql +0 -30
  47. package/migrations/013_adapter_state_failure_count.sql +0 -12
  48. package/migrations/014_sources_cursor.sql +0 -30
  49. package/migrations/015_sources_windsurf.sql +0 -30
  50. package/plugin-hermes-agent/README.md +0 -49
  51. package/plugin-hermes-agent/__init__.py +0 -75
  52. package/plugin-hermes-agent/plugin.yaml +0 -15
  53. package/scripts/backfill-citations.mjs +0 -0
  54. package/scripts/build-codex-plugin.mjs +0 -61
  55. package/scripts/deepseek-probe.mjs +0 -67
  56. package/scripts/extract-triples.mjs +0 -207
  57. package/scripts/longmemeval/embedding-cache.ts +0 -77
  58. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  59. package/scripts/longmemeval/run-harness.ts +0 -315
  60. package/scripts/longmemeval/scorer.ts +0 -99
  61. package/scripts/longmemeval/tsconfig.json +0 -9
  62. package/scripts/longmemeval/types.ts +0 -35
  63. package/scripts/nlm-daily-digest.py +0 -239
  64. package/scripts/nlm-daily-digest.sh +0 -28
  65. package/src/cli/classify-parity.ts +0 -257
  66. package/src/cli/launchctl-helpers.ts +0 -49
  67. package/src/cli/nlm.ts +0 -1078
  68. package/src/core/actions/actions-log.ts +0 -118
  69. package/src/core/actions/overlay.ts +0 -117
  70. package/src/core/adapters/aider.ts +0 -205
  71. package/src/core/adapters/claude-code.ts +0 -293
  72. package/src/core/adapters/common.ts +0 -54
  73. package/src/core/adapters/cursor.ts +0 -486
  74. package/src/core/adapters/from-source.ts +0 -67
  75. package/src/core/adapters/hermes-agent.ts +0 -240
  76. package/src/core/adapters/hermes.ts +0 -277
  77. package/src/core/adapters/jsonl-generic.ts +0 -208
  78. package/src/core/adapters/opencode.ts +0 -281
  79. package/src/core/adapters/pi.ts +0 -264
  80. package/src/core/adapters/windsurf.ts +0 -386
  81. package/src/core/classifier/prompt.ts +0 -200
  82. package/src/core/dataset/build-dataset.ts +0 -463
  83. package/src/core/embedding/chunk-body.ts +0 -76
  84. package/src/core/embedding/embed-backfill.ts +0 -210
  85. package/src/core/embedding/embed-normalize.ts +0 -135
  86. package/src/core/facts/backfill-facts.ts +0 -254
  87. package/src/core/facts/extract-facts.ts +0 -50
  88. package/src/core/hook/citation-detect.ts +0 -124
  89. package/src/core/hook/cite-memo.ts +0 -68
  90. package/src/core/hook/claude-settings.ts +0 -187
  91. package/src/core/hook/gate.ts +0 -25
  92. package/src/core/hook/hook-log.ts +0 -41
  93. package/src/core/hook/memo-sweep.ts +0 -164
  94. package/src/core/hook/memo.ts +0 -67
  95. package/src/core/hook/pointer-block.ts +0 -26
  96. package/src/core/hook/select.ts +0 -32
  97. package/src/core/hook/transcript.ts +0 -121
  98. package/src/core/ingest/ingest-session.ts +0 -111
  99. package/src/core/providers/provider-models.ts +0 -100
  100. package/src/core/providers/provider-registry.ts +0 -196
  101. package/src/core/recall/citation-log.ts +0 -108
  102. package/src/core/recall/filter.ts +0 -27
  103. package/src/core/recall/index.ts +0 -6
  104. package/src/core/recall/match-fields.ts +0 -40
  105. package/src/core/recall/query-log.ts +0 -149
  106. package/src/core/recall/query-shape.ts +0 -66
  107. package/src/core/recall/recall-service.ts +0 -320
  108. package/src/core/recall/recent-log.ts +0 -59
  109. package/src/core/recall/tokenize.ts +0 -18
  110. package/src/core/recall/useful-scan.ts +0 -336
  111. package/src/core/recall-facts/fact-query-log.ts +0 -150
  112. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  113. package/src/core/scheduler/scan-once.ts +0 -142
  114. package/src/core/scheduler/scheduler.ts +0 -225
  115. package/src/core/sources/source-registry.ts +0 -278
  116. package/src/core/storage/db-restore.ts +0 -133
  117. package/src/core/storage/live-status.ts +0 -45
  118. package/src/core/storage/migrate.ts +0 -72
  119. package/src/core/storage/sqlite-fact-store.ts +0 -304
  120. package/src/core/storage/sqlite-session-store.ts +0 -810
  121. package/src/hook/hook-auth.ts +0 -18
  122. package/src/hook/prompt-recall-hook.ts +0 -180
  123. package/src/hook/session-end-hook.ts +0 -81
  124. package/src/hook/session-start-hook.ts +0 -168
  125. package/src/hook/stop-hook.ts +0 -239
  126. package/src/http/app.ts +0 -1215
  127. package/src/install/claude-code.ts +0 -128
  128. package/src/install/codex.ts +0 -367
  129. package/src/install/cursor.ts +0 -68
  130. package/src/install/hermes-agent.ts +0 -76
  131. package/src/install/hermes.ts +0 -78
  132. package/src/install/nlm-dir-perms.ts +0 -55
  133. package/src/install/ollama.ts +0 -284
  134. package/src/install/setup.ts +0 -489
  135. package/src/install/windsurf.ts +0 -68
  136. package/src/llm/classifier-box.ts +0 -64
  137. package/src/llm/deepseek-client.ts +0 -150
  138. package/src/llm/env-autoload.ts +0 -55
  139. package/src/llm/ollama-client.ts +0 -189
  140. package/src/mcp/server.ts +0 -534
  141. package/src/ports/fact-store.ts +0 -102
  142. package/src/ports/llm-client.ts +0 -52
  143. package/src/ports/logger.ts +0 -16
  144. package/src/ports/session-store.ts +0 -45
  145. package/src/ports/transcript-adapter.ts +0 -55
  146. package/src/shared/types.ts +0 -149
  147. package/src/ui/App.tsx +0 -58
  148. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  149. package/src/ui/components/SessionDrawer.tsx +0 -199
  150. package/src/ui/components/SideNav.tsx +0 -162
  151. package/src/ui/components/Skeleton.tsx +0 -107
  152. package/src/ui/index.html +0 -13
  153. package/src/ui/lib/actions.ts +0 -30
  154. package/src/ui/lib/api.ts +0 -92
  155. package/src/ui/lib/dataset.ts +0 -141
  156. package/src/ui/lib/registries.ts +0 -155
  157. package/src/ui/lib/view-settings.ts +0 -41
  158. package/src/ui/main.tsx +0 -15
  159. package/src/ui/pages/Live.tsx +0 -229
  160. package/src/ui/pages/Pulse.tsx +0 -415
  161. package/src/ui/pages/Recall.tsx +0 -190
  162. package/src/ui/pages/River.tsx +0 -354
  163. package/src/ui/pages/Search.tsx +0 -386
  164. package/src/ui/pages/Stub.tsx +0 -9
  165. package/src/ui/pages/Thread.tsx +0 -473
  166. package/src/ui/pages/settings/Classifier.tsx +0 -227
  167. package/src/ui/pages/settings/Data.tsx +0 -190
  168. package/src/ui/pages/settings/Index.tsx +0 -65
  169. package/src/ui/pages/settings/Labels.tsx +0 -224
  170. package/src/ui/pages/settings/Providers.tsx +0 -305
  171. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  172. package/src/ui/pages/settings/Sources.tsx +0 -326
  173. package/src/ui/pages/settings/Views.tsx +0 -96
  174. package/src/ui/styles.css +0 -1890
  175. package/src/ui/tsconfig.json +0 -21
  176. package/src/ui/vite.config.ts +0 -19
  177. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  178. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  179. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  180. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  181. package/tests/fixtures/facts.ts +0 -17
  182. package/tests/fixtures/golden-corpus.ts +0 -85
  183. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  184. package/tests/fixtures/hermes/paired_session.json +0 -23
  185. package/tests/fixtures/hermes/request_dump.json +0 -28
  186. package/tests/fixtures/hermes/session_iso.json +0 -38
  187. package/tests/fixtures/hermes/session_unix.json +0 -38
  188. package/tests/fixtures/hermes/system_only.json +0 -18
  189. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  190. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  191. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  192. package/tests/fixtures/sessions.ts +0 -22
  193. package/tests/integration/backfill-facts.test.ts +0 -362
  194. package/tests/integration/citation-explicit.test.ts +0 -111
  195. package/tests/integration/cite-event.test.ts +0 -169
  196. package/tests/integration/cite-memo.test.ts +0 -87
  197. package/tests/integration/db-restore.test.ts +0 -153
  198. package/tests/integration/embed-backfill.test.ts +0 -176
  199. package/tests/integration/fact-supersedence.test.ts +0 -313
  200. package/tests/integration/fts-index.test.ts +0 -60
  201. package/tests/integration/getbyids-sqlite.test.ts +0 -100
  202. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  203. package/tests/integration/hook-claude-settings.test.ts +0 -218
  204. package/tests/integration/hook-log.test.ts +0 -54
  205. package/tests/integration/hook-memo.test.ts +0 -68
  206. package/tests/integration/hook-pre-compact.test.ts +0 -105
  207. package/tests/integration/hook-subagent-start.test.ts +0 -102
  208. package/tests/integration/http.test.ts +0 -401
  209. package/tests/integration/keyword-search-fts.test.ts +0 -66
  210. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  211. package/tests/integration/mcp.test.ts +0 -260
  212. package/tests/integration/memo-sweep.test.ts +0 -91
  213. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  214. package/tests/integration/provider-registry.test.ts +0 -107
  215. package/tests/integration/recall-golden.test.ts +0 -59
  216. package/tests/integration/recall-sqlite.test.ts +0 -169
  217. package/tests/integration/scheduler.test.ts +0 -391
  218. package/tests/integration/session-end-hook.test.ts +0 -48
  219. package/tests/integration/session-start-hook.test.ts +0 -126
  220. package/tests/integration/source-registry.test.ts +0 -122
  221. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  222. package/tests/integration/stop-hook.test.ts +0 -560
  223. package/tests/integration/wal-checkpoint.test.ts +0 -49
  224. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  225. package/tests/unit/core/adapters/aider.test.ts +0 -230
  226. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  227. package/tests/unit/core/adapters/cursor.test.ts +0 -485
  228. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  229. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  230. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  231. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  232. package/tests/unit/core/adapters/pi.test.ts +0 -110
  233. package/tests/unit/core/adapters/windsurf.test.ts +0 -416
  234. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  235. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  236. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  237. package/tests/unit/core/filter.test.ts +0 -40
  238. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  239. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  240. package/tests/unit/core/hook/gate.test.ts +0 -29
  241. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  242. package/tests/unit/core/hook/select.test.ts +0 -66
  243. package/tests/unit/core/match-fields.test.ts +0 -39
  244. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  245. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  246. package/tests/unit/core/query-shape.test.ts +0 -92
  247. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  248. package/tests/unit/core/recall-service.test.ts +0 -200
  249. package/tests/unit/core/storage/live-status.test.ts +0 -54
  250. package/tests/unit/core/tokenize.test.ts +0 -32
  251. package/tests/unit/core/useful-scan.test.ts +0 -537
  252. package/tests/unit/llm/embed.test.ts +0 -93
  253. package/tests/unit/llm/ollama-client.test.ts +0 -124
  254. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  255. package/tsconfig.json +0 -31
  256. package/tsconfig.test.json +0 -11
  257. package/vitest.config.ts +0 -22
@@ -1,200 +0,0 @@
1
- /**
2
- * Classifier prompt + transcript helpers. Centralized so every LLMClient
3
- * implementation hits the same prompt (parity with the Python daemon).
4
- *
5
- * Hard cap at 15K chars matches `classifier.py` MAX_TRANSCRIPT_CHARS:
6
- * smaller models (phi4-mini, qwen) pattern-match JSON from the transcript
7
- * above that size. Long sessions get first-half + last-half with a
8
- * separator to preserve opening intent + closing decisions.
9
- *
10
- * Phase B.2: prompt now also asks for a `facts` array of normalized
11
- * (subject, predicate, value) triples for the FactStore. The closed
12
- * predicate vocabulary is embedded in the prompt so deterministic
13
- * supersedence (Phase B.4) actually catches collisions instead of
14
- * fragmenting on synonymous predicates. See docs/plans/factstore-design.md.
15
- */
16
-
17
- /**
18
- * Closed predicate vocabulary. Approximately 25 high-leverage predicates
19
- * covering the most common (subject, predicate, value) shapes Edward
20
- * actually writes about in sessions.
21
- *
22
- * Vocab evolution (Phase B.5 backfill, 2026-05-19): the 168-session pilot
23
- * showed `other` getting used 43% of the time — it became a catch-all for
24
- * narrative observations that don't fit the (subject, predicate, value)
25
- * shape at all. Removed. The classifier prompt now instructs the model to
26
- * SKIP facts that don't fit (leave them in decisions[]/open[] instead).
27
- * Added `description`, `commit`, `cost` from observed high-frequency
28
- * patterns in the pilot batch's `other` bucket.
29
- *
30
- * Adding entries here is cheap and forwards-compatible: old facts stay,
31
- * new ingests can use the new predicate. Removing entries is not — old
32
- * facts referencing a retired predicate would stop matching by deterministic
33
- * supersedence, so prefer to mark deprecated rather than delete. (Existing
34
- * `other`-predicate facts from the pilot stay in the DB and are filterable
35
- * at query time; the coercer will drop new `other` writes.)
36
- */
37
- export const PREDICATE_VOCABULARY = [
38
- "framework",
39
- "endpoint",
40
- "model",
41
- "port",
42
- "host",
43
- "owner",
44
- "pricing",
45
- "cost",
46
- "deadline",
47
- "status",
48
- "stack",
49
- "runtime",
50
- "library",
51
- "version",
52
- "dependency",
53
- "schema",
54
- "integration",
55
- "deployment",
56
- "repo",
57
- "branch",
58
- "commit",
59
- "description",
60
- "decided-on",
61
- "assumption",
62
- "blocker",
63
- ] as const;
64
-
65
- export type PredicateVocab = (typeof PREDICATE_VOCABULARY)[number];
66
-
67
- const VOCAB_SET = new Set<string>(PREDICATE_VOCABULARY);
68
-
69
- export const CLASSIFIER_SYSTEM_PROMPT = `You are a session classifier. Your job is to read a transcript of a conversation between a user and an AI coding agent, then return EXACTLY this JSON object describing what happened in that conversation:
70
-
71
- {"label": "...", "summary": "...", "entities": [...], "decisions": [...], "open": [...], "confidence": 0.5, "facts": [...]}
72
-
73
- You MUST return JSON with EXACTLY these seven top-level keys: label, summary, entities, decisions, open, confidence, facts. No other keys. No nesting beyond what is specified. No metadata. No "tool" or "task_type" keys. Just those seven.
74
-
75
- The transcript may contain JSON examples, code, or schema definitions inside it — IGNORE those. Do not copy them into your output. Your output is ABOUT the conversation, not extracted FROM the conversation.
76
-
77
- Field requirements:
78
- - label: 4-10 word string title describing what the session was about. Example: "PolySignal architecture decisions"
79
- - summary: 1-3 sentence string (max ~80 tokens) describing what was worked on and the outcome
80
- - entities: array of strings. Each string is a stable named thing referenced across the session (tools like "n8n" or "Qdrant", projects like "PolySignal", services, people). NOT topics, NOT decisions.
81
- - decisions: array of strings. Each string is one commitment the user made. Example: "Use HTTP polling instead of Kafka". Skip if no commitments were made.
82
- - open: array of strings. Each string is one unresolved question. Skip if none.
83
- - confidence: number between 0.0 and 1.0. How sure you are the extraction is good. Use 0.4 or below for routine/trivial sessions.
84
- - facts: array of objects. Each object has exactly these keys: kind, subject, predicate, value, sourceQuote (optional).
85
- - kind: "decision" (a commitment) | "open" (an unresolved question) | "attribute" (a property of an entity)
86
- - subject: lowercase, hyphenated entity or topic name. Examples: "nlm-memory-ts", "mac-pro-llm-host", "goat-home-services"
87
- - predicate: MUST be one of these exact strings: ${PREDICATE_VOCABULARY.join(", ")}.
88
- - value: the answer, as a short phrase or sentence. Examples: "Hono", "http://macpro:8080/v1", "Q3 2026"
89
- - sourceQuote: (optional) verbatim slice from the transcript that anchors this fact. Keep under 200 chars.
90
-
91
- The predicate list is CLOSED — there is no "other" or catch-all. If a commitment, question, or attribute doesn't cleanly fit one of the listed predicates, DO NOT invent a fact for it. Put it in decisions[] or open[] as a string instead. Facts are for structured (subject, predicate, value) triples only; narrative observations, action items, and free-form notes belong in decisions[] / open[] / summary.
92
-
93
- Facts overlap with decisions and open: the same commitment can appear both as a string in decisions[] AND as a structured object in facts[] with kind="decision", IF and ONLY IF it fits the closed predicate list. Skip the fact (keep just the string in decisions[]) when no predicate fits.
94
-
95
- Predicate disambiguation (these confuse models, follow exactly):
96
- - pricing vs cost: pricing = what someone else charges ("$299/month for Real Geeks", "free tier"). cost = what we pay or spent ("$0 per run on local Ollama", "$750 invoice"). Never use pricing for colors, dimensions, or anything not a price.
97
- - commit vs version: commit = git SHA (7+ hex chars, e.g. "cb5b940", "63596c3"). version = semver / release tag ("v4", "DSM 7.2.2", "Postgres 15", "0.3.6"). Use commit for any explicit git reference even if short-form.
98
- - description vs status: description = what a thing IS ("rich text editor framework by Meta"). status = what state it's in right now ("running via pm2", "not yet started", "blocked on review").
99
-
100
- Return ONLY the JSON object. No markdown code fences. No prose before or after.`;
101
-
/** Default transcript budget, measured in UTF-16 code units (what `String.length` counts). */
export const MAX_TRANSCRIPT_CHARS = 15_000;

/**
 * Shrink a transcript to at most `maxChars` characters.
 *
 * Text that already fits is returned untouched. Longer text is reduced to its
 * opening slice plus its closing slice, joined by a marker line, so both the
 * opening intent and the closing decisions survive.
 *
 * @param text - Full transcript text.
 * @param maxChars - Upper bound on the returned length. Defaults to
 *   `MAX_TRANSCRIPT_CHARS`.
 * @returns The original text, or the head + marker + tail form. When the
 *   budget is too small to hold the marker and any content on both sides,
 *   only the first `maxChars` characters are returned.
 */
export function truncateTranscript(text: string, maxChars: number = MAX_TRANSCRIPT_CHARS): string {
  if (text.length <= maxChars) return text;
  // 80 characters are held back for the marker line (which is shorter than
  // that), so head + marker + tail stays within maxChars.
  const half = Math.floor((maxChars - 80) / 2);
  if (half <= 0) {
    // A non-positive `half` would make slice() count from the wrong end and
    // return more than maxChars (or nothing but the marker). Fall back to a
    // plain head cut, which always honours the bound.
    return text.slice(0, Math.max(0, maxChars));
  }
  return (
    text.slice(0, half) +
    "\n\n[... transcript truncated; below is the closing portion ...]\n\n" +
    text.slice(text.length - half)
  );
}
113
-
/**
 * Matches a Markdown code-fence delimiter: an opening fence at the start of a
 * line (optionally tagged `json`, in any letter case) together with the
 * whitespace after it, or a closing fence at the end of a line together with
 * the whitespace before it.
 */
const FENCE_RE = /^```(?:json)?\s*|\s*```$/gim;

/**
 * Remove Markdown code fences wrapped around a model response.
 *
 * @param text - Raw model output, possibly wrapped in ``` or ```json fences.
 * @returns The text with fence delimiters removed and outer whitespace trimmed.
 */
export function stripJsonFences(text: string): string {
  return text.replace(FENCE_RE, "").trim();
}
119
-
const REQUIRED_KEYS = ["label", "summary", "entities", "decisions", "open", "confidence"] as const;

/**
 * Structural check for a parsed classifier response.
 *
 * Passes when `data` is a non-null, non-array object that carries every key
 * in `REQUIRED_KEYS`. Only key presence is tested; value types are not
 * inspected here. `facts` is deliberately optional so that output produced
 * without it (older models, existing fixtures) is still accepted.
 *
 * @param data - Value obtained from parsing the model output.
 * @returns `true` when all required keys are present.
 */
export function validateClassifierJson(data: unknown): data is Record<string, unknown> {
  if (typeof data !== "object" || data === null || Array.isArray(data)) {
    return false;
  }
  for (const key of REQUIRED_KEYS) {
    if (!(key in data)) return false;
  }
  return true;
}
130
-
/**
 * Assemble the user message sent to the classifier.
 *
 * The transcript is passed through `truncateTranscript` with its default
 * budget. When `priorContext` is non-empty it is placed first, under its own
 * heading and separated from the transcript section by a blank line.
 *
 * @param transcript - Raw session transcript.
 * @param priorContext - Previously filed context; an empty string omits the section.
 * @returns The prompt text.
 */
export function buildUserPrompt(transcript: string, priorContext: string): string {
  const transcriptSection = `TRANSCRIPT TO CLASSIFY:\n${truncateTranscript(transcript)}`;
  if (!priorContext) {
    return transcriptSection;
  }
  return `PRIOR CONTEXT (already filed):\n${priorContext}\n\n${transcriptSection}`;
}
138
-
/** A fact triple after normalization; `predicate` is always a vocabulary member. */
interface CoercedFact {
  kind: "decision" | "open" | "attribute";
  subject: string;
  predicate: string;
  value: string;
  sourceQuote?: string;
}

/**
 * Normalize the `facts` array of a classifier response.
 *
 * An entry is kept only when it is a plain object whose `kind` is one of
 * "decision" / "open" / "attribute", whose `subject`, `predicate` and `value`
 * are non-empty after trimming, and whose predicate is in `VOCAB_SET`.
 * `kind`, `subject` and `predicate` are lower-cased. A string `sourceQuote`
 * is trimmed and capped at 500 characters; anything else is omitted.
 *
 * Fields must be primitives (string, number or boolean). Objects and arrays
 * are treated as missing, because `String()` on them yields text such as
 * "[object Object]" that would otherwise be stored as fact content.
 *
 * @param raw - The `facts` value from the parsed response; non-arrays yield `[]`.
 * @returns The surviving facts, in input order.
 */
function coerceFacts(raw: unknown): CoercedFact[] {
  if (!Array.isArray(raw)) return [];
  // Textual form of a primitive field; "" for null/undefined and for
  // non-primitives, so the emptiness check below discards them.
  const scalarText = (v: unknown): string => {
    if (typeof v === "string") return v;
    if (typeof v === "number" || typeof v === "boolean") return String(v);
    return "";
  };
  const out: CoercedFact[] = [];
  for (const item of raw) {
    if (!item || typeof item !== "object" || Array.isArray(item)) continue;
    const o = item as Record<string, unknown>;
    const kindRaw = scalarText(o["kind"]).toLowerCase().trim();
    if (kindRaw !== "decision" && kindRaw !== "open" && kindRaw !== "attribute") continue;
    const subject = scalarText(o["subject"]).toLowerCase().trim();
    const predicateRaw = scalarText(o["predicate"]).toLowerCase().trim();
    const value = scalarText(o["value"]).trim();
    if (!subject || !predicateRaw || !value) continue;
    // Closed vocabulary: a fact with an unrecognized predicate is dropped
    // entirely. The prompt tells the model to keep such observations in
    // decisions[]/open[]; this enforces the policy defensively in case the
    // model emits an off-vocab predicate anyway.
    if (!VOCAB_SET.has(predicateRaw)) continue;
    const predicate = predicateRaw;
    const sourceQuoteRaw = o["sourceQuote"];
    const sourceQuote =
      typeof sourceQuoteRaw === "string" && sourceQuoteRaw.trim().length > 0
        ? sourceQuoteRaw.trim().slice(0, 500)
        : undefined;
    const fact: CoercedFact = { kind: kindRaw, subject, predicate, value };
    if (sourceQuote !== undefined) fact.sourceQuote = sourceQuote;
    out.push(fact);
  }
  return out;
}
177
-
/**
 * Convert a validated classifier response into its typed, normalized form.
 *
 * - `label`: trimmed, capped at 120 characters, "Untitled" when empty.
 * - `summary`: trimmed.
 * - `entities` / `decisions` / `open`: trimmed, non-empty entries only;
 *   a non-array yields `[]`.
 * - `confidence`: numeric value clamped to the 0..1 range the prompt asks
 *   for; 0.5 when the value is absent or not a finite number.
 * - `facts`: delegated to `coerceFacts`.
 *
 * Text fields and list items must be primitives (string, number, boolean).
 * Objects and arrays are treated as empty instead of being stringified into
 * text such as "[object Object]".
 *
 * @param data - Parsed response that already passed key validation.
 * @returns The normalized classification.
 */
export function coerceClassifyResult(data: Record<string, unknown>): {
  label: string;
  summary: string;
  entities: string[];
  decisions: string[];
  open: string[];
  confidence: number;
  facts: CoercedFact[];
} {
  // Trimmed text of a primitive; "" for null/undefined and non-primitives.
  const text = (v: unknown): string => {
    if (typeof v === "string") return v.trim();
    if (typeof v === "number" || typeof v === "boolean") return String(v).trim();
    return "";
  };
  const strArray = (v: unknown): string[] => {
    if (!Array.isArray(v)) return [];
    return v.map((x) => text(x)).filter((s) => s.length > 0);
  };
  const label = text(data["label"]).slice(0, 120) || "Untitled";
  const summary = text(data["summary"]);
  const entities = strArray(data["entities"]);
  const decisions = strArray(data["decisions"]);
  const open = strArray(data["open"]);
  const conf = Number(data["confidence"] ?? 0.5);
  // Out-of-range values (e.g. a percentage such as 85) are pinned to the
  // nearest bound rather than passed through.
  const confidence = Number.isFinite(conf) ? Math.min(1, Math.max(0, conf)) : 0.5;
  const facts = coerceFacts(data["facts"]);
  return { label, summary, entities, decisions, open, confidence, facts };
}
@@ -1,463 +0,0 @@
1
- /**
2
- * buildDataset — read projection over canonical.sqlite that hydrates every
3
- * UI page (pulse, river, search, thread).
4
- *
5
- * Ports the read paths of `dataset.py`. Action-driven overlays (dismissed
6
- * alerts, snoozed entities, retired labels, merged variants) are deferred:
7
- * the action log isn't yet exposed by the TS daemon. Returns persisted
8
- * state directly.
9
- */
10
-
11
- import { existsSync } from "node:fs";
12
- import Database from "better-sqlite3";
13
- import * as sqliteVec from "sqlite-vec";
14
- import { liveSessionStatus } from "@core/storage/live-status.js";
15
- import { loadActionOverlay, openQuestionId } from "@core/actions/overlay.js";
16
- import type { ActionOverlay } from "@core/actions/overlay.js";
17
- import type { SessionStatus } from "@shared/types.js";
18
-
19
- export interface DatasetSession {
20
- readonly id: string;
21
- readonly date: string;
22
- readonly started_at: string | null;
23
- readonly ended_at: string | null;
24
- readonly label: string;
25
- readonly summary: string;
26
- readonly entities: ReadonlyArray<string>;
27
- readonly decisions: ReadonlyArray<string>;
28
- readonly open: ReadonlyArray<string>;
29
- readonly open_questions: ReadonlyArray<{ id: string; text: string; resolved: false }>;
30
- readonly status: SessionStatus;
31
- readonly duration_min: number;
32
- readonly runtime: string;
33
- readonly supersedes?: string;
34
- readonly superseded_by?: string;
35
- }
36
-
37
- export interface DatasetEntity {
38
- readonly canonical: string;
39
- readonly type: string;
40
- readonly status: string;
41
- readonly session_count: number;
42
- readonly last_seen_session: string | null;
43
- }
44
-
45
- export interface DatasetResponse {
46
- readonly meta: {
47
- readonly last_sync: string;
48
- readonly sessions_total: number;
49
- readonly entities_total: number;
50
- readonly db_present: boolean;
51
- readonly db_path: string;
52
- };
53
- readonly sessions: ReadonlyArray<DatasetSession>;
54
- readonly entities: ReadonlyArray<DatasetEntity>;
55
- readonly entity_colors: Record<string, string>;
56
- readonly entity_type: Record<string, string>;
57
- readonly entity_status: Record<string, string>;
58
- readonly metrics: {
59
- readonly this_week: number;
60
- readonly last_week: number;
61
- readonly sparkline: ReadonlyArray<number>;
62
- readonly healthy: number;
63
- readonly sparse: number;
64
- readonly stale: number;
65
- readonly closed_decisions: number;
66
- };
67
- readonly alerts: ReadonlyArray<{
68
- readonly id: string;
69
- readonly type: "stale";
70
- readonly severity: "high" | "medium";
71
- readonly entity: string;
72
- readonly summary: string;
73
- readonly sessions: ReadonlyArray<string>;
74
- readonly age_days: number;
75
- readonly last_touch_at: string | null;
76
- }>;
77
- readonly runtimes: ReadonlyArray<DatasetRuntime>;
78
- }
79
-
80
- export interface DatasetRuntime {
81
- readonly name: string;
82
- readonly status: "active" | "idle" | "dormant";
83
- readonly sessions_total: number;
84
- readonly this_week: number;
85
- readonly last_week: number;
86
- readonly last_session_at: string | null;
87
- }
88
-
89
- interface SessionRow {
90
- id: string;
91
- started_at: string | null;
92
- ended_at: string | null;
93
- duration_min: number | null;
94
- label: string;
95
- summary: string;
96
- status: "active" | "closed" | "superseded";
97
- transcript_path: string | null;
98
- runtime: string;
99
- }
100
-
101
- interface EntityRow {
102
- session_id: string;
103
- entity_canonical: string;
104
- }
105
-
106
- interface MarkerRow {
107
- session_id: string;
108
- kind: "decision" | "open";
109
- text: string;
110
- position: number;
111
- }
112
-
113
- interface EdgeRow {
114
- from_session: string;
115
- to_session: string;
116
- kind: "supersedes" | "continues";
117
- }
118
-
119
- interface EntityCatalogRow {
120
- canonical: string;
121
- type: string;
122
- status: string;
123
- session_count: number;
124
- last_seen_session: string | null;
125
- }
126
-
/**
 * Build a dataset response that contains no sessions or entities.
 *
 * Every call produces fresh objects and stamps `meta.last_sync` with the
 * current time.
 *
 * @param dbPath - Path reported back in `meta.db_path`.
 * @param present - Value reported in `meta.db_present`.
 */
const EMPTY_DATASET = (dbPath: string, present: boolean): DatasetResponse => {
  const meta = {
    last_sync: new Date().toISOString(),
    sessions_total: 0,
    entities_total: 0,
    db_present: present,
    db_path: dbPath,
  };
  const metrics = {
    this_week: 0,
    last_week: 0,
    sparkline: [0, 0, 0, 0, 0, 0, 0],
    healthy: 0,
    sparse: 0,
    stale: 0,
    closed_decisions: 0,
  };
  return {
    meta,
    sessions: [],
    entities: [],
    entity_colors: {},
    entity_type: {},
    entity_status: {},
    metrics,
    alerts: [],
    runtimes: [],
  };
};
144
-
145
- export interface BuildDatasetOptions {
146
- /** Include path-shaped entities (filesystem leaks from the classifier).
147
- * Default false — they pollute the catalog without adding signal. */
148
- readonly includePaths?: boolean;
149
- }
150
-
/**
 * Read the dataset projection from the SQLite file at `dbPath`.
 *
 * When the file does not exist, an empty dataset flagged `db_present: false`
 * is returned. Otherwise the database is opened read-only, projected via
 * `projectFromDb`, and closed again even if the projection throws.
 *
 * @param dbPath - Filesystem path of the SQLite database.
 * @param options - `includePaths` (default `false`) is forwarded to the projection.
 * @returns The dataset response.
 */
export function buildDataset(dbPath: string, options: BuildDatasetOptions = {}): DatasetResponse {
  const dbMissing = !existsSync(dbPath);
  if (dbMissing) {
    return EMPTY_DATASET(dbPath, false);
  }

  const db = new Database(dbPath, { readonly: true });
  try {
    try {
      sqliteVec.load(db);
    } catch {
      // Best effort: the vec extension is only required for semantic search,
      // so a failed load is tolerated here.
    }
    const includePaths = options.includePaths ?? false;
    return projectFromDb(db, dbPath, includePaths);
  } finally {
    db.close();
  }
}
165
-
/**
 * Heuristic for "this entity is actually a filesystem path the classifier
 * leaked into the catalog". Catches things like ".claude/agents/",
 * "bridge/server.js", "deploy.sh", "nlm-memory-spec.md" while leaving
 * real entities like "n8n", "Node.js", "NocoDB", "Whtnxt Agent" alone.
 */
const CODE_FILE_EXT_RE =
  /\.(?:md|markdown|txt|ts|tsx|js|jsx|mjs|cjs|py|pyi|json|yaml|yml|toml|sh|bash|zsh|css|html|sql|xml|env|ini|cfg|conf|lock)$/i;

/**
 * Product names (lower-cased) that look like a file name or dotfile but are
 * real entities. Without this list the extension and dotfile rules below
 * would classify e.g. "Node.js" or ".NET" as paths.
 */
const NON_PATH_DOTTED_ENTITIES: ReadonlySet<string> = new Set([
  ".net",
  "alpine.js",
  "backbone.js",
  "chart.js",
  "d3.js",
  "discord.js",
  "ember.js",
  "express.js",
  "moment.js",
  "nest.js",
  "next.js",
  "node.js",
  "nuxt.js",
  "p5.js",
  "react.js",
  "three.js",
  "vue.js",
  "web3.js",
]);

/**
 * Decide whether an entity name looks like a filesystem path.
 *
 * @param canonical - Canonical entity name.
 * @returns `true` when the name contains a slash or backslash, is a dotfile
 *   name, or ends in a known source/config file extension. `false` for the
 *   empty string, for "...", and for names listed in
 *   `NON_PATH_DOTTED_ENTITIES` (compared case-insensitively).
 */
export function isPathShapedEntity(canonical: string): boolean {
  if (!canonical) return false;
  // Any slash → looks like a path (forward or back). Checked before the
  // exemption list so "node.js/lib/fs.js" still counts as a path.
  if (canonical.includes("/") || canonical.includes("\\")) return true;
  // Well-known product names that only resemble file names.
  if (NON_PATH_DOTTED_ENTITIES.has(canonical.trim().toLowerCase())) return false;
  // Hidden-file prefix only when it's clearly a dotfile (e.g. ".env", ".mcp.json").
  if (canonical.startsWith(".") && canonical.length > 1 && canonical !== "...") return true;
  // Common source-code file extensions.
  return CODE_FILE_EXT_RE.test(canonical);
}
185
-
186
/**
 * Reads sessions, session entities, markers, session edges and the entity
 * catalog from an open database and assembles the dataset payload.
 *
 * @param db           Open database handle; only `prepare(...).all()` reads are issued here.
 * @param dbPath       Path echoed back in `meta.db_path`.
 * @param includePaths When false, entities for which `isPathShapedEntity`
 *                     returns true are removed from the catalog and from each
 *                     session's entity list.
 * @returns `EMPTY_DATASET(dbPath, true)` when there are no session rows,
 *          otherwise the populated dataset.
 */
function projectFromDb(db: Database.Database, dbPath: string, includePaths: boolean): DatasetResponse {
  const sessionRows = db
    .prepare<[], SessionRow>(`
      SELECT id, started_at, ended_at, duration_min, label, summary,
             status, transcript_path, runtime
      FROM sessions
      ORDER BY started_at ASC
    `)
    .all();
  if (sessionRows.length === 0) {
    return EMPTY_DATASET(dbPath, true);
  }

  // Appends `value` to the list stored under `key`, creating the list on first use.
  const appendTo = <V>(index: Map<string, V[]>, key: string, value: V): void => {
    const bucket = index.get(key);
    if (bucket === undefined) index.set(key, [value]);
    else bucket.push(value);
  };

  // session id → canonical entity names attached to that session.
  const entitiesBySession = new Map<string, string[]>();
  const sessionEntityRows = db
    .prepare<[], EntityRow>("SELECT session_id, entity_canonical FROM session_entities ORDER BY session_id")
    .all();
  for (const row of sessionEntityRows) {
    appendTo(entitiesBySession, row.session_id, row.entity_canonical);
  }

  // Markers of kind "decision" become decisions; every other kind is an open question.
  const decisionsBySession = new Map<string, string[]>();
  const openBySession = new Map<string, { id: string; text: string }[]>();
  const markerRows = db
    .prepare<[], MarkerRow>("SELECT session_id, kind, text, position FROM markers ORDER BY session_id, position")
    .all();
  for (const marker of markerRows) {
    if (marker.kind === "decision") {
      appendTo(decisionsBySession, marker.session_id, marker.text);
      continue;
    }
    appendTo(openBySession, marker.session_id, {
      id: openQuestionId(marker.session_id, marker.text),
      text: marker.text,
    });
  }

  // Edge lookups; a later row with the same key overwrites an earlier one.
  const supersedesBy = new Map<string, string>();
  const supersededByBy = new Map<string, string>();
  const continuesBy = new Map<string, string>();
  const edgeRows = db
    .prepare<[], EdgeRow>("SELECT from_session, to_session, kind FROM session_edges")
    .all();
  for (const edge of edgeRows) {
    switch (edge.kind) {
      case "supersedes":
        supersedesBy.set(edge.from_session, edge.to_session);
        supersededByBy.set(edge.to_session, edge.from_session);
        break;
      case "continues":
        continuesBy.set(edge.from_session, edge.to_session);
        break;
      default:
        break;
    }
  }

  const allEntityRows = db
    .prepare<[], EntityCatalogRow>(`
      SELECT canonical, type, status, session_count, last_seen_session
      FROM entities ORDER BY session_count DESC
    `)
    .all();

  // Apply the action overlay on top of the stored rows: "retired" takes
  // precedence over "snoozed", and a non-empty label replaces the type.
  const overlay = loadActionOverlay(db);
  for (const entity of allEntityRows) {
    const name = entity.canonical;
    if (overlay.retiredEntities.has(name)) {
      entity.status = "retired";
    } else if (overlay.snoozedEntities.has(name)) {
      entity.status = "snoozed";
    }
    const relabeled = overlay.labeledEntities.get(name);
    if (relabeled) {
      entity.type = relabeled;
    }
  }

  const entityRows = includePaths
    ? allEntityRows
    : allEntityRows.filter((entity) => !isPathShapedEntity(entity.canonical));
  const keptEntities = new Set(entityRows.map((entity) => entity.canonical));

  const sessions: DatasetSession[] = sessionRows.map((row) => {
    const status = liveSessionStatus(row.transcript_path, row.status);
    const supersedes = supersedesBy.get(row.id);
    const supersededBy = supersededByBy.get(row.id);

    const attached = entitiesBySession.get(row.id) ?? [];
    const entities = includePaths ? attached : attached.filter((name) => keptEntities.has(name));

    // Open questions that were resolved or promoted leave the active list;
    // promoted ones are appended to the session's decisions instead.
    const recordedOpen = openBySession.get(row.id) ?? [];
    const activeOpen = recordedOpen.filter(
      (question) => !overlay.resolvedOpens.has(question.id) && !overlay.promotedOpens.has(question.id),
    );
    const promotedDecisions = recordedOpen
      .filter((question) => overlay.promotedOpens.has(question.id))
      .map((question) => overlay.promotedOpens.get(question.id)!);
    const recordedDecisions = decisionsBySession.get(row.id) ?? [];

    return {
      id: row.id,
      date: (row.started_at ?? "").slice(0, 10),
      started_at: row.started_at,
      ended_at: row.ended_at,
      label: row.label,
      summary: row.summary,
      entities,
      decisions: [...recordedDecisions, ...promotedDecisions],
      open: activeOpen.map((question) => question.text),
      open_questions: activeOpen.map((question) => ({
        id: question.id,
        text: question.text,
        resolved: false as const,
      })),
      status,
      duration_min: row.duration_min ?? 0,
      runtime: row.runtime,
      ...(supersedes !== undefined ? { supersedes } : {}),
      ...(supersededBy !== undefined ? { superseded_by: supersededBy } : {}),
    };
  });

  // continuesBy is in the dataset shape but unused by current UI; reserved for thread view.
  void continuesBy;

  // Per-entity lookup tables keyed by canonical name.
  const entityColors: Record<string, string> = {};
  const entityType: Record<string, string> = {};
  const entityStatus: Record<string, string> = {};
  for (const { canonical, type, status } of entityRows) {
    entityColors[canonical] = stableColor(canonical);
    entityType[canonical] = type;
    entityStatus[canonical] = status;
  }

  const metrics = computeMetrics(sessions, entityRows);
  const alerts = computeStaleAlerts(sessions, entityRows, overlay);
  const runtimes = computeRuntimes(sessions);

  return {
    meta: {
      last_sync: new Date().toISOString(),
      sessions_total: sessions.length,
      entities_total: entityRows.length,
      db_present: true,
      db_path: dbPath,
    },
    sessions,
    entities: entityRows,
    entity_colors: entityColors,
    entity_type: entityType,
    entity_status: entityStatus,
    metrics,
    alerts,
    runtimes,
  };
}
323
-
324
/**
 * Aggregates sessions per runtime name.
 *
 * A missing or blank `runtime` is reported as "unknown". For each runtime the
 * result carries the total session count, how many sessions started within
 * the last 7 days and within the 7 days before that, and the `started_at` of
 * its latest session. Status is "active" when the latest session started
 * within the past hour, "idle" within the past day, otherwise "dormant".
 *
 * @param sessions Sessions to aggregate; sessions whose `started_at` is
 *                 missing or unparseable count toward the total only.
 * @returns One entry per runtime, most recent `last_session_at` first.
 */
function computeRuntimes(sessions: ReadonlyArray<DatasetSession>): DatasetRuntime[] {
  const HOUR_MS = 3_600_000;
  const DAY_MS = 86_400_000;
  const now = Date.now();
  const thisWeekStart = now - 7 * DAY_MS;
  const lastWeekStart = now - 14 * DAY_MS;

  type Tally = {
    total: number;
    thisWeek: number;
    lastWeek: number;
    lastAt: number;
    lastAtIso: string | null;
  };
  const tallies = new Map<string, Tally>();

  for (const session of sessions) {
    const name = (session.runtime ?? "").trim() || "unknown";
    let tally = tallies.get(name);
    if (tally === undefined) {
      tally = { total: 0, thisWeek: 0, lastWeek: 0, lastAt: 0, lastAtIso: null };
      tallies.set(name, tally);
    }
    tally.total += 1;

    if (!session.started_at) continue;
    const startedMs = Date.parse(session.started_at);
    if (!Number.isFinite(startedMs)) continue;

    if (startedMs >= thisWeekStart) {
      tally.thisWeek += 1;
    } else if (startedMs >= lastWeekStart) {
      tally.lastWeek += 1;
    }
    if (startedMs > tally.lastAt) {
      tally.lastAt = startedMs;
      tally.lastAtIso = session.started_at;
    }
  }

  const runtimes: DatasetRuntime[] = Array.from(tallies, ([name, tally]) => {
    // lastAt stays 0 when no session had a usable timestamp → treat as infinitely old.
    const age = tally.lastAt ? now - tally.lastAt : Infinity;
    let status: DatasetRuntime["status"];
    if (age <= HOUR_MS) status = "active";
    else if (age <= DAY_MS) status = "idle";
    else status = "dormant";
    return {
      name,
      status,
      sessions_total: tally.total,
      this_week: tally.thisWeek,
      last_week: tally.lastWeek,
      last_session_at: tally.lastAtIso,
    };
  });

  const sortKey = (iso: string | null): number => Date.parse(iso ?? "0") || 0;
  runtimes.sort((a, b) => sortKey(b.last_session_at) - sortKey(a.last_session_at));
  return runtimes;
}
371
-
372
/**
 * Computes headline activity and entity-health counters.
 *
 * - `this_week` / `last_week`: sessions started 0–7 and 7–14 days ago.
 * - `sparkline`: seven per-day counts for the past week, oldest first
 *   (index 6 holds sessions started less than one day ago).
 * - `healthy` / `sparse` / `stale`: entities with at least one session.
 *   An entity is stale when its last-seen session started more than 30 days
 *   ago or cannot be dated; otherwise healthy with 3+ sessions, else sparse.
 * - `closed_decisions`: decisions summed over sessions whose status is not
 *   "superseded".
 *
 * @param sessions   Sessions already projected into dataset shape.
 * @param entityRows Entity catalog rows to classify.
 */
function computeMetrics(
  sessions: ReadonlyArray<DatasetSession>,
  entityRows: ReadonlyArray<EntityCatalogRow>,
) {
  const DAY_MS = 86_400_000;
  const now = Date.now();
  // Age in days of an ISO timestamp; NaN when it is missing or unparseable.
  const ageInDays = (iso: string | null | undefined): number =>
    iso ? (now - Date.parse(iso)) / DAY_MS : NaN;

  const sparkline: number[] = Array.from({ length: 7 }, () => 0);
  let thisWeek = 0;
  let lastWeek = 0;
  for (const session of sessions) {
    const age = ageInDays(session.started_at);
    // Undated and future-dated sessions are not counted.
    if (!Number.isFinite(age) || age < 0) continue;
    if (age < 7) {
      thisWeek += 1;
      const slot = 6 - Math.min(6, Math.floor(age));
      sparkline[slot] = (sparkline[slot] ?? 0) + 1;
    } else if (age < 14) {
      lastWeek += 1;
    }
  }

  const sessionsById = new Map(sessions.map((session) => [session.id, session]));
  let healthy = 0;
  let sparse = 0;
  let stale = 0;
  for (const entity of entityRows) {
    if (entity.session_count === 0) continue;
    const lastSeen = sessionsById.get(entity.last_seen_session ?? "");
    const measured = ageInDays(lastSeen?.started_at);
    // An entity whose last session cannot be dated is treated as very old.
    const age = Number.isFinite(measured) ? measured : 999;
    if (age > 30) {
      stale += 1;
    } else if (entity.session_count >= 3) {
      healthy += 1;
    } else {
      sparse += 1;
    }
  }

  let closedDecisions = 0;
  for (const session of sessions) {
    if (session.status !== "superseded") closedDecisions += session.decisions.length;
  }

  return {
    this_week: thisWeek,
    last_week: lastWeek,
    sparkline,
    healthy,
    sparse,
    stale,
    closed_decisions: closedDecisions,
  };
}
411
-
412
/**
 * Produces a "stale" alert for every entity whose last-seen session started
 * more than 30 days ago.
 *
 * Entities are skipped when they have no sessions, are retired or snoozed,
 * have no datable last-seen session, or when the alert id is present in the
 * overlay's dismissed or snoozed alert sets. The alert id is `stale_` plus
 * the canonical name with every non-alphanumeric character replaced by `_`.
 *
 * The summary reports the total number of open questions across all sessions
 * that mention the entity and quotes the first one, truncated to 80
 * characters.
 *
 * @param sessions   Sessions already projected into dataset shape.
 * @param entityRows Entity catalog rows to inspect.
 * @param overlay    Action overlay; only `dismissedAlerts` and `snoozedAlerts` are read.
 * @returns Alerts with "high" severity (older than 60 days) ahead of "medium".
 */
function computeStaleAlerts(
  sessions: ReadonlyArray<DatasetSession>,
  entityRows: ReadonlyArray<EntityCatalogRow>,
  overlay: ActionOverlay,
): DatasetResponse["alerts"] {
  const now = Date.now();
  const sessionsById = new Map(sessions.map((s) => [s.id, s]));
  const alerts: DatasetResponse["alerts"][number][] = [];
  for (const e of entityRows) {
    if (e.session_count === 0 || e.status === "retired" || e.status === "snoozed") continue;
    const last = sessionsById.get(e.last_seen_session ?? "");
    const lastT = last?.started_at ? Date.parse(last.started_at) : NaN;
    if (!Number.isFinite(lastT)) continue;
    const ageDays = Math.floor((now - lastT) / 86_400_000);
    if (ageDays <= 30) continue;

    const alertId = `stale_${e.canonical.replace(/[^A-Za-z0-9]/g, "_")}`;
    if (overlay.dismissedAlerts.has(alertId) || overlay.snoozedAlerts.has(alertId)) continue;

    // Keep the full list: truncating it before counting capped the reported
    // number of unresolved questions at 2 regardless of how many existed.
    const openOnEntity = sessions
      .filter((s) => s.entities.includes(e.canonical))
      .flatMap((s) => s.open);
    let summary = `Last touch ${ageDays} days ago`;
    const firstOpen = openOnEntity[0];
    if (firstOpen !== undefined) {
      const n = openOnEntity.length;
      const label = n === 1 ? "question" : "questions";
      summary += ` · ${n} unresolved open ${label}: "${firstOpen.slice(0, 80)}"`;
    }
    alerts.push({
      id: alertId,
      type: "stale",
      severity: ageDays > 60 ? "high" : "medium",
      entity: e.canonical,
      summary,
      sessions: last ? [last.id] : [],
      age_days: ageDays,
      last_touch_at: last?.started_at ?? null,
    });
  }
  // High-severity alerts first; the sort is stable, so catalog order is kept within a tier.
  alerts.sort((a, b) => (a.severity === "high" ? 0 : 1) - (b.severity === "high" ? 0 : 1));
  return alerts;
}
455
-
456
/** Fixed palette of hue angles that entity colors are drawn from. */
const HUES = [200, 270, 320, 30, 90, 150, 220, 290, 340, 50, 110, 170] as const;

/**
 * Picks a color for an entity name. The name's UTF-16 code units are folded
 * into a 32-bit hash (multiplier 31) that selects one entry of `HUES`, so the
 * same name always yields the same color.
 *
 * @param canonical Canonical entity name.
 * @returns An `hsl(...)` string with fixed 60% saturation and 55% lightness.
 */
function stableColor(canonical: string): string {
  let hash = 0;
  let idx = 0;
  while (idx < canonical.length) {
    // Math.imul keeps the multiplication in 32-bit integer space; `| 0` wraps the sum.
    hash = (Math.imul(hash, 31) + canonical.charCodeAt(idx)) | 0;
    idx += 1;
  }
  const slot = Math.abs(hash) % HUES.length;
  const hue = HUES[slot] ?? 200;
  return `hsl(${hue}, 60%, 55%)`;
}