nlm-memory 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/README.md +72 -34
  2. package/dist/cli/nlm.js +223 -33
  3. package/dist/cli/nlm.js.map +1 -1
  4. package/dist/core/adapters/cursor.d.ts +45 -0
  5. package/dist/core/adapters/cursor.js +397 -0
  6. package/dist/core/adapters/cursor.js.map +1 -0
  7. package/dist/core/adapters/from-source.js +10 -0
  8. package/dist/core/adapters/from-source.js.map +1 -1
  9. package/dist/core/adapters/windsurf.d.ts +44 -0
  10. package/dist/core/adapters/windsurf.js +299 -0
  11. package/dist/core/adapters/windsurf.js.map +1 -0
  12. package/dist/core/hook/claude-settings.d.ts +12 -5
  13. package/dist/core/hook/claude-settings.js +21 -6
  14. package/dist/core/hook/claude-settings.js.map +1 -1
  15. package/dist/core/sources/source-registry.d.ts +1 -1
  16. package/dist/core/sources/source-registry.js +18 -0
  17. package/dist/core/sources/source-registry.js.map +1 -1
  18. package/dist/core/storage/sqlite-session-store.d.ts +2 -0
  19. package/dist/core/storage/sqlite-session-store.js +38 -2
  20. package/dist/core/storage/sqlite-session-store.js.map +1 -1
  21. package/dist/hook/hook-auth.d.ts +13 -0
  22. package/dist/hook/hook-auth.js +19 -0
  23. package/dist/hook/hook-auth.js.map +1 -0
  24. package/dist/hook/prompt-recall-hook.js +7 -1
  25. package/dist/hook/prompt-recall-hook.js.map +1 -1
  26. package/dist/hook/session-start-hook.js +4 -1
  27. package/dist/hook/session-start-hook.js.map +1 -1
  28. package/dist/hook/stop-hook.js +4 -1
  29. package/dist/hook/stop-hook.js.map +1 -1
  30. package/dist/http/app.d.ts +2 -0
  31. package/dist/http/app.js +76 -1
  32. package/dist/http/app.js.map +1 -1
  33. package/dist/install/claude-code.js +1 -1
  34. package/dist/install/claude-code.js.map +1 -1
  35. package/dist/install/cursor.d.ts +25 -0
  36. package/dist/install/cursor.js +43 -0
  37. package/dist/install/cursor.js.map +1 -0
  38. package/dist/install/nlm-dir-perms.d.ts +19 -0
  39. package/dist/install/nlm-dir-perms.js +43 -0
  40. package/dist/install/nlm-dir-perms.js.map +1 -0
  41. package/dist/install/ollama.d.ts +18 -1
  42. package/dist/install/ollama.js +62 -7
  43. package/dist/install/ollama.js.map +1 -1
  44. package/dist/install/setup.d.ts +4 -0
  45. package/dist/install/setup.js +141 -18
  46. package/dist/install/setup.js.map +1 -1
  47. package/dist/install/windsurf.d.ts +25 -0
  48. package/dist/install/windsurf.js +43 -0
  49. package/dist/install/windsurf.js.map +1 -0
  50. package/dist/mcp/server.js +20 -1
  51. package/dist/mcp/server.js.map +1 -1
  52. package/dist/shared/types.d.ts +4 -0
  53. package/dist/ui/assets/{index-BA6IpU8g.css → index-Beo8psd-.css} +1 -1
  54. package/dist/ui/assets/index-CSPTTeeM.js +69 -0
  55. package/dist/ui/index.html +2 -2
  56. package/package.json +26 -1
  57. package/plugin/scripts/prompt-recall-hook.mjs +55 -4
  58. package/plugin/scripts/stop-hook.mjs +57 -6
  59. package/.agents/plugins/marketplace.json +0 -20
  60. package/.github/workflows/ci.yml +0 -30
  61. package/dist/ui/assets/index-B_qIVV0k.js +0 -69
  62. package/docs/methodology/re-derivation-rate.md +0 -112
  63. package/docs/methodology/useful-hit-rate.md +0 -79
  64. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  65. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  66. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  67. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  68. package/docs/plans/desktop-product.md +0 -69
  69. package/docs/plans/factstore-design.md +0 -236
  70. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1389
  71. package/logs/CHANGELOG/CHANGELOG.md +0 -337
  72. package/migrations/000_initial_schema.sql +0 -174
  73. package/migrations/001_entity_type_rename.sql +0 -17
  74. package/migrations/002_adapter_state_extend.sql +0 -12
  75. package/migrations/003_session_embeddings.sql +0 -11
  76. package/migrations/004_facts.sql +0 -46
  77. package/migrations/005_sources.sql +0 -31
  78. package/migrations/006_providers.sql +0 -33
  79. package/migrations/007_source_tokens.sql +0 -17
  80. package/migrations/008_fts_rebuild.sql +0 -9
  81. package/migrations/009_session_embedding_chunks.sql +0 -46
  82. package/migrations/010_sources_opencode.sql +0 -30
  83. package/migrations/011_sources_hermes_agent.sql +0 -30
  84. package/migrations/012_sources_aider.sql +0 -30
  85. package/migrations/013_adapter_state_failure_count.sql +0 -12
  86. package/plugin-hermes-agent/README.md +0 -49
  87. package/plugin-hermes-agent/__init__.py +0 -75
  88. package/plugin-hermes-agent/plugin.yaml +0 -15
  89. package/scripts/backfill-citations.mjs +0 -0
  90. package/scripts/build-codex-plugin.mjs +0 -61
  91. package/scripts/deepseek-probe.mjs +0 -67
  92. package/scripts/extract-triples.mjs +0 -207
  93. package/scripts/longmemeval/embedding-cache.ts +0 -77
  94. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  95. package/scripts/longmemeval/run-harness.ts +0 -315
  96. package/scripts/longmemeval/scorer.ts +0 -99
  97. package/scripts/longmemeval/tsconfig.json +0 -9
  98. package/scripts/longmemeval/types.ts +0 -35
  99. package/scripts/nlm-daily-digest.py +0 -239
  100. package/scripts/nlm-daily-digest.sh +0 -28
  101. package/src/cli/classify-parity.ts +0 -257
  102. package/src/cli/launchctl-helpers.ts +0 -49
  103. package/src/cli/nlm.ts +0 -885
  104. package/src/core/actions/actions-log.ts +0 -118
  105. package/src/core/actions/overlay.ts +0 -117
  106. package/src/core/adapters/aider.ts +0 -205
  107. package/src/core/adapters/claude-code.ts +0 -293
  108. package/src/core/adapters/common.ts +0 -54
  109. package/src/core/adapters/from-source.ts +0 -57
  110. package/src/core/adapters/hermes-agent.ts +0 -240
  111. package/src/core/adapters/hermes.ts +0 -277
  112. package/src/core/adapters/jsonl-generic.ts +0 -208
  113. package/src/core/adapters/opencode.ts +0 -281
  114. package/src/core/adapters/pi.ts +0 -264
  115. package/src/core/classifier/prompt.ts +0 -200
  116. package/src/core/dataset/build-dataset.ts +0 -463
  117. package/src/core/embedding/chunk-body.ts +0 -76
  118. package/src/core/embedding/embed-backfill.ts +0 -210
  119. package/src/core/embedding/embed-normalize.ts +0 -135
  120. package/src/core/facts/backfill-facts.ts +0 -254
  121. package/src/core/facts/extract-facts.ts +0 -50
  122. package/src/core/hook/citation-detect.ts +0 -124
  123. package/src/core/hook/cite-memo.ts +0 -68
  124. package/src/core/hook/claude-settings.ts +0 -166
  125. package/src/core/hook/gate.ts +0 -25
  126. package/src/core/hook/hook-log.ts +0 -41
  127. package/src/core/hook/memo-sweep.ts +0 -164
  128. package/src/core/hook/memo.ts +0 -67
  129. package/src/core/hook/pointer-block.ts +0 -26
  130. package/src/core/hook/select.ts +0 -32
  131. package/src/core/hook/transcript.ts +0 -121
  132. package/src/core/ingest/ingest-session.ts +0 -111
  133. package/src/core/providers/provider-models.ts +0 -100
  134. package/src/core/providers/provider-registry.ts +0 -196
  135. package/src/core/recall/citation-log.ts +0 -108
  136. package/src/core/recall/filter.ts +0 -27
  137. package/src/core/recall/index.ts +0 -6
  138. package/src/core/recall/match-fields.ts +0 -40
  139. package/src/core/recall/query-log.ts +0 -149
  140. package/src/core/recall/query-shape.ts +0 -66
  141. package/src/core/recall/recall-service.ts +0 -320
  142. package/src/core/recall/recent-log.ts +0 -59
  143. package/src/core/recall/tokenize.ts +0 -18
  144. package/src/core/recall/useful-scan.ts +0 -336
  145. package/src/core/recall-facts/fact-query-log.ts +0 -150
  146. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  147. package/src/core/scheduler/scan-once.ts +0 -142
  148. package/src/core/scheduler/scheduler.ts +0 -225
  149. package/src/core/sources/source-registry.ts +0 -260
  150. package/src/core/storage/db-restore.ts +0 -133
  151. package/src/core/storage/live-status.ts +0 -45
  152. package/src/core/storage/migrate.ts +0 -72
  153. package/src/core/storage/sqlite-fact-store.ts +0 -304
  154. package/src/core/storage/sqlite-session-store.ts +0 -765
  155. package/src/hook/prompt-recall-hook.ts +0 -174
  156. package/src/hook/session-end-hook.ts +0 -81
  157. package/src/hook/session-start-hook.ts +0 -165
  158. package/src/hook/stop-hook.ts +0 -236
  159. package/src/http/app.ts +0 -1137
  160. package/src/install/claude-code.ts +0 -128
  161. package/src/install/codex.ts +0 -367
  162. package/src/install/hermes-agent.ts +0 -76
  163. package/src/install/hermes.ts +0 -78
  164. package/src/install/ollama.ts +0 -211
  165. package/src/install/setup.ts +0 -368
  166. package/src/llm/classifier-box.ts +0 -64
  167. package/src/llm/deepseek-client.ts +0 -150
  168. package/src/llm/env-autoload.ts +0 -55
  169. package/src/llm/ollama-client.ts +0 -189
  170. package/src/mcp/server.ts +0 -534
  171. package/src/ports/fact-store.ts +0 -102
  172. package/src/ports/llm-client.ts +0 -52
  173. package/src/ports/logger.ts +0 -16
  174. package/src/ports/session-store.ts +0 -45
  175. package/src/ports/transcript-adapter.ts +0 -55
  176. package/src/shared/types.ts +0 -145
  177. package/src/ui/App.tsx +0 -58
  178. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  179. package/src/ui/components/SessionDrawer.tsx +0 -136
  180. package/src/ui/components/SideNav.tsx +0 -162
  181. package/src/ui/components/Skeleton.tsx +0 -107
  182. package/src/ui/index.html +0 -13
  183. package/src/ui/lib/actions.ts +0 -30
  184. package/src/ui/lib/api.ts +0 -92
  185. package/src/ui/lib/dataset.ts +0 -141
  186. package/src/ui/lib/registries.ts +0 -155
  187. package/src/ui/lib/view-settings.ts +0 -41
  188. package/src/ui/main.tsx +0 -15
  189. package/src/ui/pages/Live.tsx +0 -229
  190. package/src/ui/pages/Pulse.tsx +0 -415
  191. package/src/ui/pages/Recall.tsx +0 -190
  192. package/src/ui/pages/River.tsx +0 -308
  193. package/src/ui/pages/Search.tsx +0 -93
  194. package/src/ui/pages/Stub.tsx +0 -9
  195. package/src/ui/pages/Thread.tsx +0 -262
  196. package/src/ui/pages/settings/Classifier.tsx +0 -227
  197. package/src/ui/pages/settings/Data.tsx +0 -190
  198. package/src/ui/pages/settings/Index.tsx +0 -65
  199. package/src/ui/pages/settings/Labels.tsx +0 -224
  200. package/src/ui/pages/settings/Providers.tsx +0 -305
  201. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  202. package/src/ui/pages/settings/Sources.tsx +0 -326
  203. package/src/ui/pages/settings/Views.tsx +0 -96
  204. package/src/ui/styles.css +0 -1766
  205. package/src/ui/tsconfig.json +0 -21
  206. package/src/ui/vite.config.ts +0 -19
  207. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  208. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  209. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  210. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  211. package/tests/fixtures/facts.ts +0 -17
  212. package/tests/fixtures/golden-corpus.ts +0 -85
  213. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  214. package/tests/fixtures/hermes/paired_session.json +0 -23
  215. package/tests/fixtures/hermes/request_dump.json +0 -28
  216. package/tests/fixtures/hermes/session_iso.json +0 -38
  217. package/tests/fixtures/hermes/session_unix.json +0 -38
  218. package/tests/fixtures/hermes/system_only.json +0 -18
  219. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  220. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  221. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  222. package/tests/fixtures/sessions.ts +0 -22
  223. package/tests/integration/backfill-facts.test.ts +0 -362
  224. package/tests/integration/citation-explicit.test.ts +0 -111
  225. package/tests/integration/cite-event.test.ts +0 -169
  226. package/tests/integration/cite-memo.test.ts +0 -87
  227. package/tests/integration/db-restore.test.ts +0 -153
  228. package/tests/integration/embed-backfill.test.ts +0 -176
  229. package/tests/integration/fact-supersedence.test.ts +0 -313
  230. package/tests/integration/fts-index.test.ts +0 -60
  231. package/tests/integration/getbyids-sqlite.test.ts +0 -60
  232. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  233. package/tests/integration/hook-claude-settings.test.ts +0 -205
  234. package/tests/integration/hook-log.test.ts +0 -54
  235. package/tests/integration/hook-memo.test.ts +0 -68
  236. package/tests/integration/hook-pre-compact.test.ts +0 -105
  237. package/tests/integration/hook-subagent-start.test.ts +0 -102
  238. package/tests/integration/http.test.ts +0 -401
  239. package/tests/integration/keyword-search-fts.test.ts +0 -66
  240. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  241. package/tests/integration/mcp.test.ts +0 -248
  242. package/tests/integration/memo-sweep.test.ts +0 -91
  243. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  244. package/tests/integration/provider-registry.test.ts +0 -107
  245. package/tests/integration/recall-golden.test.ts +0 -59
  246. package/tests/integration/recall-sqlite.test.ts +0 -169
  247. package/tests/integration/scheduler.test.ts +0 -391
  248. package/tests/integration/session-end-hook.test.ts +0 -48
  249. package/tests/integration/session-start-hook.test.ts +0 -126
  250. package/tests/integration/source-registry.test.ts +0 -120
  251. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  252. package/tests/integration/stop-hook.test.ts +0 -560
  253. package/tests/integration/wal-checkpoint.test.ts +0 -49
  254. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  255. package/tests/unit/core/adapters/aider.test.ts +0 -230
  256. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  257. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  258. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  259. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  260. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  261. package/tests/unit/core/adapters/pi.test.ts +0 -110
  262. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  263. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  264. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  265. package/tests/unit/core/filter.test.ts +0 -40
  266. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  267. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  268. package/tests/unit/core/hook/gate.test.ts +0 -29
  269. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  270. package/tests/unit/core/hook/select.test.ts +0 -66
  271. package/tests/unit/core/match-fields.test.ts +0 -39
  272. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  273. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  274. package/tests/unit/core/query-shape.test.ts +0 -92
  275. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  276. package/tests/unit/core/recall-service.test.ts +0 -200
  277. package/tests/unit/core/storage/live-status.test.ts +0 -54
  278. package/tests/unit/core/tokenize.test.ts +0 -32
  279. package/tests/unit/core/useful-scan.test.ts +0 -537
  280. package/tests/unit/llm/embed.test.ts +0 -93
  281. package/tests/unit/llm/ollama-client.test.ts +0 -124
  282. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  283. package/tsconfig.json +0 -31
  284. package/tsconfig.test.json +0 -11
  285. package/vitest.config.ts +0 -22
@@ -1,327 +0,0 @@
1
- /**
2
- * FactRecallService — agent-facing recall over the FactStore.
3
- *
4
- * Mirrors RecallService's keyword / semantic / hybrid pattern but works on
5
- * Fact records, not Session records. Sessions and facts answer different
6
- * questions and have incompatibly-shaped results, so this is a separate
7
- * service with its own MCP tool — see Section 4 of factstore-design.md.
8
- *
9
- * Filter pipeline:
10
- * 1. Storage pre-filter (subject, predicate, kind, minConfidence,
11
- * includeSuperseded). Cheap SQL.
12
- * 2. Keyword scoring over (value, subject, predicate). Pure, in-memory.
13
- * 3. Semantic KNN via fact_embeddings vec0 (when mode != keyword).
14
- * 4. Hybrid merge: 0.6 semantic + 0.4 keyword, matching the session
15
- * recall weights.
16
- *
17
- * Confidence policy: default `minConfidence` is 0.6 (Section 1 of the plan).
18
- * Facts with classifier confidence in [0.4, 0.6) get written by
19
- * extractFacts but stay out of agent recall unless the caller lowers the
20
- * floor explicitly.
21
- */
22
-
23
- import type { FactStore } from "@ports/fact-store.js";
24
- import type { LLMClient } from "@ports/llm-client.js";
25
- import { LLMUnreachableError } from "@ports/llm-client.js";
26
- import type {
27
- Fact,
28
- FactHit,
29
- FactMatchField,
30
- FactRecallQuery,
31
- FactRecallResult,
32
- RecallMode,
33
- } from "@shared/types.js";
34
- import { tokenSet } from "@core/recall/tokenize.js";
35
-
36
- const DEFAULT_LIMIT = 10; // result count used by clampLimit when the caller passes no limit
37
- const MAX_LIMIT = 100; // upper bound clampLimit applies to any requested limit
38
- const DEFAULT_MIN_CONFIDENCE = 0.6; // confidence floor sent to the store when the query omits minConfidence
39
- const STORAGE_FETCH_CAP = 500; // `limit` passed to factStore.listForRecall for the candidate pre-filter
40
- const HYBRID_KW_WEIGHT = 0.4; // weight of the normalised keyword score in mergeHybrid
41
- const HYBRID_SEM_WEIGHT = 0.6; // weight of the normalised semantic score in mergeHybrid
42
- const SEMANTIC_OVERFETCH = 3; // search() requests limit * this many neighbours, since runSemantic drops non-candidates
43
-
44
- const FIELD_WEIGHTS = { // per-field multiplier applied by scoreFact to each matching token
45
- value: 3,
46
- subject: 1,
47
- predicate: 1,
48
- } as const;
49
-
50
- export interface FactRecallServiceDeps { // collaborators handed to the FactRecallService constructor
51
- readonly factStore: FactStore; // queried through listForRecall() and semanticSearch()
52
- readonly llm: LLMClient; // embed() turns the query text into a vector for semantic / hybrid modes
53
- }
54
-
55
/**
 * Agent-facing recall over stored facts in keyword, semantic, or hybrid mode.
 *
 * Candidates are fetched once from the fact store using the structured
 * filters, then ranked in memory by keyword overlap and/or embedding
 * similarity.
 */
export class FactRecallService {
  constructor(private readonly deps: FactRecallServiceDeps) {}

  /**
   * Run one recall query.
   *
   * @param input - Free-text query and/or structured filters. `mode`
   *   defaults to "keyword", `limit` goes through clampLimit, and
   *   `minConfidence` defaults to DEFAULT_MIN_CONFIDENCE.
   * @returns The normalised request echoed back plus ranked hits. `total`
   *   is the number of hits before truncation to `limit`. `modeUnavailable`
   *   is set to "ollama_unreachable" when embedding failed with
   *   LLMUnreachableError: semantic mode then returns no results, hybrid
   *   mode returns keyword-only results.
   * @throws Whatever the fact store throws, and any embedding error other
   *   than LLMUnreachableError.
   */
  async search(input: FactRecallQuery): Promise<FactRecallResult> {
    const mode: RecallMode = input.mode ?? "keyword";
    const limit = clampLimit(input.limit);
    const subject = input.subject ?? null;
    const predicate = input.predicate ?? null;
    const kind = input.kind ?? null;
    const queryText = (input.query ?? "").trim();

    const empty: FactRecallResult = {
      query: queryText,
      subject,
      predicate,
      kind,
      mode,
      limit,
      total: 0,
      results: [],
    };

    // A query with no signal at all → empty. Either free-text query, or a
    // structured filter (subject / predicate / kind) must be provided.
    if (!queryText && subject === null && predicate === null && kind === null) {
      return empty;
    }

    // Optional filters are only added when the caller supplied them, so the
    // store never receives an explicit `undefined` key.
    const filter: Parameters<FactStore["listForRecall"]>[0] = {
      includeSuperseded: input.includeSuperseded === true,
      minConfidence: input.minConfidence ?? DEFAULT_MIN_CONFIDENCE,
      limit: STORAGE_FETCH_CAP,
      ...(input.subject !== undefined ? { subject: input.subject } : {}),
      ...(input.predicate !== undefined ? { predicate: input.predicate } : {}),
      ...(input.kind !== undefined ? { kind: input.kind } : {}),
    };

    const candidates = await this.deps.factStore.listForRecall(filter);
    if (candidates.length === 0) return empty;

    const byId = new Map<string, Fact>(candidates.map((f) => [f.id, f]));
    const queryTokens = queryText ? new Set(tokenSet(queryText)) : new Set<string>();

    const kwHits =
      mode === "keyword" || mode === "hybrid"
        ? scoreAll(candidates, queryTokens)
        : [];

    let semHits: ReadonlyArray<SemanticHit> = [];
    let semError: "ollama_unreachable" | null = null;
    if ((mode === "semantic" || mode === "hybrid") && queryText) {
      try {
        semHits = await this.runSemantic(queryText, byId, limit * SEMANTIC_OVERFETCH);
      } catch (err) {
        // Only an unreachable embedder is downgraded to a soft failure.
        if (err instanceof LLMUnreachableError) {
          semError = "ollama_unreachable";
        } else {
          throw err;
        }
      }
    }

    if (mode === "semantic" && semError) {
      return { ...empty, modeUnavailable: semError };
    }

    // For pure structured queries (no query text, just subject/predicate),
    // a keyword pass with empty tokens scores zero and a semantic pass has
    // nothing to embed. Fall back to returning the storage filter result in
    // the order the store returned it. Applies to keyword AND hybrid;
    // semantic mode without query text falls through and returns no results.
    if ((mode === "keyword" || mode === "hybrid") && !queryText) {
      // Hand finalize the whole candidate list: it truncates to `limit`
      // itself and derives `total` from the untruncated length. Slicing
      // here first capped `total` at `limit` on this path only. `total`
      // is still bounded by STORAGE_FETCH_CAP.
      const rows = candidates.map((f) => factToHit(f, 0, []));
      return finalize(queryText, subject, predicate, kind, mode, limit, rows);
    }

    if (mode === "keyword") {
      return finalize(queryText, subject, predicate, kind, mode, limit, kwHits.map(toKeywordHit));
    }

    if (mode === "semantic") {
      return finalize(queryText, subject, predicate, kind, mode, limit, semHits.map(toSemanticHit));
    }

    // hybrid
    const merged = mergeHybrid(kwHits, semHits, byId);
    const result = finalize(queryText, subject, predicate, kind, mode, limit, merged);
    return semError ? { ...result, modeUnavailable: semError } : result;
  }

  /**
   * Embed the query, fetch its nearest neighbours, and keep only neighbours
   * that are also in the pre-filtered candidate set.
   *
   * @param query - Non-empty query text to embed.
   * @param byId - Candidate facts keyed by id; neighbours not present here
   *   are dropped.
   * @param fetchLimit - Number of neighbours requested from the store.
   * @returns Surviving neighbours, in the order the store returned them,
   *   with distance converted by cosineFromL2.
   */
  private async runSemantic(
    query: string,
    byId: ReadonlyMap<string, Fact>,
    fetchLimit: number,
  ): Promise<ReadonlyArray<SemanticHit>> {
    const embedding = await this.deps.llm.embed(query, "query");
    const neighbors = await this.deps.factStore.semanticSearch(embedding.vector, fetchLimit);
    const hits: SemanticHit[] = [];
    for (const n of neighbors) {
      const fact = byId.get(n.factId);
      if (!fact) continue; // candidate was filtered out by subject/predicate/conf
      hits.push({ fact, similarity: cosineFromL2(n.distance) });
    }
    return hits;
  }
}
164
-
165
- interface KeywordHit { // one fact with a non-zero keyword score, as produced by scoreAll
166
- readonly fact: Fact;
167
- readonly score: number; // sum over fields of FIELD_WEIGHTS[field] * matching-token count (see scoreFact)
168
- readonly matchedIn: ReadonlyArray<FactMatchField>; // fields that shared at least one token with the query
169
- }
170
-
171
- interface SemanticHit { // one nearest-neighbour result that survived the candidate filter in runSemantic
172
- readonly fact: Fact;
173
- readonly similarity: number; // cosineFromL2(distance), clamped to [-1, 1] and rounded to 4 decimals
174
- }
175
-
176
/**
 * Keyword-score every fact and keep those with a positive score.
 *
 * @param facts - Candidate facts to score.
 * @param queryTokens - Tokens of the query; an empty set yields no hits.
 * @returns Matching facts ordered by descending score.
 */
function scoreAll(
  facts: ReadonlyArray<Fact>,
  queryTokens: ReadonlySet<string>,
): ReadonlyArray<KeywordHit> {
  if (queryTokens.size === 0) return [];

  const matches: KeywordHit[] = facts
    .map((fact) => {
      const scored = scoreFact(fact, queryTokens);
      return { fact, score: scored.score, matchedIn: scored.matchedIn };
    })
    .filter((candidate) => candidate.score > 0);

  // Highest score first.
  matches.sort((left, right) => right.score - left.score);
  return matches;
}
189
-
190
/**
 * Score one fact against the query tokens.
 *
 * Each of value, subject and predicate contributes its FIELD_WEIGHTS entry
 * multiplied by the number of query tokens found in that field.
 *
 * @param fact - Fact to score.
 * @param queryTokens - Tokens of the query.
 * @returns The summed score and the fields that matched, listed in the
 *   order value, subject, predicate.
 */
function scoreFact(
  fact: Fact,
  queryTokens: ReadonlySet<string>,
): { score: number; matchedIn: ReadonlyArray<FactMatchField> } {
  const scoredFields = ["value", "subject", "predicate"] as const;

  let total = 0;
  const fieldsHit: FactMatchField[] = [];
  for (const field of scoredFields) {
    const overlap = intersectionSize(queryTokens, tokenSet(fact[field]));
    if (overlap > 0) {
      total += FIELD_WEIGHTS[field] * overlap;
      fieldsHit.push(field);
    }
  }

  return { score: total, matchedIn: fieldsHit };
}
217
-
218
/**
 * Combine keyword and semantic hits into a single ranked list.
 *
 * Each side is divided by its largest score (floored at 1, which also
 * avoids dividing by zero), then blended as
 * HYBRID_SEM_WEIGHT * semantic + HYBRID_KW_WEIGHT * keyword.
 *
 * @param kwHits - Keyword hits.
 * @param semHits - Semantic hits.
 * @param byId - Candidate facts keyed by id; ids missing here are skipped.
 * @returns One row per fact present on either side, ordered by descending
 *   combined score, carrying the per-side normalised scores.
 */
function mergeHybrid(
  kwHits: ReadonlyArray<KeywordHit>,
  semHits: ReadonlyArray<SemanticHit>,
  byId: ReadonlyMap<string, Fact>,
): ReadonlyArray<FactHit> {
  const keywordCeiling = Math.max(1, ...kwHits.map((hit) => hit.score));
  const semanticCeiling = Math.max(1, ...semHits.map((hit) => hit.similarity));

  const keywordById = new Map<string, KeywordHit>();
  for (const hit of kwHits) keywordById.set(hit.fact.id, hit);
  const semanticById = new Map<string, SemanticHit>();
  for (const hit of semHits) semanticById.set(hit.fact.id, hit);

  // Keyword ids first, then semantic-only ids.
  const candidateIds = new Set<string>([...keywordById.keys(), ...semanticById.keys()]);

  const merged: FactHit[] = [];
  candidateIds.forEach((id) => {
    const fact = byId.get(id);
    if (!fact) return;

    const keyword = keywordById.get(id);
    const semantic = semanticById.get(id);
    const keywordShare = keyword ? keyword.score / keywordCeiling : 0;
    const semanticShare = semantic ? semantic.similarity / semanticCeiling : 0;

    merged.push({
      ...fact,
      matchScore: round4(HYBRID_SEM_WEIGHT * semanticShare + HYBRID_KW_WEIGHT * keywordShare),
      matchedIn: uniqueFields(
        keyword?.matchedIn ?? [],
        semantic ? (["semantic"] as FactMatchField[]) : [],
      ),
      keywordScore: round4(keywordShare),
      semanticScore: round4(semanticShare),
    });
  });

  merged.sort((left, right) => right.matchScore - left.matchScore);
  return merged;
}
254
-
255
/**
 * Wrap a fact as a recall hit.
 *
 * @param fact - Source fact; its own properties are copied, not mutated.
 * @param score - Value stored as `matchScore`.
 * @param matchedIn - Fields stored as `matchedIn`.
 * @returns A new object with the fact's properties plus the two hit fields.
 */
function factToHit(
  fact: Fact,
  score: number,
  matchedIn: ReadonlyArray<FactMatchField>,
): FactHit {
  const hit: FactHit = { ...fact, matchScore: score, matchedIn };
  return hit;
}
262
-
263
/** Convert a keyword hit into a FactHit, keeping its raw score and matched fields. */
function toKeywordHit(h: KeywordHit): FactHit {
  const { fact, score, matchedIn } = h;
  return factToHit(fact, score, matchedIn);
}
266
-
267
/** Convert a semantic hit into a FactHit scored by similarity and tagged "semantic". */
function toSemanticHit(h: SemanticHit): FactHit {
  const { fact, similarity } = h;
  return factToHit(fact, similarity, ["semantic"]);
}
270
-
271
/**
 * Assemble the result envelope for a query.
 *
 * @param query - Trimmed query text to echo back.
 * @param subject - Subject filter to echo back, or null.
 * @param predicate - Predicate filter to echo back, or null.
 * @param kind - Kind filter to echo back.
 * @param mode - Recall mode to echo back.
 * @param limit - Maximum number of hits to return.
 * @param hits - All ranked hits, not yet truncated.
 * @returns The envelope; `total` is the full hit count and `results` holds
 *   the first `limit` hits.
 */
function finalize(
  query: string,
  subject: string | null,
  predicate: string | null,
  kind: FactRecallResult["kind"],
  mode: RecallMode,
  limit: number,
  hits: ReadonlyArray<FactHit>,
): FactRecallResult {
  const total = hits.length;
  const results = hits.slice(0, limit);
  return { query, subject, predicate, kind, mode, limit, total, results };
}
291
-
292
/**
 * Normalise a requested result limit.
 *
 * @param limit - Requested limit; DEFAULT_LIMIT is used when absent.
 * @returns 1 when the request is NaN or below 1, otherwise the request
 *   truncated toward zero and capped at MAX_LIMIT.
 */
function clampLimit(limit: number | undefined): number {
  const requested = limit ?? DEFAULT_LIMIT;
  const belowFloor = Number.isNaN(requested) || requested < 1;
  return belowFloor ? 1 : Math.min(MAX_LIMIT, Math.trunc(requested));
}
297
-
298
/**
 * Convert an L2 distance to a cosine similarity via cos = 1 - d² / 2.
 *
 * NOTE(review): that identity only holds for unit-length vectors; this
 * presumably relies on the stored embeddings being normalised. Confirm.
 *
 * @param distance - L2 distance reported by the vector search.
 * @returns The similarity clamped to [-1, 1] and rounded to 4 decimals.
 */
function cosineFromL2(distance: number): number {
  const raw = 1 - (distance * distance) / 2;
  const clamped = Math.min(1, Math.max(-1, raw));
  return round4(clamped);
}
302
-
303
/** Round a number to four decimal places. */
function round4(value: number): number {
  const scaled = Math.round(value * 10_000);
  return scaled / 10_000;
}
306
-
307
/**
 * Count the elements two sets have in common.
 *
 * Walks the smaller set and probes the larger one.
 *
 * @param a - First set.
 * @param b - Second set.
 * @returns Number of elements present in both.
 */
function intersectionSize<T>(a: ReadonlySet<T>, b: ReadonlySet<T>): number {
  let walked = b;
  let probed = a;
  if (a.size <= b.size) {
    walked = a;
    probed = b;
  }

  let shared = 0;
  walked.forEach((item) => {
    if (probed.has(item)) shared += 1;
  });
  return shared;
}
313
-
314
/**
 * Concatenate two field lists and drop repeats.
 *
 * @param a - Fields listed first.
 * @param b - Fields listed after `a`.
 * @returns Each distinct field once, in order of first appearance.
 */
function uniqueFields(
  a: ReadonlyArray<FactMatchField>,
  b: ReadonlyArray<FactMatchField>,
): ReadonlyArray<FactMatchField> {
  // A Set iterates in insertion order, so first appearances are kept.
  return Array.from(new Set<FactMatchField>([...a, ...b]));
}
@@ -1,142 +0,0 @@
1
- /**
2
- * scanOnce — mtime-gated incremental discovery shared by every adapter.
3
- *
4
- * The Python codebase bundled this logic into each adapter (`scan_once` +
5
- * `record_classified` methods). In the TS port the adapter stays a pure
6
- * parser (TranscriptAdapter port); the mtime check and adapter_state
7
- * upsert live here, generic over the adapter. Same behavior, less
8
- * duplication across claude-code / hermes / pi.
9
- *
10
- * Contract (per file under adapter.discover()):
11
- * - If `now - mtime < idleMinutes * 60s` → still active, skip
12
- * - Lookup adapter_state by (adapterName, sourcePath):
13
- * no row + file idle → NEW: parse + return (chunk, supersedes=null)
14
- * row exists, size match, failures < ceil → UNCHANGED: skip
15
- * row exists, size match, failures >= ceil → FAILED_CEILING: skip (log once per session)
16
- * row exists, size differs → RESUMED: parse + return; failure_count is reset if it had reached the ceiling
17
- * - After successful classify+insert downstream, call `recordClassified`
18
- * to upsert adapter_state with the new size + session_id.
19
- * - On classify/storage failure, call `recordFailed` to increment failure_count.
20
- * When failure_count reaches MAX_CLASSIFY_FAILURES and the file hasn't grown,
21
- * the file is permanently skipped until new content arrives.
22
- */
23
-
24
- import { statSync } from "node:fs";
25
- import type Database from "better-sqlite3";
26
- import type {
27
- SessionChunk,
28
- TranscriptAdapter,
29
- } from "@ports/transcript-adapter.js";
30
-
31
- export interface ScanResult { // one parsed transcript returned by scanOnce
32
- readonly chunk: SessionChunk; // value returned by adapter.parseSession for the file
33
- readonly supersedes: string | null; // session_id from the file's existing adapter_state row, or null when there was no row
34
- }
35
-
36
- export const MAX_CLASSIFY_FAILURES = 3; // scanOnce zeroes failure_count when a file at or above this count changes size
37
-
38
- interface AdapterStateRow { // shape of the rows scanOnce selects from adapter_state
39
- source_path: string; // key used to match a discovered file to its row
40
- file_size: number | null; // size recorded on the last recordClassified / recordFailed; scanOnce treats null as 0
41
- session_id: string | null; // recordFailed inserts NULL here; recordClassified stores the session id
42
- failure_count: number; // selected through COALESCE(failure_count, 0), so never null in this shape
43
- }
44
-
45
/**
 * Discover transcript files for one adapter and parse those that are idle
 * and either new or changed in size since they were last recorded.
 *
 * @param adapter - Supplies `name`, `discover()` and `parseSession()`.
 * @param idleMinutes - Files modified more recently than this are skipped.
 * @param db - Database holding the adapter_state table.
 * @param now - Current time in epoch milliseconds; defaults to Date.now().
 * @returns One entry per file parsed to a non-empty chunk. `supersedes` is
 *   the session id stored for that file, or null when it had no state row.
 */
export async function scanOnce(
  adapter: TranscriptAdapter,
  idleMinutes: number,
  db: Database.Database,
  now: number = Date.now(),
): Promise<ReadonlyArray<ScanResult>> {
  const idleThresholdMs = idleMinutes * 60 * 1000;

  // Load every state row for this adapter up front and index it by path.
  const selectState = db.prepare<[string], AdapterStateRow>(
    "SELECT source_path, file_size, session_id, COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ?",
  );
  const priorByPath = new Map<string, AdapterStateRow>();
  for (const row of selectState.all(adapter.name)) {
    priorByPath.set(row.source_path, row);
  }

  const results: ScanResult[] = [];
  const discovered = await adapter.discover();

  for (const path of discovered) {
    let stats;
    try {
      stats = statSync(path);
    } catch {
      // File cannot be stat'ed (e.g. it vanished since discovery): skip it.
      continue;
    }

    // Modified too recently: treat the session as still active.
    if (now - stats.mtimeMs < idleThresholdMs) continue;

    const prior = priorByPath.get(path);
    let supersedes: string | null = null;
    if (prior) {
      // Same size as last recorded: skip whether the last attempt succeeded
      // or failed. A retry only happens once the file's size changes.
      if ((prior.file_size ?? 0) === stats.size) continue;

      // Size changed on a file that had hit the failure ceiling: clear the
      // counter so the new attempt starts from zero.
      if (prior.failure_count >= MAX_CLASSIFY_FAILURES) {
        db.prepare(
          "UPDATE adapter_state SET failure_count = 0 WHERE adapter_name = ? AND source_path = ?",
        ).run(adapter.name, path);
      }
      supersedes = prior.session_id;
    }

    const chunk = await adapter.parseSession(path);
    if (chunk) {
      results.push({ chunk, supersedes });
    }
  }

  return results;
}
96
-
97
/**
 * Record that a transcript file was processed successfully.
 *
 * Upserts the adapter_state row for (adapterName, sourcePath) with the
 * file's current size, the session id, a zeroed failure_count and the
 * current timestamp. Does nothing when the file cannot be stat'ed.
 *
 * @param db - Database holding the adapter_state table.
 * @param adapterName - Adapter that owns the file.
 * @param sourcePath - Path of the transcript file.
 * @param sessionId - Session id stored on the row.
 */
export function recordClassified(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
  sessionId: string,
): void {
  let currentSize: number;
  try {
    currentSize = statSync(sourcePath).size;
  } catch {
    return;
  }

  const upsert = db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, ?, 0, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       last_offset = excluded.last_offset,
       file_size = excluded.file_size,
       session_id = excluded.session_id,
       failure_count = 0,
       last_processed_at = excluded.last_processed_at`,
  );
  // The current size is written to both last_offset and file_size.
  upsert.run(adapterName, sourcePath, currentSize, currentSize, sessionId);
}
121
-
122
/**
 * Record a failed attempt to process a transcript file.
 *
 * Inserts an adapter_state row with failure_count = 1 and a NULL
 * session_id, or, when the row already exists, bumps its failure_count and
 * refreshes file_size and last_processed_at. Storing the current size means
 * scanOnce skips the file until its size changes. Does nothing when the
 * file cannot be stat'ed.
 *
 * @param db - Database holding the adapter_state table.
 * @param adapterName - Adapter that owns the file.
 * @param sourcePath - Path of the transcript file.
 */
export function recordFailed(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
): void {
  let size = 0;
  try {
    size = statSync(sourcePath).size;
  } catch {
    return;
  }
  // COALESCE guards the increment: scanOnce reads failure_count through
  // COALESCE(failure_count, 0), so the stored value can be NULL, and
  // NULL + 1 is NULL in SQLite — the counter would never advance.
  db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, NULL, 1, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       file_size = excluded.file_size,
       failure_count = COALESCE(failure_count, 0) + 1,
       last_processed_at = excluded.last_processed_at`,
  ).run(adapterName, sourcePath, size, size);
}
@@ -1,225 +0,0 @@
1
- /**
2
- * ScanScheduler — periodic ingest loop. Ports `scheduler.py`.
3
- *
4
- * Each tick walks the registered adapters, runs scanOnce to discover idle
5
- * transcript files, classifies the resulting SessionChunks via the active
6
- * classifier, and persists them through SqliteSessionStore.insertSession
7
- * with the embedder. Records adapter_state after each successful insert
8
- * so the next tick is incremental.
9
- *
10
- * Single-process: the scheduler runs alongside the HTTP server (Phase D
11
- * wires it into `nlm start`). No worker thread; Node's event loop is
12
- * enough — adapter discovery is filesystem-bound and the per-chunk
13
- * classify call is async-awaited with a wall-clock timeout to keep the
14
- * tick loop responsive.
15
- *
16
- * Confidence floor of 0.3 mirrors Python: classifier outputs below that
17
- * are skipped rather than persisted as low-quality noise.
18
- */
19
-
20
- import type { LLMClient } from "@ports/llm-client.js";
21
- import type { TranscriptAdapter } from "@ports/transcript-adapter.js";
22
- import { extractFacts } from "@core/facts/extract-facts.js";
23
- import type { SqliteFactStore } from "@core/storage/sqlite-fact-store.js";
24
- import type {
25
- IngestRecord,
26
- SqliteSessionStore,
27
- } from "@core/storage/sqlite-session-store.js";
28
- import { MAX_CLASSIFY_FAILURES, recordClassified, recordFailed, scanOnce } from "./scan-once.js";
29
-
30
/** Delay between scheduler ticks: 30 minutes, matching the Python default. */
const DEFAULT_INTERVAL_MS = 30 * 60_000;
/** Wall-clock budget for a single classify call: two minutes. */
const DEFAULT_CLASSIFY_TIMEOUT_MS = 2 * 60_000;
/** Classifications with confidence below this value are skipped, not stored. */
const DEFAULT_CONFIDENCE_FLOOR = 0.3;
/** Idle threshold, in minutes, handed to scanOnce. */
const DEFAULT_IDLE_MINUTES = 15;
/** Upper bound on the stored session body, in string characters (UTF-16 code units). */
const BODY_CAP = 200_000;
35
-
36
/** Construction-time configuration accepted by ScanScheduler. */
export interface SchedulerOptions {
  /** Session store that receives ingested sessions and exposes the raw DB handle. */
  readonly store: SqliteSessionStore;
  /** Transcript adapters walked, in order, on every tick. */
  readonly adapters: readonly TranscriptAdapter[];
  /** LLM client whose `classify` is called on each chunk's text. */
  readonly classifier: LLMClient;
  /** Optional embedder forwarded to `insertSession`; treated as null when omitted. */
  readonly embedder?: LLMClient | null;
  /**
   * FactStore for Phase B.2 fact ingest. When supplied, facts are extracted
   * from each classify result and persisted atomically with the session
   * row. Optional: with null (or omitted) sessions ingest as before and no
   * facts are written, which keeps not-yet-updated tests and any caller
   * that wants facts off working unchanged.
   */
  readonly factStore?: SqliteFactStore | null;
  /** Milliseconds between ticks. */
  readonly intervalMs?: number;
  /** Wall-clock timeout, in milliseconds, for one classify call. */
  readonly classifyTimeoutMs?: number;
  /** Minimum classifier confidence required for a session to be stored. */
  readonly confidenceFloor?: number;
  /** Idle threshold, in minutes, passed to scanOnce. */
  readonly idleMinutes?: number;
  /** Log sink. Defaults to console.error; pass a noop in tests. */
  readonly logger?: (msg: string) => void;
}
56
-
57
/** Counters summarising what a single ScanScheduler tick did. */
export interface TickReport {
  /** Sessions stored successfully. */
  readonly inserted: number;
  /** Chunks dropped because the classifier confidence was below the floor. */
  readonly skippedLowConfidence: number;
  /** Chunks whose classify call threw or timed out. */
  readonly classifyFailures: number;
  /** Chunks whose store/checkpoint step threw. */
  readonly storageFailures: number;
  /** Total chunks returned by scanOnce across all adapters. */
  readonly chunksSeen: number;
}
64
-
65
/**
 * Periodic ingest loop.
 *
 * Each tick walks the configured adapters, asks scanOnce for chunks,
 * classifies each chunk with a wall-clock timeout, and stores sufficiently
 * confident results through the session store, checkpointing adapter_state
 * after every successful insert. Ticks are chained with setTimeout: the next
 * one is armed only after the previous one has settled.
 */
export class ScanScheduler {
  private readonly opts: Required<Omit<SchedulerOptions, "embedder" | "factStore">> & {
    readonly embedder: LLMClient | null;
    readonly factStore: SqliteFactStore | null;
  };
  private stopped = true;
  private timer: NodeJS.Timeout | null = null;

  /**
   * @param opts Scheduler configuration; every omitted optional field falls
   *             back to its module-level default (embedder and factStore
   *             fall back to null).
   */
  constructor(opts: SchedulerOptions) {
    this.opts = {
      store: opts.store,
      adapters: opts.adapters,
      classifier: opts.classifier,
      embedder: opts.embedder ?? null,
      factStore: opts.factStore ?? null,
      intervalMs: opts.intervalMs ?? DEFAULT_INTERVAL_MS,
      classifyTimeoutMs: opts.classifyTimeoutMs ?? DEFAULT_CLASSIFY_TIMEOUT_MS,
      confidenceFloor: opts.confidenceFloor ?? DEFAULT_CONFIDENCE_FLOOR,
      idleMinutes: opts.idleMinutes ?? DEFAULT_IDLE_MINUTES,
      logger: opts.logger ?? ((msg) => console.error(msg)),
    };
  }

  /** Begin ticking; the first tick is scheduled with zero delay. No-op if already started. */
  start(): void {
    if (!this.stopped) return;
    this.stopped = false;
    this.scheduleNext(0);
  }

  /** Stop scheduling further ticks and cancel the pending timer, if any. */
  stop(): void {
    this.stopped = true;
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /** Render any thrown value as a one-line message for the log. */
  private static errorText(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }

  /**
   * Arm the timer for the next tick unless the scheduler has been stopped.
   *
   * @param delayMs Milliseconds to wait before running the tick.
   */
  private scheduleNext(delayMs: number): void {
    if (this.stopped) return;
    this.timer = setTimeout(() => {
      void this.tick()
        // A tick can still reject (e.g. a database error thrown while
        // recording a failure). Log it here so it does not surface as an
        // unhandled promise rejection, and keep the loop going.
        .catch((e: unknown) => {
          this.opts.logger(`[scheduler] tick failed: ${ScanScheduler.errorText(e)}`);
        })
        .finally(() => this.scheduleNext(this.opts.intervalMs));
    }, delayMs);
  }

  /**
   * Run one full ingest pass over every adapter.
   *
   * Failures of scanOnce, of the classifier, and of the store are counted
   * and logged rather than thrown; the pass then continues with the next
   * adapter or chunk.
   *
   * @returns Counters describing what this pass did.
   */
  async tick(): Promise<TickReport> {
    let inserted = 0;
    let skippedLowConfidence = 0;
    let classifyFailures = 0;
    let storageFailures = 0;
    let chunksSeen = 0;

    for (const adapter of this.opts.adapters) {
      let results;
      try {
        results = await scanOnce(adapter, this.opts.idleMinutes, this.opts.store.rawDb());
      } catch (e) {
        this.opts.logger(
          `[scheduler] scanOnce error for ${adapter.name}: ${ScanScheduler.errorText(e)}`,
        );
        continue;
      }

      for (const { chunk, supersedes } of results) {
        chunksSeen += 1;

        let classification;
        try {
          classification = await withTimeout(
            this.opts.classifier.classify(chunk.text),
            this.opts.classifyTimeoutMs,
          );
        } catch (e) {
          classifyFailures += 1;
          const reason =
            e instanceof TimeoutError ? "timed out" : `error: ${ScanScheduler.errorText(e)}`;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          // Read the counter back so the log line shows progress towards the ceiling.
          const failureRow = this.opts.store
            .rawDb()
            .prepare<[string, string], { failure_count: number }>(
              "SELECT COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ? AND source_path = ?",
            )
            .get(adapter.name, chunk.sourcePath);
          const count = failureRow?.failure_count ?? 1;
          const ceiling =
            count >= MAX_CLASSIFY_FAILURES
              ? ` (failure ${count}/${MAX_CLASSIFY_FAILURES} — will skip until file grows)`
              : ` (failure ${count}/${MAX_CLASSIFY_FAILURES})`;
          this.opts.logger(`[scheduler] classifier ${reason} for ${chunk.id}${ceiling}`);
          continue;
        }

        // Below the floor: count it and move on without storing or checkpointing.
        if (classification.confidence < this.opts.confidenceFloor) {
          skippedLowConfidence += 1;
          continue;
        }

        const record: IngestRecord = {
          id: chunk.id,
          runtime: chunk.runtime,
          runtimeSessionId: chunk.runtimeSessionId || null,
          startedAt: chunk.startedAt,
          endedAt: chunk.endedAt || null,
          durationMin: chunk.durationMin,
          label: classification.label,
          summary: classification.summary,
          body: chunk.text.slice(0, BODY_CAP),
          status: "closed",
          transcriptKind: adapter.transcriptKind,
          transcriptPath: chunk.sourcePath,
          transcriptOffset: chunk.byteRange[0],
          transcriptLength: chunk.byteRange[1],
          entities: classification.entities,
          decisions: classification.decisions,
          openQuestions: classification.open,
        };

        // Facts are extracted only when a fact store was configured.
        const factSink = this.opts.factStore
          ? {
              factStore: this.opts.factStore,
              facts: extractFacts(classification, chunk.id, chunk.startedAt),
            }
          : null;

        try {
          await this.opts.store.insertSession(record, this.opts.embedder, supersedes, factSink);
          recordClassified(this.opts.store.rawDb(), adapter.name, chunk.sourcePath, chunk.id);
          inserted += 1;
        } catch (e) {
          storageFailures += 1;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          this.opts.logger(
            `[scheduler] storage error for ${chunk.id}: ${ScanScheduler.errorText(e)}`,
          );
        }
      }
    }

    return { inserted, skippedLowConfidence, classifyFailures, storageFailures, chunksSeen };
  }
}
210
-
211
/**
 * Rejection reason used by withTimeout when the deadline elapses first.
 * Callers tell it apart from other failures with `instanceof`.
 */
class TimeoutError extends Error {
  constructor(message?: string) {
    super(message);
    // Without this, stacks and logs label the error as a plain "Error".
    this.name = "TimeoutError";
  }
}
212
-
213
/**
 * Settle with `promise`, or reject with a TimeoutError if `ms` milliseconds
 * pass first. The timer is cleared as soon as either side wins.
 *
 * The wrapped promise itself is not cancelled on timeout; only its result
 * is ignored.
 *
 * @param promise Work to wait for.
 * @param ms      Deadline in milliseconds.
 * @returns The value `promise` resolves to.
 */
function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let handle: NodeJS.Timeout | undefined;
  const deadline = new Promise<T>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new TimeoutError(`timed out after ${ms}ms`));
    }, ms);
  });
  return Promise.race([promise, deadline]).finally(() => {
    if (handle) clearTimeout(handle);
  });
}