nlm-memory 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. package/README.md +72 -34
  2. package/dist/cli/nlm.js +2 -1
  3. package/dist/cli/nlm.js.map +1 -1
  4. package/dist/http/app.js +2 -1
  5. package/dist/http/app.js.map +1 -1
  6. package/dist/mcp/server.js +20 -1
  7. package/dist/mcp/server.js.map +1 -1
  8. package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
  9. package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
  10. package/dist/ui/index.html +2 -2
  11. package/package.json +26 -1
  12. package/.agents/plugins/marketplace.json +0 -20
  13. package/.github/workflows/ci.yml +0 -30
  14. package/docs/methodology/re-derivation-rate.md +0 -112
  15. package/docs/methodology/useful-hit-rate.md +0 -79
  16. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  17. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  18. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  19. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  20. package/docs/plans/desktop-product.md +0 -69
  21. package/docs/plans/factstore-design.md +0 -236
  22. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
  23. package/logs/CHANGELOG/CHANGELOG.md +0 -209
  24. package/migrations/000_initial_schema.sql +0 -174
  25. package/migrations/001_entity_type_rename.sql +0 -17
  26. package/migrations/002_adapter_state_extend.sql +0 -12
  27. package/migrations/003_session_embeddings.sql +0 -11
  28. package/migrations/004_facts.sql +0 -46
  29. package/migrations/005_sources.sql +0 -31
  30. package/migrations/006_providers.sql +0 -33
  31. package/migrations/007_source_tokens.sql +0 -17
  32. package/migrations/008_fts_rebuild.sql +0 -9
  33. package/migrations/009_session_embedding_chunks.sql +0 -46
  34. package/migrations/010_sources_opencode.sql +0 -30
  35. package/migrations/011_sources_hermes_agent.sql +0 -30
  36. package/migrations/012_sources_aider.sql +0 -30
  37. package/migrations/013_adapter_state_failure_count.sql +0 -12
  38. package/migrations/014_sources_cursor.sql +0 -30
  39. package/migrations/015_sources_windsurf.sql +0 -30
  40. package/plugin-hermes-agent/README.md +0 -49
  41. package/plugin-hermes-agent/__init__.py +0 -75
  42. package/plugin-hermes-agent/plugin.yaml +0 -15
  43. package/scripts/backfill-citations.mjs +0 -0
  44. package/scripts/build-codex-plugin.mjs +0 -61
  45. package/scripts/deepseek-probe.mjs +0 -67
  46. package/scripts/extract-triples.mjs +0 -207
  47. package/scripts/longmemeval/embedding-cache.ts +0 -77
  48. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  49. package/scripts/longmemeval/run-harness.ts +0 -315
  50. package/scripts/longmemeval/scorer.ts +0 -99
  51. package/scripts/longmemeval/tsconfig.json +0 -9
  52. package/scripts/longmemeval/types.ts +0 -35
  53. package/scripts/nlm-daily-digest.py +0 -239
  54. package/scripts/nlm-daily-digest.sh +0 -28
  55. package/src/cli/classify-parity.ts +0 -257
  56. package/src/cli/launchctl-helpers.ts +0 -49
  57. package/src/cli/nlm.ts +0 -1078
  58. package/src/core/actions/actions-log.ts +0 -118
  59. package/src/core/actions/overlay.ts +0 -117
  60. package/src/core/adapters/aider.ts +0 -205
  61. package/src/core/adapters/claude-code.ts +0 -293
  62. package/src/core/adapters/common.ts +0 -54
  63. package/src/core/adapters/cursor.ts +0 -486
  64. package/src/core/adapters/from-source.ts +0 -67
  65. package/src/core/adapters/hermes-agent.ts +0 -240
  66. package/src/core/adapters/hermes.ts +0 -277
  67. package/src/core/adapters/jsonl-generic.ts +0 -208
  68. package/src/core/adapters/opencode.ts +0 -281
  69. package/src/core/adapters/pi.ts +0 -264
  70. package/src/core/adapters/windsurf.ts +0 -386
  71. package/src/core/classifier/prompt.ts +0 -200
  72. package/src/core/dataset/build-dataset.ts +0 -463
  73. package/src/core/embedding/chunk-body.ts +0 -76
  74. package/src/core/embedding/embed-backfill.ts +0 -210
  75. package/src/core/embedding/embed-normalize.ts +0 -135
  76. package/src/core/facts/backfill-facts.ts +0 -254
  77. package/src/core/facts/extract-facts.ts +0 -50
  78. package/src/core/hook/citation-detect.ts +0 -124
  79. package/src/core/hook/cite-memo.ts +0 -68
  80. package/src/core/hook/claude-settings.ts +0 -187
  81. package/src/core/hook/gate.ts +0 -25
  82. package/src/core/hook/hook-log.ts +0 -41
  83. package/src/core/hook/memo-sweep.ts +0 -164
  84. package/src/core/hook/memo.ts +0 -67
  85. package/src/core/hook/pointer-block.ts +0 -26
  86. package/src/core/hook/select.ts +0 -32
  87. package/src/core/hook/transcript.ts +0 -121
  88. package/src/core/ingest/ingest-session.ts +0 -111
  89. package/src/core/providers/provider-models.ts +0 -100
  90. package/src/core/providers/provider-registry.ts +0 -196
  91. package/src/core/recall/citation-log.ts +0 -108
  92. package/src/core/recall/filter.ts +0 -27
  93. package/src/core/recall/index.ts +0 -6
  94. package/src/core/recall/match-fields.ts +0 -40
  95. package/src/core/recall/query-log.ts +0 -149
  96. package/src/core/recall/query-shape.ts +0 -66
  97. package/src/core/recall/recall-service.ts +0 -320
  98. package/src/core/recall/recent-log.ts +0 -59
  99. package/src/core/recall/tokenize.ts +0 -18
  100. package/src/core/recall/useful-scan.ts +0 -336
  101. package/src/core/recall-facts/fact-query-log.ts +0 -150
  102. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  103. package/src/core/scheduler/scan-once.ts +0 -142
  104. package/src/core/scheduler/scheduler.ts +0 -225
  105. package/src/core/sources/source-registry.ts +0 -278
  106. package/src/core/storage/db-restore.ts +0 -133
  107. package/src/core/storage/live-status.ts +0 -45
  108. package/src/core/storage/migrate.ts +0 -72
  109. package/src/core/storage/sqlite-fact-store.ts +0 -304
  110. package/src/core/storage/sqlite-session-store.ts +0 -810
  111. package/src/hook/hook-auth.ts +0 -18
  112. package/src/hook/prompt-recall-hook.ts +0 -180
  113. package/src/hook/session-end-hook.ts +0 -81
  114. package/src/hook/session-start-hook.ts +0 -168
  115. package/src/hook/stop-hook.ts +0 -239
  116. package/src/http/app.ts +0 -1215
  117. package/src/install/claude-code.ts +0 -128
  118. package/src/install/codex.ts +0 -367
  119. package/src/install/cursor.ts +0 -68
  120. package/src/install/hermes-agent.ts +0 -76
  121. package/src/install/hermes.ts +0 -78
  122. package/src/install/nlm-dir-perms.ts +0 -55
  123. package/src/install/ollama.ts +0 -284
  124. package/src/install/setup.ts +0 -489
  125. package/src/install/windsurf.ts +0 -68
  126. package/src/llm/classifier-box.ts +0 -64
  127. package/src/llm/deepseek-client.ts +0 -150
  128. package/src/llm/env-autoload.ts +0 -55
  129. package/src/llm/ollama-client.ts +0 -189
  130. package/src/mcp/server.ts +0 -534
  131. package/src/ports/fact-store.ts +0 -102
  132. package/src/ports/llm-client.ts +0 -52
  133. package/src/ports/logger.ts +0 -16
  134. package/src/ports/session-store.ts +0 -45
  135. package/src/ports/transcript-adapter.ts +0 -55
  136. package/src/shared/types.ts +0 -149
  137. package/src/ui/App.tsx +0 -58
  138. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  139. package/src/ui/components/SessionDrawer.tsx +0 -199
  140. package/src/ui/components/SideNav.tsx +0 -162
  141. package/src/ui/components/Skeleton.tsx +0 -107
  142. package/src/ui/index.html +0 -13
  143. package/src/ui/lib/actions.ts +0 -30
  144. package/src/ui/lib/api.ts +0 -92
  145. package/src/ui/lib/dataset.ts +0 -141
  146. package/src/ui/lib/registries.ts +0 -155
  147. package/src/ui/lib/view-settings.ts +0 -41
  148. package/src/ui/main.tsx +0 -15
  149. package/src/ui/pages/Live.tsx +0 -229
  150. package/src/ui/pages/Pulse.tsx +0 -415
  151. package/src/ui/pages/Recall.tsx +0 -190
  152. package/src/ui/pages/River.tsx +0 -354
  153. package/src/ui/pages/Search.tsx +0 -386
  154. package/src/ui/pages/Stub.tsx +0 -9
  155. package/src/ui/pages/Thread.tsx +0 -473
  156. package/src/ui/pages/settings/Classifier.tsx +0 -227
  157. package/src/ui/pages/settings/Data.tsx +0 -190
  158. package/src/ui/pages/settings/Index.tsx +0 -65
  159. package/src/ui/pages/settings/Labels.tsx +0 -224
  160. package/src/ui/pages/settings/Providers.tsx +0 -305
  161. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  162. package/src/ui/pages/settings/Sources.tsx +0 -326
  163. package/src/ui/pages/settings/Views.tsx +0 -96
  164. package/src/ui/styles.css +0 -1890
  165. package/src/ui/tsconfig.json +0 -21
  166. package/src/ui/vite.config.ts +0 -19
  167. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  168. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  169. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  170. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  171. package/tests/fixtures/facts.ts +0 -17
  172. package/tests/fixtures/golden-corpus.ts +0 -85
  173. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  174. package/tests/fixtures/hermes/paired_session.json +0 -23
  175. package/tests/fixtures/hermes/request_dump.json +0 -28
  176. package/tests/fixtures/hermes/session_iso.json +0 -38
  177. package/tests/fixtures/hermes/session_unix.json +0 -38
  178. package/tests/fixtures/hermes/system_only.json +0 -18
  179. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  180. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  181. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  182. package/tests/fixtures/sessions.ts +0 -22
  183. package/tests/integration/backfill-facts.test.ts +0 -362
  184. package/tests/integration/citation-explicit.test.ts +0 -111
  185. package/tests/integration/cite-event.test.ts +0 -169
  186. package/tests/integration/cite-memo.test.ts +0 -87
  187. package/tests/integration/db-restore.test.ts +0 -153
  188. package/tests/integration/embed-backfill.test.ts +0 -176
  189. package/tests/integration/fact-supersedence.test.ts +0 -313
  190. package/tests/integration/fts-index.test.ts +0 -60
  191. package/tests/integration/getbyids-sqlite.test.ts +0 -100
  192. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  193. package/tests/integration/hook-claude-settings.test.ts +0 -218
  194. package/tests/integration/hook-log.test.ts +0 -54
  195. package/tests/integration/hook-memo.test.ts +0 -68
  196. package/tests/integration/hook-pre-compact.test.ts +0 -105
  197. package/tests/integration/hook-subagent-start.test.ts +0 -102
  198. package/tests/integration/http.test.ts +0 -401
  199. package/tests/integration/keyword-search-fts.test.ts +0 -66
  200. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  201. package/tests/integration/mcp.test.ts +0 -260
  202. package/tests/integration/memo-sweep.test.ts +0 -91
  203. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  204. package/tests/integration/provider-registry.test.ts +0 -107
  205. package/tests/integration/recall-golden.test.ts +0 -59
  206. package/tests/integration/recall-sqlite.test.ts +0 -169
  207. package/tests/integration/scheduler.test.ts +0 -391
  208. package/tests/integration/session-end-hook.test.ts +0 -48
  209. package/tests/integration/session-start-hook.test.ts +0 -126
  210. package/tests/integration/source-registry.test.ts +0 -122
  211. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  212. package/tests/integration/stop-hook.test.ts +0 -560
  213. package/tests/integration/wal-checkpoint.test.ts +0 -49
  214. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  215. package/tests/unit/core/adapters/aider.test.ts +0 -230
  216. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  217. package/tests/unit/core/adapters/cursor.test.ts +0 -485
  218. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  219. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  220. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  221. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  222. package/tests/unit/core/adapters/pi.test.ts +0 -110
  223. package/tests/unit/core/adapters/windsurf.test.ts +0 -416
  224. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  225. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  226. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  227. package/tests/unit/core/filter.test.ts +0 -40
  228. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  229. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  230. package/tests/unit/core/hook/gate.test.ts +0 -29
  231. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  232. package/tests/unit/core/hook/select.test.ts +0 -66
  233. package/tests/unit/core/match-fields.test.ts +0 -39
  234. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  235. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  236. package/tests/unit/core/query-shape.test.ts +0 -92
  237. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  238. package/tests/unit/core/recall-service.test.ts +0 -200
  239. package/tests/unit/core/storage/live-status.test.ts +0 -54
  240. package/tests/unit/core/tokenize.test.ts +0 -32
  241. package/tests/unit/core/useful-scan.test.ts +0 -537
  242. package/tests/unit/llm/embed.test.ts +0 -93
  243. package/tests/unit/llm/ollama-client.test.ts +0 -124
  244. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  245. package/tsconfig.json +0 -31
  246. package/tsconfig.test.json +0 -11
  247. package/vitest.config.ts +0 -22
@@ -1,327 +0,0 @@
1
- /**
2
- * FactRecallService — agent-facing recall over the FactStore.
3
- *
4
- * Mirrors RecallService's keyword / semantic / hybrid pattern but works on
5
- * Fact records, not Session records. Sessions and facts answer different
6
- * questions and have incompatibly-shaped results, so this is a separate
7
- * service with its own MCP tool — see Section 4 of factstore-design.md.
8
- *
9
- * Filter pipeline:
10
- * 1. Storage pre-filter (subject, predicate, kind, minConfidence,
11
- * includeSuperseded). Cheap SQL.
12
- * 2. Keyword scoring over (value, subject, predicate). Pure, in-memory.
13
- * 3. Semantic KNN via fact_embeddings vec0 (when mode != keyword).
14
- * 4. Hybrid merge: 0.6 semantic + 0.4 keyword, matching the session
15
- * recall weights.
16
- *
17
- * Confidence policy: default `minConfidence` is 0.6 (Section 1 of the plan).
18
- * Facts with classifier confidence in [0.4, 0.6) get written by
19
- * extractFacts but stay out of agent recall unless the caller lowers the
20
- * floor explicitly.
21
- */
22
-
23
- import type { FactStore } from "@ports/fact-store.js";
24
- import type { LLMClient } from "@ports/llm-client.js";
25
- import { LLMUnreachableError } from "@ports/llm-client.js";
26
- import type {
27
- Fact,
28
- FactHit,
29
- FactMatchField,
30
- FactRecallQuery,
31
- FactRecallResult,
32
- RecallMode,
33
- } from "@shared/types.js";
34
- import { tokenSet } from "@core/recall/tokenize.js";
35
-
36
/** Page size applied when the caller does not pass `limit`. */
const DEFAULT_LIMIT = 10;
/** Upper bound that `clampLimit` enforces on the effective `limit`. */
const MAX_LIMIT = 100;
/** Confidence floor sent to storage when the caller gives no `minConfidence`. */
const DEFAULT_MIN_CONFIDENCE = 0.6;
/** `limit` passed to `factStore.listForRecall`, i.e. the size of the candidate pool. */
const STORAGE_FETCH_CAP = 500;
/** Share of the hybrid score contributed by the normalized keyword score. */
const HYBRID_KW_WEIGHT = 0.4;
/** Share of the hybrid score contributed by the normalized semantic score. */
const HYBRID_SEM_WEIGHT = 0.6;
/** Multiplier on `limit` for the KNN fetch; neighbours outside the candidate pool are dropped afterwards. */
const SEMANTIC_OVERFETCH = 3;

/** Per-field multiplier applied to the number of query tokens matched in that field. */
const FIELD_WEIGHTS = {
  value: 3,
  subject: 1,
  predicate: 1,
} as const;
49
-
50
/** Collaborators injected into `FactRecallService`. */
export interface FactRecallServiceDeps {
  /** Supplies recall candidates (`listForRecall`) and nearest-neighbour lookup (`semanticSearch`). */
  readonly factStore: FactStore;
  /** Embeds the query text (`embed`) for semantic and hybrid modes. */
  readonly llm: LLMClient;
}
54
-
55
/**
 * Recall over stored facts for agent callers, in keyword, semantic, or
 * hybrid mode.
 *
 * Candidates always come from `factStore.listForRecall`; keyword scoring and
 * semantic neighbours are both restricted to that candidate pool.
 */
export class FactRecallService {
  constructor(private readonly deps: FactRecallServiceDeps) {}

  /**
   * Runs one recall query and returns at most `limit` hits plus the total
   * number of matches found in the candidate pool.
   *
   * @param input - Free-text query and/or structured filters. `mode`
   *   defaults to `"keyword"` and `minConfidence` to `DEFAULT_MIN_CONFIDENCE`.
   * @returns An empty result when neither query text nor any of subject /
   *   predicate / kind is given, or when storage yields no candidates. When
   *   the embedder is unreachable, semantic mode returns an empty result and
   *   hybrid mode returns keyword-only hits; both are tagged
   *   `modeUnavailable: "ollama_unreachable"`.
   * @throws Any error from the store or the LLM client other than
   *   `LLMUnreachableError`.
   */
  async search(input: FactRecallQuery): Promise<FactRecallResult> {
    const mode: RecallMode = input.mode ?? "keyword";
    const limit = clampLimit(input.limit);
    const subject = input.subject ?? null;
    const predicate = input.predicate ?? null;
    const kind = input.kind ?? null;
    const queryText = (input.query ?? "").trim();

    const empty: FactRecallResult = {
      query: queryText,
      subject,
      predicate,
      kind,
      mode,
      limit,
      total: 0,
      results: [],
    };

    // A query with no signal at all → empty. Either free-text query, or a
    // structured filter (subject / predicate / kind) must be provided.
    if (!queryText && subject === null && predicate === null && kind === null) {
      return empty;
    }

    // Optional filters are only included when the caller set them, so the
    // store never receives an explicit `undefined` key.
    const filter: Parameters<FactStore["listForRecall"]>[0] = {
      includeSuperseded: input.includeSuperseded === true,
      minConfidence: input.minConfidence ?? DEFAULT_MIN_CONFIDENCE,
      limit: STORAGE_FETCH_CAP,
      ...(input.subject !== undefined ? { subject: input.subject } : {}),
      ...(input.predicate !== undefined ? { predicate: input.predicate } : {}),
      ...(input.kind !== undefined ? { kind: input.kind } : {}),
    };

    const candidates = await this.deps.factStore.listForRecall(filter);
    if (candidates.length === 0) return empty;

    const byId = new Map<string, Fact>(candidates.map((f) => [f.id, f]));
    const queryTokens = queryText ? new Set(tokenSet(queryText)) : new Set<string>();

    const kwHits =
      mode === "keyword" || mode === "hybrid"
        ? scoreAll(candidates, queryTokens)
        : [];

    let semHits: ReadonlyArray<SemanticHit> = [];
    let semError: "ollama_unreachable" | null = null;
    if ((mode === "semantic" || mode === "hybrid") && queryText) {
      try {
        semHits = await this.runSemantic(queryText, byId, limit * SEMANTIC_OVERFETCH);
      } catch (err) {
        // Only an unreachable embedder is downgraded to a soft failure.
        if (err instanceof LLMUnreachableError) {
          semError = "ollama_unreachable";
        } else {
          throw err;
        }
      }
    }

    if (mode === "semantic" && semError) {
      return { ...empty, modeUnavailable: semError };
    }

    // For pure structured queries (no query text, just subject/predicate),
    // a keyword pass with empty tokens scores zero and a semantic pass has
    // nothing to embed. Fall back to returning the storage filter result in
    // the order storage returned it. Applies to keyword AND hybrid, so exact
    // subject+predicate lookups that pass no query text still get rows.
    if ((mode === "keyword" || mode === "hybrid") && !queryText) {
      // Pass every candidate: finalize truncates to `limit` itself, so
      // `total` reports all matches here just as it does in the scored
      // branches (pre-slicing used to cap `total` at `limit`).
      const rows = candidates.map((f) => factToHit(f, 0, []));
      return finalize(queryText, subject, predicate, kind, mode, limit, rows);
    }

    if (mode === "keyword") {
      return finalize(queryText, subject, predicate, kind, mode, limit, kwHits.map(toKeywordHit));
    }

    if (mode === "semantic") {
      return finalize(queryText, subject, predicate, kind, mode, limit, semHits.map(toSemanticHit));
    }

    // hybrid
    const merged = mergeHybrid(kwHits, semHits, byId);
    const result = finalize(queryText, subject, predicate, kind, mode, limit, merged);
    return semError ? { ...result, modeUnavailable: semError } : result;
  }

  /**
   * Embeds `query`, asks the store for `fetchLimit` nearest neighbours, and
   * keeps only those present in `byId`.
   *
   * @returns Hits in the order the store returned the neighbours, with each
   *   distance converted through `cosineFromL2`.
   */
  private async runSemantic(
    query: string,
    byId: ReadonlyMap<string, Fact>,
    fetchLimit: number,
  ): Promise<ReadonlyArray<SemanticHit>> {
    const embedding = await this.deps.llm.embed(query, "query");
    const neighbors = await this.deps.factStore.semanticSearch(embedding.vector, fetchLimit);
    const hits: SemanticHit[] = [];
    for (const n of neighbors) {
      const fact = byId.get(n.factId);
      if (!fact) continue; // candidate was filtered out by subject/predicate/conf
      hits.push({ fact, similarity: cosineFromL2(n.distance) });
    }
    return hits;
  }
}
164
-
165
/** A fact that matched at least one query token, as produced by `scoreAll`. */
interface KeywordHit {
  readonly fact: Fact;
  /** Weighted count of query tokens matched across value, subject and predicate. */
  readonly score: number;
  /** Fields in which at least one query token was found. */
  readonly matchedIn: ReadonlyArray<FactMatchField>;
}
170
-
171
/** A nearest-neighbour result that is also part of the candidate pool. */
interface SemanticHit {
  readonly fact: Fact;
  /** Value of `cosineFromL2` for the neighbour's distance, so within [-1, 1]. */
  readonly similarity: number;
}
175
-
176
/**
 * Keyword-scores every fact against the query tokens.
 *
 * @returns Facts with a positive score, highest score first. An empty token
 *   set yields no hits at all.
 */
function scoreAll(
  facts: ReadonlyArray<Fact>,
  queryTokens: ReadonlySet<string>,
): ReadonlyArray<KeywordHit> {
  if (queryTokens.size === 0) {
    return [];
  }
  return facts
    .map((fact): KeywordHit => ({ fact, ...scoreFact(fact, queryTokens) }))
    .filter((hit) => hit.score > 0)
    .sort((left, right) => right.score - left.score);
}
189
-
190
/**
 * Scores one fact by token overlap with the query.
 *
 * Each field contributes `FIELD_WEIGHTS[field]` per shared token; fields are
 * visited as value, subject, predicate, which is also the order of
 * `matchedIn`.
 */
function scoreFact(
  fact: Fact,
  queryTokens: ReadonlySet<string>,
): { score: number; matchedIn: ReadonlyArray<FactMatchField> } {
  const scoredFields = ["value", "subject", "predicate"] as const;

  let score = 0;
  const matchedIn: FactMatchField[] = [];
  for (const field of scoredFields) {
    const overlap = intersectionSize(queryTokens, tokenSet(fact[field]));
    if (overlap === 0) continue;
    score += FIELD_WEIGHTS[field] * overlap;
    matchedIn.push(field);
  }
  return { score, matchedIn };
}
217
-
218
/**
 * Combines keyword and semantic hits into one ranked list.
 *
 * Each score is divided by the larger of 1 and the best score of its kind,
 * then blended as `HYBRID_SEM_WEIGHT * semantic + HYBRID_KW_WEIGHT * keyword`.
 * Ids missing from `byId` are dropped. Rows are returned best score first.
 */
function mergeHybrid(
  kwHits: ReadonlyArray<KeywordHit>,
  semHits: ReadonlyArray<SemanticHit>,
  byId: ReadonlyMap<string, Fact>,
): ReadonlyArray<FactHit> {
  const keywordCeiling = Math.max(1, ...kwHits.map((hit) => hit.score));
  const semanticCeiling = Math.max(1, ...semHits.map((hit) => hit.similarity));

  const keywordById = new Map<string, KeywordHit>();
  for (const hit of kwHits) keywordById.set(hit.fact.id, hit);
  const semanticById = new Map<string, SemanticHit>();
  for (const hit of semHits) semanticById.set(hit.fact.id, hit);

  // Keyword ids first, then semantic-only ids.
  const unionIds = new Set<string>([...keywordById.keys(), ...semanticById.keys()]);

  const merged: FactHit[] = [];
  for (const id of unionIds) {
    const fact = byId.get(id);
    if (!fact) continue;

    const keywordHit = keywordById.get(id);
    const semanticHit = semanticById.get(id);
    const keywordShare = keywordHit ? keywordHit.score / keywordCeiling : 0;
    const semanticShare = semanticHit ? semanticHit.similarity / semanticCeiling : 0;

    merged.push({
      ...fact,
      matchScore: round4(HYBRID_SEM_WEIGHT * semanticShare + HYBRID_KW_WEIGHT * keywordShare),
      matchedIn: uniqueFields(
        keywordHit?.matchedIn ?? [],
        semanticHit ? (["semantic"] as FactMatchField[]) : [],
      ),
      keywordScore: round4(keywordShare),
      semanticScore: round4(semanticShare),
    });
  }

  merged.sort((left, right) => right.matchScore - left.matchScore);
  return merged;
}
254
-
255
/** Copies `fact` into a new hit object carrying the given score and matched fields. */
function factToHit(
  fact: Fact,
  score: number,
  matchedIn: ReadonlyArray<FactMatchField>,
): FactHit {
  const hit: FactHit = { ...fact, matchScore: score, matchedIn };
  return hit;
}
262
-
263
/** Converts a keyword hit to the public hit shape, keeping its raw score and matched fields. */
function toKeywordHit(h: KeywordHit): FactHit {
  const { fact, score, matchedIn } = h;
  return factToHit(fact, score, matchedIn);
}
266
-
267
/** Converts a semantic hit to the public hit shape, scored by similarity and tagged "semantic". */
function toSemanticHit(h: SemanticHit): FactHit {
  const { fact, similarity } = h;
  return factToHit(fact, similarity, ["semantic"]);
}
270
-
271
/**
 * Builds the result envelope: echoes the query parameters, reports the
 * number of hits received as `total`, and keeps the first `limit` hits.
 */
function finalize(
  query: string,
  subject: string | null,
  predicate: string | null,
  kind: FactRecallResult["kind"],
  mode: RecallMode,
  limit: number,
  hits: ReadonlyArray<FactHit>,
): FactRecallResult {
  const total = hits.length;
  const results = hits.slice(0, limit);
  return { query, subject, predicate, kind, mode, limit, total, results };
}
291
-
292
/**
 * Resolves the effective page size: `DEFAULT_LIMIT` when none is given, 1 for
 * NaN or anything below 1, otherwise the truncated value capped at `MAX_LIMIT`.
 */
function clampLimit(limit: number | undefined): number {
  const requested = limit ?? DEFAULT_LIMIT;
  return Number.isNaN(requested) || requested < 1
    ? 1
    : Math.min(MAX_LIMIT, Math.trunc(requested));
}
297
-
298
/**
 * Converts an L2 distance to a cosine value via `1 - d² / 2`, clamped to
 * [-1, 1] and rounded to four decimals.
 *
 * NOTE(review): the identity holds for unit-length vectors; presumably the
 * stored embeddings are normalized — confirm against the embedding pipeline.
 */
function cosineFromL2(distance: number): number {
  const halfSquared = (distance * distance) / 2;
  const clamped = Math.min(1, Math.max(-1, 1 - halfSquared));
  return round4(clamped);
}
302
-
303
/** Rounds `value` to four decimal places using `Math.round`. */
function round4(value: number): number {
  const scale = 10_000;
  return Math.round(value * scale) / scale;
}
306
-
307
/** Counts the members two sets share, iterating the smaller set and probing the larger. */
function intersectionSize<T>(a: ReadonlySet<T>, b: ReadonlySet<T>): number {
  const iterateA = a.size <= b.size;
  const smaller = iterateA ? a : b;
  const larger = iterateA ? b : a;

  let shared = 0;
  smaller.forEach((member) => {
    if (larger.has(member)) shared += 1;
  });
  return shared;
}
313
-
314
/**
 * Concatenates two field lists and removes duplicates, keeping each field at
 * the position of its first occurrence.
 *
 * A `Set` preserves insertion order, so it replaces the manual seen-set loop
 * without changing the output order.
 */
function uniqueFields(
  a: ReadonlyArray<FactMatchField>,
  b: ReadonlyArray<FactMatchField>,
): ReadonlyArray<FactMatchField> {
  return Array.from(new Set<FactMatchField>([...a, ...b]));
}
@@ -1,142 +0,0 @@
1
- /**
2
- * scanOnce — mtime-gated incremental discovery shared by every adapter.
3
- *
4
- * The Python codebase bundled this logic into each adapter (`scan_once` +
5
- * `record_classified` methods). In the TS port the adapter stays a pure
6
- * parser (TranscriptAdapter port); the mtime check and adapter_state
7
- * upsert live here, generic over the adapter. Same behavior, less
8
- * duplication across claude-code / hermes / pi.
9
- *
10
- * Contract (per file under adapter.discover()):
11
- * - If `now - mtime < idleMinutes * 60s` → still active, skip
12
- * - Lookup adapter_state by (adapterName, sourcePath):
13
- * no row + file idle → NEW: parse + return (chunk, supersedes=null)
14
- * row exists, size match, failures < ceil → UNCHANGED: skip
15
- * row exists, size match, failures >= ceil → FAILED_CEILING: skip (log once per session)
16
- * row exists, file grew → RESUMED: parse + return, reset failure_count
17
- * - After successful classify+insert downstream, call `recordClassified`
18
- * to upsert adapter_state with the new size + session_id.
19
- * - On classify/storage failure, call `recordFailed` to increment failure_count.
20
- * When failure_count reaches MAX_CLASSIFY_FAILURES and the file hasn't grown,
21
- * the file is permanently skipped until new content arrives.
22
- */
23
-
24
- import { statSync } from "node:fs";
25
- import type Database from "better-sqlite3";
26
- import type {
27
- SessionChunk,
28
- TranscriptAdapter,
29
- } from "@ports/transcript-adapter.js";
30
-
31
/** One transcript that `scanOnce` parsed and returned for downstream processing. */
export interface ScanResult {
  /** Value returned by `adapter.parseSession` for the file. */
  readonly chunk: SessionChunk;
  /**
   * `session_id` from the file's existing `adapter_state` row; `null` when
   * there is no row or the row has no session id.
   */
  readonly supersedes: string | null;
}
35
-
36
/**
 * Failure ceiling for a transcript file. `scanOnce` zeroes a row's
 * `failure_count` when it is at or above this value and the file size has
 * changed since the row was written.
 */
export const MAX_CLASSIFY_FAILURES = 3;
37
-
38
/** Columns `scanOnce` selects from `adapter_state` for one adapter. */
interface AdapterStateRow {
  source_path: string;
  /** File size stored by the last record call; a null is compared as 0. */
  file_size: number | null;
  /** Session id stored by `recordClassified`; `recordFailed` inserts NULL for new rows. */
  session_id: string | null;
  /** Selected as `COALESCE(failure_count, 0)`, so never null here. */
  failure_count: number;
}
44
-
45
/**
 * Finds transcripts that have been idle for at least `idleMinutes` and are
 * either unknown to `adapter_state` or differ in size from the stored row,
 * then parses them.
 *
 * @param adapter - Supplies `name`, `discover()` and `parseSession(path)`.
 * @param idleMinutes - Minimum age of a file's mtime before it is considered.
 * @param db - Database holding the `adapter_state` table.
 * @param now - Current time in epoch milliseconds; defaults to `Date.now()`.
 * @returns One entry per parsed file. Files that cannot be stat'ed, are still
 *   active, are unchanged in size, or parse to a falsy chunk are skipped.
 */
export async function scanOnce(
  adapter: TranscriptAdapter,
  idleMinutes: number,
  db: Database.Database,
  now: number = Date.now(),
): Promise<ReadonlyArray<ScanResult>> {
  const idleThresholdMs = idleMinutes * 60 * 1000;

  // Load every known row for this adapter up front, keyed by path.
  const knownByPath = new Map<string, AdapterStateRow>();
  const knownRows = db
    .prepare<[string], AdapterStateRow>(
      "SELECT source_path, file_size, session_id, COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ?",
    )
    .all(adapter.name);
  for (const row of knownRows) knownByPath.set(row.source_path, row);

  const results: ScanResult[] = [];
  for (const path of await adapter.discover()) {
    let stats;
    try {
      stats = statSync(path);
    } catch {
      continue;
    }
    // Recently modified files are treated as still being written.
    if (now - stats.mtimeMs < idleThresholdMs) continue;

    const known = knownByPath.get(path);
    // Same size as last time: skip whether the last attempt succeeded or
    // failed. A failed file is only retried once its size changes.
    if (known && (known.file_size ?? 0) === stats.size) continue;

    // Size changed on a file that hit the failure ceiling: clear the counter.
    if (known && known.failure_count >= MAX_CLASSIFY_FAILURES) {
      db.prepare(
        "UPDATE adapter_state SET failure_count = 0 WHERE adapter_name = ? AND source_path = ?",
      ).run(adapter.name, path);
    }

    const chunk = await adapter.parseSession(path);
    if (chunk) {
      results.push({ chunk, supersedes: known ? known.session_id : null });
    }
  }
  return results;
}
96
-
97
/**
 * Upserts the `adapter_state` row for a successfully processed transcript:
 * stores the file's current size as both `last_offset` and `file_size`,
 * stores the session id, and sets `failure_count` to 0.
 *
 * Does nothing when the file cannot be stat'ed.
 */
export function recordClassified(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
  sessionId: string,
): void {
  let stats;
  try {
    stats = statSync(sourcePath);
  } catch {
    return;
  }
  const byteLength = stats.size;

  const upsert = db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, ?, 0, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       last_offset = excluded.last_offset,
       file_size = excluded.file_size,
       session_id = excluded.session_id,
       failure_count = 0,
       last_processed_at = excluded.last_processed_at`,
  );
  upsert.run(adapterName, sourcePath, byteLength, byteLength, sessionId);
}
121
-
122
/**
 * Records a failed attempt for a transcript: inserts an `adapter_state` row
 * with `failure_count` 1 and no session id, or, if the row already exists,
 * updates its `file_size` and timestamp and increments `failure_count`.
 *
 * Does nothing when the file cannot be stat'ed.
 */
export function recordFailed(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
): void {
  let stats;
  try {
    stats = statSync(sourcePath);
  } catch {
    return;
  }
  const byteLength = stats.size;

  const upsert = db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, NULL, 1, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       file_size = excluded.file_size,
       failure_count = failure_count + 1,
       last_processed_at = excluded.last_processed_at`,
  );
  upsert.run(adapterName, sourcePath, byteLength, byteLength);
}
@@ -1,225 +0,0 @@
1
- /**
2
- * ScanScheduler — periodic ingest loop. Ports `scheduler.py`.
3
- *
4
- * Each tick walks the registered adapters, runs scanOnce to discover idle
5
- * transcript files, classifies the resulting SessionChunks via the active
6
- * classifier, and persists them through SqliteSessionStore.insertSession
7
- * with the embedder. Records adapter_state after each successful insert
8
- * so the next tick is incremental.
9
- *
10
- * Single-process: the scheduler runs alongside the HTTP server (Phase D
11
- * wires it into `nlm start`). No worker thread; Node's event loop is
12
- * enough — adapter discovery is filesystem-bound and the per-chunk
13
- * classify call is async-awaited with a wall-clock timeout to keep the
14
- * tick loop responsive.
15
- *
16
- * Confidence floor of 0.3 mirrors Python: classifier outputs below that
17
- * are skipped rather than persisted as low-quality noise.
18
- */
19
-
20
- import type { LLMClient } from "@ports/llm-client.js";
21
- import type { TranscriptAdapter } from "@ports/transcript-adapter.js";
22
- import { extractFacts } from "@core/facts/extract-facts.js";
23
- import type { SqliteFactStore } from "@core/storage/sqlite-fact-store.js";
24
- import type {
25
- IngestRecord,
26
- SqliteSessionStore,
27
- } from "@core/storage/sqlite-session-store.js";
28
- import { MAX_CLASSIFY_FAILURES, recordClassified, recordFailed, scanOnce } from "./scan-once.js";
29
-
30
/** Delay between scheduled ticks: 30 minutes, matching the Python default. */
const DEFAULT_INTERVAL_MS = 30 * 60_000;

/** Wall-clock limit for a single classify call, in milliseconds. */
const DEFAULT_CLASSIFY_TIMEOUT_MS = 120_000;

/** Classifications with a confidence below this value are not persisted. */
const DEFAULT_CONFIDENCE_FLOOR = 0.3;

/** Idle threshold, in minutes, handed to scanOnce. */
const DEFAULT_IDLE_MINUTES = 15;

/** Upper bound applied with `String.prototype.slice` to the stored session body. */
const BODY_CAP = 200_000;
35
-
36
/** Construction-time configuration for {@link ScanScheduler}. */
export interface SchedulerOptions {
  /** Session store; sessions are inserted through it and its raw DB handle backs adapter_state. */
  readonly store: SqliteSessionStore;
  /** Adapters visited, in order, on every tick. */
  readonly adapters: readonly TranscriptAdapter[];
  /** Client whose `classify` is called with each chunk's text. */
  readonly classifier: LLMClient;
  /** Forwarded to `insertSession`; treated as `null` when omitted. */
  readonly embedder?: LLMClient | null;
  /**
   * FactStore for Phase B.2 fact ingest. When provided, the scheduler
   * extracts facts from each classify result and persists them atomically
   * with the session row. Optional — when null, sessions ingest as before
   * with no facts written (backwards-compatible default for tests not yet
   * updated, and for any future caller that wants facts off).
   */
  readonly factStore?: SqliteFactStore | null;
  /** Milliseconds between scheduled ticks. Defaults to DEFAULT_INTERVAL_MS. */
  readonly intervalMs?: number;
  /** Milliseconds allowed per classify call. Defaults to DEFAULT_CLASSIFY_TIMEOUT_MS. */
  readonly classifyTimeoutMs?: number;
  /** Minimum confidence required to persist a chunk. Defaults to DEFAULT_CONFIDENCE_FLOOR. */
  readonly confidenceFloor?: number;
  /** Idle threshold in minutes passed to scanOnce. Defaults to DEFAULT_IDLE_MINUTES. */
  readonly idleMinutes?: number;
  /** Defaults to console.error. Set to a noop in tests. */
  readonly logger?: (msg: string) => void;
}
56
-
57
/** Counters describing what one {@link ScanScheduler.tick} call did. */
export interface TickReport {
  /** Chunks whose session row was stored and recorded as classified. */
  readonly inserted: number;
  /** Chunks dropped because the classifier confidence was below the floor. */
  readonly skippedLowConfidence: number;
  /** Chunks whose classify call threw or timed out. */
  readonly classifyFailures: number;
  /** Chunks whose insert (or follow-up bookkeeping) threw. */
  readonly storageFailures: number;
  /** Total chunks returned by scanOnce across all adapters. */
  readonly chunksSeen: number;
}
64
-
65
/**
 * Periodic ingest loop.
 *
 * Each tick walks the configured adapters, asks scanOnce for chunks,
 * classifies every chunk (bounded by a wall-clock timeout), and persists
 * chunks whose confidence reaches the floor through the session store.
 * Successes and failures are written to adapter_state via
 * recordClassified / recordFailed.
 *
 * The loop is a chain of one-shot timers: the next tick is armed only after
 * the previous one settles, so scheduled ticks do not overlap each other.
 */
export class ScanScheduler {
  /** Options with every default resolved; embedder/factStore normalised to null. */
  private readonly opts: Required<Omit<SchedulerOptions, "embedder" | "factStore">> & {
    readonly embedder: LLMClient | null;
    readonly factStore: SqliteFactStore | null;
  };
  /** True until start() is called and again after stop(). */
  private stopped = true;
  /** Most recently armed timer, or null when none has been armed / after stop(). */
  private timer: NodeJS.Timeout | null = null;

  /**
   * @param opts - Scheduler configuration; omitted optional fields fall back
   *   to the module-level DEFAULT_* constants, and the logger to console.error.
   */
  constructor(opts: SchedulerOptions) {
    this.opts = {
      store: opts.store,
      adapters: opts.adapters,
      classifier: opts.classifier,
      embedder: opts.embedder ?? null,
      factStore: opts.factStore ?? null,
      intervalMs: opts.intervalMs ?? DEFAULT_INTERVAL_MS,
      classifyTimeoutMs: opts.classifyTimeoutMs ?? DEFAULT_CLASSIFY_TIMEOUT_MS,
      confidenceFloor: opts.confidenceFloor ?? DEFAULT_CONFIDENCE_FLOOR,
      idleMinutes: opts.idleMinutes ?? DEFAULT_IDLE_MINUTES,
      logger: opts.logger ?? ((msg) => console.error(msg)),
    };
  }

  /** Starts the loop with an immediate first tick. No-op if already running. */
  start(): void {
    if (!this.stopped) return;
    this.stopped = false;
    this.scheduleNext(0);
  }

  /**
   * Stops the loop and cancels the pending timer. A tick that is already
   * running is not interrupted, but it will not re-arm the timer while the
   * scheduler remains stopped.
   */
  stop(): void {
    this.stopped = true;
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /**
   * Arms a one-shot timer that runs tick() and then re-arms itself with the
   * configured interval.
   *
   * @param delayMs - Delay before the tick runs.
   */
  private scheduleNext(delayMs: number): void {
    if (this.stopped) return;
    // Drop any timer that is still pending so a stop()/start() issued while a
    // tick is in flight cannot leave two independent timer chains running.
    // Clearing an already-fired timer is harmless.
    if (this.timer) clearTimeout(this.timer);
    this.timer = setTimeout(() => {
      void this.tick()
        .catch((e: unknown) => {
          // tick() can still reject (e.g. bookkeeping queries in its catch
          // handlers throwing). Swallow-and-log here: an unhandled rejection
          // would otherwise escape the timer callback and take the process down.
          this.opts.logger(`[scheduler] tick error: ${ScanScheduler.errorText(e)}`);
        })
        .finally(() => this.scheduleNext(this.opts.intervalMs));
    }, delayMs);
  }

  /**
   * Runs one full ingest pass over every adapter.
   *
   * @returns Counters for the chunks seen, inserted, skipped and failed.
   * @throws Whatever recordFailed or the failure_count lookup throws; those
   *   run outside the per-chunk try blocks.
   */
  async tick(): Promise<TickReport> {
    let inserted = 0;
    let skippedLowConfidence = 0;
    let classifyFailures = 0;
    let storageFailures = 0;
    let chunksSeen = 0;

    for (const adapter of this.opts.adapters) {
      let results;
      try {
        results = await scanOnce(adapter, this.opts.idleMinutes, this.opts.store.rawDb());
      } catch (e) {
        // One broken adapter must not prevent the others from being scanned.
        this.opts.logger(
          `[scheduler] scanOnce error for ${adapter.name}: ${ScanScheduler.errorText(e)}`,
        );
        continue;
      }

      for (const { chunk, supersedes } of results) {
        chunksSeen += 1;

        let classification;
        try {
          classification = await withTimeout(
            this.opts.classifier.classify(chunk.text),
            this.opts.classifyTimeoutMs,
          );
        } catch (e) {
          classifyFailures += 1;
          const reason =
            e instanceof TimeoutError ? "timed out" : `error: ${ScanScheduler.errorText(e)}`;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          // Read the count back so the log line shows how close this file is
          // to the MAX_CLASSIFY_FAILURES ceiling.
          const failureRow = this.opts.store
            .rawDb()
            .prepare<[string, string], { failure_count: number }>(
              "SELECT COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ? AND source_path = ?",
            )
            .get(adapter.name, chunk.sourcePath);
          // No row (recordFailed returns early when the file cannot be
          // stat'ed) is reported as the first failure.
          const count = failureRow?.failure_count ?? 1;
          const ceiling =
            count >= MAX_CLASSIFY_FAILURES
              ? ` (failure ${count}/${MAX_CLASSIFY_FAILURES} — will skip until file grows)`
              : ` (failure ${count}/${MAX_CLASSIFY_FAILURES})`;
          this.opts.logger(`[scheduler] classifier ${reason} for ${chunk.id}${ceiling}`);
          continue;
        }

        if (classification.confidence < this.opts.confidenceFloor) {
          // Low-confidence output is dropped; nothing is written to
          // adapter_state on this path.
          skippedLowConfidence += 1;
          continue;
        }

        const record: IngestRecord = {
          id: chunk.id,
          runtime: chunk.runtime,
          runtimeSessionId: chunk.runtimeSessionId || null,
          startedAt: chunk.startedAt,
          endedAt: chunk.endedAt || null,
          durationMin: chunk.durationMin,
          label: classification.label,
          summary: classification.summary,
          body: chunk.text.slice(0, BODY_CAP),
          status: "closed",
          transcriptKind: adapter.transcriptKind,
          transcriptPath: chunk.sourcePath,
          transcriptOffset: chunk.byteRange[0],
          transcriptLength: chunk.byteRange[1],
          entities: classification.entities,
          decisions: classification.decisions,
          openQuestions: classification.open,
        };

        // Facts are extracted only when a fact store was configured.
        const factSink = this.opts.factStore
          ? {
              factStore: this.opts.factStore,
              facts: extractFacts(classification, chunk.id, chunk.startedAt),
            }
          : null;

        try {
          await this.opts.store.insertSession(
            record,
            this.opts.embedder,
            supersedes,
            factSink,
          );
          recordClassified(
            this.opts.store.rawDb(),
            adapter.name,
            chunk.sourcePath,
            chunk.id,
          );
          inserted += 1;
        } catch (e) {
          storageFailures += 1;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          this.opts.logger(
            `[scheduler] storage error for ${chunk.id}: ${ScanScheduler.errorText(e)}`,
          );
        }
      }
    }

    return { inserted, skippedLowConfidence, classifyFailures, storageFailures, chunksSeen };
  }

  /** Renders a caught value for log output: `message` for Errors, `String(e)` otherwise. */
  private static errorText(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }
}
210
-
211
/**
 * Rejection reason produced by withTimeout when the deadline wins the race.
 * Callers use `instanceof TimeoutError` to tell a timeout from other errors.
 */
class TimeoutError extends Error {
}
212
-
213
/**
 * Races `promise` against a deadline.
 *
 * @param promise - The operation to wait for. It is not cancelled when the
 *   deadline passes; only the wait is abandoned.
 * @param ms - Deadline in milliseconds.
 * @returns The value `promise` resolves with, if it settles first.
 * @throws TimeoutError when `ms` elapses first; otherwise whatever `promise`
 *   rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let handle: NodeJS.Timeout | undefined;
  const deadline = new Promise<T>((_resolve, reject) => {
    handle = setTimeout(() => {
      reject(new TimeoutError(`timed out after ${ms}ms`));
    }, ms);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer so a settled race does not keep the event loop alive.
    if (handle) clearTimeout(handle);
  }
}