nlm-memory 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (257) hide show
  1. package/README.md +89 -34
  2. package/dist/cli/digest.d.ts +20 -0
  3. package/dist/cli/digest.js +142 -0
  4. package/dist/cli/digest.js.map +1 -0
  5. package/dist/cli/nlm.d.ts +1 -0
  6. package/dist/cli/nlm.js +25 -1
  7. package/dist/cli/nlm.js.map +1 -1
  8. package/dist/core/digest/compose.d.ts +38 -0
  9. package/dist/core/digest/compose.js +93 -0
  10. package/dist/core/digest/compose.js.map +1 -0
  11. package/dist/core/digest/hook-liveness.d.ts +32 -0
  12. package/dist/core/digest/hook-liveness.js +54 -0
  13. package/dist/core/digest/hook-liveness.js.map +1 -0
  14. package/dist/http/app.js +2 -1
  15. package/dist/http/app.js.map +1 -1
  16. package/dist/mcp/server.js +20 -1
  17. package/dist/mcp/server.js.map +1 -1
  18. package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
  19. package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
  20. package/dist/ui/index.html +2 -2
  21. package/package.json +26 -1
  22. package/.agents/plugins/marketplace.json +0 -20
  23. package/.github/workflows/ci.yml +0 -30
  24. package/docs/methodology/re-derivation-rate.md +0 -112
  25. package/docs/methodology/useful-hit-rate.md +0 -79
  26. package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
  27. package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
  28. package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
  29. package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
  30. package/docs/plans/desktop-product.md +0 -69
  31. package/docs/plans/factstore-design.md +0 -236
  32. package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
  33. package/logs/CHANGELOG/CHANGELOG.md +0 -209
  34. package/migrations/000_initial_schema.sql +0 -174
  35. package/migrations/001_entity_type_rename.sql +0 -17
  36. package/migrations/002_adapter_state_extend.sql +0 -12
  37. package/migrations/003_session_embeddings.sql +0 -11
  38. package/migrations/004_facts.sql +0 -46
  39. package/migrations/005_sources.sql +0 -31
  40. package/migrations/006_providers.sql +0 -33
  41. package/migrations/007_source_tokens.sql +0 -17
  42. package/migrations/008_fts_rebuild.sql +0 -9
  43. package/migrations/009_session_embedding_chunks.sql +0 -46
  44. package/migrations/010_sources_opencode.sql +0 -30
  45. package/migrations/011_sources_hermes_agent.sql +0 -30
  46. package/migrations/012_sources_aider.sql +0 -30
  47. package/migrations/013_adapter_state_failure_count.sql +0 -12
  48. package/migrations/014_sources_cursor.sql +0 -30
  49. package/migrations/015_sources_windsurf.sql +0 -30
  50. package/plugin-hermes-agent/README.md +0 -49
  51. package/plugin-hermes-agent/__init__.py +0 -75
  52. package/plugin-hermes-agent/plugin.yaml +0 -15
  53. package/scripts/backfill-citations.mjs +0 -0
  54. package/scripts/build-codex-plugin.mjs +0 -61
  55. package/scripts/deepseek-probe.mjs +0 -67
  56. package/scripts/extract-triples.mjs +0 -207
  57. package/scripts/longmemeval/embedding-cache.ts +0 -77
  58. package/scripts/longmemeval/fetch-dataset.sh +0 -25
  59. package/scripts/longmemeval/run-harness.ts +0 -315
  60. package/scripts/longmemeval/scorer.ts +0 -99
  61. package/scripts/longmemeval/tsconfig.json +0 -9
  62. package/scripts/longmemeval/types.ts +0 -35
  63. package/scripts/nlm-daily-digest.py +0 -239
  64. package/scripts/nlm-daily-digest.sh +0 -28
  65. package/src/cli/classify-parity.ts +0 -257
  66. package/src/cli/launchctl-helpers.ts +0 -49
  67. package/src/cli/nlm.ts +0 -1078
  68. package/src/core/actions/actions-log.ts +0 -118
  69. package/src/core/actions/overlay.ts +0 -117
  70. package/src/core/adapters/aider.ts +0 -205
  71. package/src/core/adapters/claude-code.ts +0 -293
  72. package/src/core/adapters/common.ts +0 -54
  73. package/src/core/adapters/cursor.ts +0 -486
  74. package/src/core/adapters/from-source.ts +0 -67
  75. package/src/core/adapters/hermes-agent.ts +0 -240
  76. package/src/core/adapters/hermes.ts +0 -277
  77. package/src/core/adapters/jsonl-generic.ts +0 -208
  78. package/src/core/adapters/opencode.ts +0 -281
  79. package/src/core/adapters/pi.ts +0 -264
  80. package/src/core/adapters/windsurf.ts +0 -386
  81. package/src/core/classifier/prompt.ts +0 -200
  82. package/src/core/dataset/build-dataset.ts +0 -463
  83. package/src/core/embedding/chunk-body.ts +0 -76
  84. package/src/core/embedding/embed-backfill.ts +0 -210
  85. package/src/core/embedding/embed-normalize.ts +0 -135
  86. package/src/core/facts/backfill-facts.ts +0 -254
  87. package/src/core/facts/extract-facts.ts +0 -50
  88. package/src/core/hook/citation-detect.ts +0 -124
  89. package/src/core/hook/cite-memo.ts +0 -68
  90. package/src/core/hook/claude-settings.ts +0 -187
  91. package/src/core/hook/gate.ts +0 -25
  92. package/src/core/hook/hook-log.ts +0 -41
  93. package/src/core/hook/memo-sweep.ts +0 -164
  94. package/src/core/hook/memo.ts +0 -67
  95. package/src/core/hook/pointer-block.ts +0 -26
  96. package/src/core/hook/select.ts +0 -32
  97. package/src/core/hook/transcript.ts +0 -121
  98. package/src/core/ingest/ingest-session.ts +0 -111
  99. package/src/core/providers/provider-models.ts +0 -100
  100. package/src/core/providers/provider-registry.ts +0 -196
  101. package/src/core/recall/citation-log.ts +0 -108
  102. package/src/core/recall/filter.ts +0 -27
  103. package/src/core/recall/index.ts +0 -6
  104. package/src/core/recall/match-fields.ts +0 -40
  105. package/src/core/recall/query-log.ts +0 -149
  106. package/src/core/recall/query-shape.ts +0 -66
  107. package/src/core/recall/recall-service.ts +0 -320
  108. package/src/core/recall/recent-log.ts +0 -59
  109. package/src/core/recall/tokenize.ts +0 -18
  110. package/src/core/recall/useful-scan.ts +0 -336
  111. package/src/core/recall-facts/fact-query-log.ts +0 -150
  112. package/src/core/recall-facts/fact-recall-service.ts +0 -327
  113. package/src/core/scheduler/scan-once.ts +0 -142
  114. package/src/core/scheduler/scheduler.ts +0 -225
  115. package/src/core/sources/source-registry.ts +0 -278
  116. package/src/core/storage/db-restore.ts +0 -133
  117. package/src/core/storage/live-status.ts +0 -45
  118. package/src/core/storage/migrate.ts +0 -72
  119. package/src/core/storage/sqlite-fact-store.ts +0 -304
  120. package/src/core/storage/sqlite-session-store.ts +0 -810
  121. package/src/hook/hook-auth.ts +0 -18
  122. package/src/hook/prompt-recall-hook.ts +0 -180
  123. package/src/hook/session-end-hook.ts +0 -81
  124. package/src/hook/session-start-hook.ts +0 -168
  125. package/src/hook/stop-hook.ts +0 -239
  126. package/src/http/app.ts +0 -1215
  127. package/src/install/claude-code.ts +0 -128
  128. package/src/install/codex.ts +0 -367
  129. package/src/install/cursor.ts +0 -68
  130. package/src/install/hermes-agent.ts +0 -76
  131. package/src/install/hermes.ts +0 -78
  132. package/src/install/nlm-dir-perms.ts +0 -55
  133. package/src/install/ollama.ts +0 -284
  134. package/src/install/setup.ts +0 -489
  135. package/src/install/windsurf.ts +0 -68
  136. package/src/llm/classifier-box.ts +0 -64
  137. package/src/llm/deepseek-client.ts +0 -150
  138. package/src/llm/env-autoload.ts +0 -55
  139. package/src/llm/ollama-client.ts +0 -189
  140. package/src/mcp/server.ts +0 -534
  141. package/src/ports/fact-store.ts +0 -102
  142. package/src/ports/llm-client.ts +0 -52
  143. package/src/ports/logger.ts +0 -16
  144. package/src/ports/session-store.ts +0 -45
  145. package/src/ports/transcript-adapter.ts +0 -55
  146. package/src/shared/types.ts +0 -149
  147. package/src/ui/App.tsx +0 -58
  148. package/src/ui/components/PromoteOpenButton.tsx +0 -65
  149. package/src/ui/components/SessionDrawer.tsx +0 -199
  150. package/src/ui/components/SideNav.tsx +0 -162
  151. package/src/ui/components/Skeleton.tsx +0 -107
  152. package/src/ui/index.html +0 -13
  153. package/src/ui/lib/actions.ts +0 -30
  154. package/src/ui/lib/api.ts +0 -92
  155. package/src/ui/lib/dataset.ts +0 -141
  156. package/src/ui/lib/registries.ts +0 -155
  157. package/src/ui/lib/view-settings.ts +0 -41
  158. package/src/ui/main.tsx +0 -15
  159. package/src/ui/pages/Live.tsx +0 -229
  160. package/src/ui/pages/Pulse.tsx +0 -415
  161. package/src/ui/pages/Recall.tsx +0 -190
  162. package/src/ui/pages/River.tsx +0 -354
  163. package/src/ui/pages/Search.tsx +0 -386
  164. package/src/ui/pages/Stub.tsx +0 -9
  165. package/src/ui/pages/Thread.tsx +0 -473
  166. package/src/ui/pages/settings/Classifier.tsx +0 -227
  167. package/src/ui/pages/settings/Data.tsx +0 -190
  168. package/src/ui/pages/settings/Index.tsx +0 -65
  169. package/src/ui/pages/settings/Labels.tsx +0 -224
  170. package/src/ui/pages/settings/Providers.tsx +0 -305
  171. package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
  172. package/src/ui/pages/settings/Sources.tsx +0 -326
  173. package/src/ui/pages/settings/Views.tsx +0 -96
  174. package/src/ui/styles.css +0 -1890
  175. package/src/ui/tsconfig.json +0 -21
  176. package/src/ui/vite.config.ts +0 -19
  177. package/tests/fixtures/claude_code/short_session.jsonl +0 -2
  178. package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
  179. package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
  180. package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
  181. package/tests/fixtures/facts.ts +0 -17
  182. package/tests/fixtures/golden-corpus.ts +0 -85
  183. package/tests/fixtures/hermes/paired_request_dump.json +0 -24
  184. package/tests/fixtures/hermes/paired_session.json +0 -23
  185. package/tests/fixtures/hermes/request_dump.json +0 -28
  186. package/tests/fixtures/hermes/session_iso.json +0 -38
  187. package/tests/fixtures/hermes/session_unix.json +0 -38
  188. package/tests/fixtures/hermes/system_only.json +0 -18
  189. package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
  190. package/tests/fixtures/pi/short-successful.jsonl +0 -5
  191. package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
  192. package/tests/fixtures/sessions.ts +0 -22
  193. package/tests/integration/backfill-facts.test.ts +0 -362
  194. package/tests/integration/citation-explicit.test.ts +0 -111
  195. package/tests/integration/cite-event.test.ts +0 -169
  196. package/tests/integration/cite-memo.test.ts +0 -87
  197. package/tests/integration/db-restore.test.ts +0 -153
  198. package/tests/integration/embed-backfill.test.ts +0 -176
  199. package/tests/integration/fact-supersedence.test.ts +0 -313
  200. package/tests/integration/fts-index.test.ts +0 -60
  201. package/tests/integration/getbyids-sqlite.test.ts +0 -100
  202. package/tests/integration/hermes-agent-hooks.test.ts +0 -248
  203. package/tests/integration/hook-claude-settings.test.ts +0 -218
  204. package/tests/integration/hook-log.test.ts +0 -54
  205. package/tests/integration/hook-memo.test.ts +0 -68
  206. package/tests/integration/hook-pre-compact.test.ts +0 -105
  207. package/tests/integration/hook-subagent-start.test.ts +0 -102
  208. package/tests/integration/http.test.ts +0 -401
  209. package/tests/integration/keyword-search-fts.test.ts +0 -66
  210. package/tests/integration/mcp-recall-logging.test.ts +0 -88
  211. package/tests/integration/mcp.test.ts +0 -260
  212. package/tests/integration/memo-sweep.test.ts +0 -91
  213. package/tests/integration/prompt-recall-hook.test.ts +0 -88
  214. package/tests/integration/provider-registry.test.ts +0 -107
  215. package/tests/integration/recall-golden.test.ts +0 -59
  216. package/tests/integration/recall-sqlite.test.ts +0 -169
  217. package/tests/integration/scheduler.test.ts +0 -391
  218. package/tests/integration/session-end-hook.test.ts +0 -48
  219. package/tests/integration/session-start-hook.test.ts +0 -126
  220. package/tests/integration/source-registry.test.ts +0 -122
  221. package/tests/integration/sqlite-fact-store.test.ts +0 -346
  222. package/tests/integration/stop-hook.test.ts +0 -560
  223. package/tests/integration/wal-checkpoint.test.ts +0 -49
  224. package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
  225. package/tests/unit/core/adapters/aider.test.ts +0 -230
  226. package/tests/unit/core/adapters/claude-code.test.ts +0 -118
  227. package/tests/unit/core/adapters/cursor.test.ts +0 -485
  228. package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
  229. package/tests/unit/core/adapters/hermes.test.ts +0 -81
  230. package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
  231. package/tests/unit/core/adapters/opencode.test.ts +0 -354
  232. package/tests/unit/core/adapters/pi.test.ts +0 -110
  233. package/tests/unit/core/adapters/windsurf.test.ts +0 -416
  234. package/tests/unit/core/classifier/prompt.test.ts +0 -126
  235. package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
  236. package/tests/unit/core/facts/extract-facts.test.ts +0 -117
  237. package/tests/unit/core/filter.test.ts +0 -40
  238. package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
  239. package/tests/unit/core/hook/citation-detect.test.ts +0 -124
  240. package/tests/unit/core/hook/gate.test.ts +0 -29
  241. package/tests/unit/core/hook/pointer-block.test.ts +0 -22
  242. package/tests/unit/core/hook/select.test.ts +0 -66
  243. package/tests/unit/core/match-fields.test.ts +0 -39
  244. package/tests/unit/core/mcp-cite-session.test.ts +0 -51
  245. package/tests/unit/core/providers/provider-models.test.ts +0 -101
  246. package/tests/unit/core/query-shape.test.ts +0 -92
  247. package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
  248. package/tests/unit/core/recall-service.test.ts +0 -200
  249. package/tests/unit/core/storage/live-status.test.ts +0 -54
  250. package/tests/unit/core/tokenize.test.ts +0 -32
  251. package/tests/unit/core/useful-scan.test.ts +0 -537
  252. package/tests/unit/llm/embed.test.ts +0 -93
  253. package/tests/unit/llm/ollama-client.test.ts +0 -124
  254. package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
  255. package/tsconfig.json +0 -31
  256. package/tsconfig.test.json +0 -11
  257. package/vitest.config.ts +0 -22
@@ -1,327 +0,0 @@
1
- /**
2
- * FactRecallService — agent-facing recall over the FactStore.
3
- *
4
- * Mirrors RecallService's keyword / semantic / hybrid pattern but works on
5
- * Fact records, not Session records. Sessions and facts answer different
6
- * questions and have incompatibly-shaped results, so this is a separate
7
- * service with its own MCP tool — see Section 4 of factstore-design.md.
8
- *
9
- * Filter pipeline:
10
- * 1. Storage pre-filter (subject, predicate, kind, minConfidence,
11
- * includeSuperseded). Cheap SQL.
12
- * 2. Keyword scoring over (value, subject, predicate). Pure, in-memory.
13
- * 3. Semantic KNN via fact_embeddings vec0 (when mode != keyword).
14
- * 4. Hybrid merge: 0.6 semantic + 0.4 keyword, matching the session
15
- * recall weights.
16
- *
17
- * Confidence policy: default `minConfidence` is 0.6 (Section 1 of the plan).
18
- * Facts with classifier confidence in [0.4, 0.6) get written by
19
- * extractFacts but stay out of agent recall unless the caller lowers the
20
- * floor explicitly.
21
- */
22
-
23
- import type { FactStore } from "@ports/fact-store.js";
24
- import type { LLMClient } from "@ports/llm-client.js";
25
- import { LLMUnreachableError } from "@ports/llm-client.js";
26
- import type {
27
- Fact,
28
- FactHit,
29
- FactMatchField,
30
- FactRecallQuery,
31
- FactRecallResult,
32
- RecallMode,
33
- } from "@shared/types.js";
34
- import { tokenSet } from "@core/recall/tokenize.js";
35
-
36
/** Result count used when the caller does not supply `limit`. */
const DEFAULT_LIMIT = 10;

/** Largest `limit` a caller may request; bigger values are clamped down. */
const MAX_LIMIT = 100;

/** Confidence floor applied when the query omits `minConfidence`. */
const DEFAULT_MIN_CONFIDENCE = 0.6;

/** Row cap handed to the storage pre-filter on every search. */
const STORAGE_FETCH_CAP = 500;

/** Share of the hybrid score contributed by the normalised semantic score. */
const HYBRID_SEM_WEIGHT = 0.6;

/** Share of the hybrid score contributed by the normalised keyword score. */
const HYBRID_KW_WEIGHT = 0.4;

/** Semantic search asks the store for `limit * SEMANTIC_OVERFETCH` neighbours. */
const SEMANTIC_OVERFETCH = 3;

/** Points awarded per matching query token, keyed by the fact field it matched. */
const FIELD_WEIGHTS = {
  value: 3,
  subject: 1,
  predicate: 1,
} as const;
49
-
50
/** Collaborators injected into {@link FactRecallService}. */
export interface FactRecallServiceDeps {
  /** Supplies the pre-filtered candidate facts and the vector neighbour search. */
  readonly factStore: FactStore;
  /** Embeds the query text for semantic and hybrid modes. */
  readonly llm: LLMClient;
}
54
-
55
/**
 * Recall over stored facts in keyword, semantic or hybrid mode.
 *
 * Every search first asks the store for a pre-filtered candidate list and then
 * ranks those candidates in memory; semantic neighbours that are not in the
 * candidate list are discarded.
 */
export class FactRecallService {
  constructor(private readonly deps: FactRecallServiceDeps) {}

  /**
   * Runs one recall query.
   *
   * @param input Query text and/or structured filters. Missing `mode` defaults
   *   to `"keyword"`, missing `minConfidence` to `DEFAULT_MIN_CONFIDENCE`, and
   *   `limit` is clamped by `clampLimit`.
   * @returns The ranked hits (at most `limit`) plus `total`, the number of
   *   hits before truncation. `modeUnavailable` is set when the embedding
   *   backend could not be reached.
   * @throws Re-throws any semantic-search error that is not an
   *   `LLMUnreachableError`.
   */
  async search(input: FactRecallQuery): Promise<FactRecallResult> {
    const mode: RecallMode = input.mode ?? "keyword";
    const limit = clampLimit(input.limit);
    const subject = input.subject ?? null;
    const predicate = input.predicate ?? null;
    const kind = input.kind ?? null;
    const queryText = (input.query ?? "").trim();

    const empty: FactRecallResult = {
      query: queryText,
      subject,
      predicate,
      kind,
      mode,
      limit,
      total: 0,
      results: [],
    };

    // A query with no signal at all → empty. Either free-text query, or a
    // structured filter (subject / predicate / kind) must be provided.
    if (!queryText && subject === null && predicate === null && kind === null) {
      return empty;
    }

    const filter: Parameters<FactStore["listForRecall"]>[0] = {
      includeSuperseded: input.includeSuperseded === true,
      minConfidence: input.minConfidence ?? DEFAULT_MIN_CONFIDENCE,
      limit: STORAGE_FETCH_CAP,
      ...(input.subject !== undefined ? { subject: input.subject } : {}),
      ...(input.predicate !== undefined ? { predicate: input.predicate } : {}),
      ...(input.kind !== undefined ? { kind: input.kind } : {}),
    };

    const candidates = await this.deps.factStore.listForRecall(filter);
    if (candidates.length === 0) return empty;

    const byId = new Map<string, Fact>(candidates.map((f) => [f.id, f]));
    const queryTokens = queryText ? new Set(tokenSet(queryText)) : new Set<string>();

    const kwHits =
      mode === "keyword" || mode === "hybrid"
        ? scoreAll(candidates, queryTokens)
        : [];

    let semHits: ReadonlyArray<SemanticHit> = [];
    let semError: "ollama_unreachable" | null = null;
    if ((mode === "semantic" || mode === "hybrid") && queryText) {
      try {
        semHits = await this.runSemantic(queryText, byId, limit * SEMANTIC_OVERFETCH);
      } catch (err) {
        // An unreachable embedding backend degrades the result instead of
        // failing the whole search; anything else is a real error.
        if (err instanceof LLMUnreachableError) {
          semError = "ollama_unreachable";
        } else {
          throw err;
        }
      }
    }

    if (mode === "semantic" && semError) {
      return { ...empty, modeUnavailable: semError };
    }

    // For pure structured queries (no query text, just subject/predicate/kind),
    // a keyword pass with empty tokens scores zero and a semantic pass has
    // nothing to embed. Fall back to returning the storage filter result in
    // the order the store produced it. Applies to keyword AND hybrid.
    //
    // The full candidate list is handed to finalize(), which does the
    // truncation itself. Slicing here first would cap `total` at `limit`,
    // unlike every other path, where `total` counts hits before truncation.
    if ((mode === "keyword" || mode === "hybrid") && !queryText) {
      const rows = candidates.map((f) => factToHit(f, 0, []));
      return finalize(queryText, subject, predicate, kind, mode, limit, rows);
    }

    if (mode === "keyword") {
      return finalize(queryText, subject, predicate, kind, mode, limit, kwHits.map(toKeywordHit));
    }

    if (mode === "semantic") {
      return finalize(queryText, subject, predicate, kind, mode, limit, semHits.map(toSemanticHit));
    }

    // hybrid — when the embedder was unreachable this is keyword-only, and the
    // caller is told so through `modeUnavailable`.
    const merged = mergeHybrid(kwHits, semHits, byId);
    const result = finalize(queryText, subject, predicate, kind, mode, limit, merged);
    return semError ? { ...result, modeUnavailable: semError } : result;
  }

  /**
   * Embeds `query`, asks the store for its nearest neighbours and keeps only
   * those present in `byId`.
   *
   * @param query Non-empty query text to embed.
   * @param byId Candidates that survived the storage pre-filter, keyed by id.
   * @param fetchLimit Number of neighbours requested from the store.
   * @returns Surviving neighbours in store order, with the store's distance
   *   converted through `cosineFromL2`.
   */
  private async runSemantic(
    query: string,
    byId: ReadonlyMap<string, Fact>,
    fetchLimit: number,
  ): Promise<ReadonlyArray<SemanticHit>> {
    const embedding = await this.deps.llm.embed(query, "query");
    const neighbors = await this.deps.factStore.semanticSearch(embedding.vector, fetchLimit);
    const hits: SemanticHit[] = [];
    for (const n of neighbors) {
      const fact = byId.get(n.factId);
      if (!fact) continue; // neighbour was excluded by the storage pre-filter
      hits.push({ fact, similarity: cosineFromL2(n.distance) });
    }
    return hits;
  }
}
164
-
165
/** A fact that matched at least one query token, with its raw keyword score. */
interface KeywordHit {
  readonly fact: Fact;
  /** Weighted count of matching tokens; not normalised. */
  readonly score: number;
  /** Fields in which a query token was found. */
  readonly matchedIn: ReadonlyArray<FactMatchField>;
}

/** A fact returned by the vector neighbour search. */
interface SemanticHit {
  readonly fact: Fact;
  /** Value produced by `cosineFromL2` from the store's distance. */
  readonly similarity: number;
}
175
-
176
/**
 * Keyword-scores every fact and keeps the ones that matched.
 *
 * @param facts Candidate facts to score.
 * @param queryTokens Tokens of the query; an empty set yields no hits.
 * @returns Hits with a positive score, highest score first.
 */
function scoreAll(
  facts: ReadonlyArray<Fact>,
  queryTokens: ReadonlySet<string>,
): ReadonlyArray<KeywordHit> {
  if (queryTokens.size === 0) return [];
  return facts
    .map((fact): KeywordHit => ({ fact, ...scoreFact(fact, queryTokens) }))
    .filter((hit) => hit.score > 0)
    .sort((left, right) => right.score - left.score);
}
189
-
190
/**
 * Scores a single fact against the query tokens.
 *
 * Each of `value`, `subject` and `predicate` is tokenised; every token shared
 * with the query adds that field's `FIELD_WEIGHTS` entry to the score.
 *
 * @returns The summed score and the fields that contributed to it, listed in
 *   the order value, subject, predicate.
 */
function scoreFact(
  fact: Fact,
  queryTokens: ReadonlySet<string>,
): { score: number; matchedIn: ReadonlyArray<FactMatchField> } {
  const scoredFields = ["value", "subject", "predicate"] as const;

  let score = 0;
  const matchedIn: FactMatchField[] = [];
  for (const field of scoredFields) {
    const overlap = intersectionSize(queryTokens, tokenSet(fact[field]));
    if (overlap === 0) continue;
    score += FIELD_WEIGHTS[field] * overlap;
    matchedIn.push(field);
  }
  return { score, matchedIn };
}
217
-
218
/**
 * Combines keyword and semantic hits into one ranked list.
 *
 * Each side is divided by its own maximum (never less than 1) and the two
 * normalised scores are blended with `HYBRID_SEM_WEIGHT` / `HYBRID_KW_WEIGHT`.
 * A fact present on only one side scores 0 on the other.
 *
 * @param kwHits Keyword hits.
 * @param semHits Semantic hits.
 * @param byId Candidate facts by id; ids missing from it are dropped.
 * @returns Merged hits, highest combined score first.
 */
function mergeHybrid(
  kwHits: ReadonlyArray<KeywordHit>,
  semHits: ReadonlyArray<SemanticHit>,
  byId: ReadonlyMap<string, Fact>,
): ReadonlyArray<FactHit> {
  const kwCeiling = kwHits.reduce((peak, hit) => Math.max(peak, hit.score), 1);
  const semCeiling = semHits.reduce((peak, hit) => Math.max(peak, hit.similarity), 1);

  const keywordById = new Map<string, KeywordHit>();
  for (const hit of kwHits) keywordById.set(hit.fact.id, hit);
  const semanticById = new Map<string, SemanticHit>();
  for (const hit of semHits) semanticById.set(hit.fact.id, hit);

  // Keyword ids first, then semantic-only ids.
  const candidateIds = new Set<string>(keywordById.keys());
  for (const id of semanticById.keys()) candidateIds.add(id);

  const merged: FactHit[] = [];
  candidateIds.forEach((id) => {
    const fact = byId.get(id);
    if (fact === undefined) return;

    const keyword = keywordById.get(id);
    const semantic = semanticById.get(id);
    const keywordShare = keyword ? keyword.score / kwCeiling : 0;
    const semanticShare = semantic ? semantic.similarity / semCeiling : 0;
    const semanticTag: FactMatchField[] = semantic ? ["semantic"] : [];

    merged.push({
      ...fact,
      matchScore: round4(HYBRID_SEM_WEIGHT * semanticShare + HYBRID_KW_WEIGHT * keywordShare),
      matchedIn: uniqueFields(keyword?.matchedIn ?? [], semanticTag),
      keywordScore: round4(keywordShare),
      semanticScore: round4(semanticShare),
    });
  });

  merged.sort((left, right) => right.matchScore - left.matchScore);
  return merged;
}
254
-
255
/**
 * Wraps a fact as a recall hit.
 *
 * @param fact Fact whose fields are copied onto the hit.
 * @param score Stored as `matchScore`.
 * @param matchedIn Fields reported as the reason for the match.
 */
function factToHit(
  fact: Fact,
  score: number,
  matchedIn: ReadonlyArray<FactMatchField>,
): FactHit {
  const hit: FactHit = { ...fact, matchScore: score, matchedIn };
  return hit;
}
262
-
263
/** Converts a keyword hit to a result row, keeping its raw score and matched fields. */
function toKeywordHit(h: KeywordHit): FactHit {
  const { fact, score, matchedIn } = h;
  return factToHit(fact, score, matchedIn);
}
266
-
267
/** Converts a semantic hit to a result row scored by similarity and tagged "semantic". */
function toSemanticHit(h: SemanticHit): FactHit {
  const { fact, similarity } = h;
  return factToHit(fact, similarity, ["semantic"]);
}
270
-
271
/**
 * Assembles the response envelope for a search.
 *
 * @param hits All ranked hits. `total` reports their count; `results` holds
 *   only the first `limit` of them.
 * @returns The envelope echoing the query parameters alongside the hits.
 */
function finalize(
  query: string,
  subject: string | null,
  predicate: string | null,
  kind: FactRecallResult["kind"],
  mode: RecallMode,
  limit: number,
  hits: ReadonlyArray<FactHit>,
): FactRecallResult {
  const total = hits.length;
  const results = hits.slice(0, limit);
  return { query, subject, predicate, kind, mode, limit, total, results };
}
291
-
292
/**
 * Normalises a caller-supplied result limit.
 *
 * @param limit Requested limit; `DEFAULT_LIMIT` is used when absent.
 * @returns 1 for NaN or anything below 1, otherwise the value truncated to an
 *   integer and capped at `MAX_LIMIT`.
 */
function clampLimit(limit: number | undefined): number {
  const requested = limit ?? DEFAULT_LIMIT;
  // `!(x >= 1)` holds for NaN as well as for every value below 1.
  if (!(requested >= 1)) return 1;
  const whole = Math.trunc(requested);
  return whole > MAX_LIMIT ? MAX_LIMIT : whole;
}
297
-
298
/**
 * Converts an L2 distance to a cosine similarity via `1 - d² / 2`, clamped to
 * [-1, 1] and rounded to four decimals.
 *
 * NOTE(review): the identity only holds for unit-length vectors — confirm the
 * stored embeddings are normalised.
 */
function cosineFromL2(distance: number): number {
  const halfSquared = (distance * distance) / 2;
  const clamped = Math.min(1, Math.max(-1, 1 - halfSquared));
  return round4(clamped);
}
302
-
303
/** Rounds `value` to four decimal places. */
function round4(value: number): number {
  const scale = 10_000;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
306
-
307
/**
 * Counts the elements two sets have in common.
 *
 * Walks the smaller set and probes the larger one.
 */
function intersectionSize<T>(a: ReadonlySet<T>, b: ReadonlySet<T>): number {
  let walked = a;
  let probed = b;
  if (b.size < a.size) {
    walked = b;
    probed = a;
  }

  let shared = 0;
  walked.forEach((item) => {
    if (probed.has(item)) shared += 1;
  });
  return shared;
}
313
-
314
/**
 * Concatenates two field lists and removes duplicates, keeping each field at
 * the position where it first appears.
 */
function uniqueFields(
  a: ReadonlyArray<FactMatchField>,
  b: ReadonlyArray<FactMatchField>,
): ReadonlyArray<FactMatchField> {
  // A Set iterates in insertion order, so first occurrences keep their place.
  return Array.from(new Set<FactMatchField>([...a, ...b]));
}
@@ -1,142 +0,0 @@
1
- /**
2
- * scanOnce — mtime-gated incremental discovery shared by every adapter.
3
- *
4
- * The Python codebase bundled this logic into each adapter (`scan_once` +
5
- * `record_classified` methods). In the TS port the adapter stays a pure
6
- * parser (TranscriptAdapter port); the mtime check and adapter_state
7
- * upsert live here, generic over the adapter. Same behavior, less
8
- * duplication across claude-code / hermes / pi.
9
- *
10
- * Contract (per file under adapter.discover()):
11
- * - If `now - mtime < idleMinutes * 60s` → still active, skip
12
- * - Lookup adapter_state by (adapterName, sourcePath):
13
- * no row + file idle → NEW: parse + return (chunk, supersedes=null)
14
- * row exists, size match, failures < ceil → UNCHANGED: skip
15
- * row exists, size match, failures >= ceil → FAILED_CEILING: skip (log once per session)
16
- * row exists, file grew → RESUMED: parse + return, reset failure_count
17
- * - After successful classify+insert downstream, call `recordClassified`
18
- * to upsert adapter_state with the new size + session_id.
19
- * - On classify/storage failure, call `recordFailed` to increment failure_count.
20
- * When failure_count reaches MAX_CLASSIFY_FAILURES and the file hasn't grown,
21
- * the file is permanently skipped until new content arrives.
22
- */
23
-
24
- import { statSync } from "node:fs";
25
- import type Database from "better-sqlite3";
26
- import type {
27
- SessionChunk,
28
- TranscriptAdapter,
29
- } from "@ports/transcript-adapter.js";
30
-
31
/** One transcript that `scanOnce` parsed and that is ready for classification. */
export interface ScanResult {
  /** Parsed session returned by the adapter. */
  readonly chunk: SessionChunk;
  /** Session id stored for this path by an earlier pass, or null when the path has no adapter_state row. */
  readonly supersedes: string | null;
}
35
-
36
/**
 * Failure ceiling for a transcript file. `scanOnce` zeroes a row's
 * failure_count when it has reached this value and the file size has changed.
 */
export const MAX_CLASSIFY_FAILURES = 3;
37
-
38
/** Columns that `scanOnce` selects from the adapter_state table. */
interface AdapterStateRow {
  source_path: string;
  /** File size recorded by the last pass; a null is compared as 0. */
  file_size: number | null;
  /** Session id recorded by the last successful pass, if any. */
  session_id: string | null;
  /** Selected through COALESCE(failure_count, 0), so never null here. */
  failure_count: number;
}
44
-
45
/**
 * Discovers the adapter's transcript files and parses those that are idle and
 * whose size differs from what adapter_state recorded.
 *
 * A file is skipped when it cannot be stat'ed, when it was modified less than
 * `idleMinutes` ago, when its size equals the recorded size, or when the
 * adapter returns no chunk for it.
 *
 * @param adapter Supplies `name`, `discover()` and `parseSession()`.
 * @param idleMinutes Minimum age of the file's mtime before it is considered.
 * @param db Database holding the adapter_state table.
 * @param now Current time in epoch milliseconds; defaults to `Date.now()`.
 * @returns One entry per parsed file, in discovery order.
 */
export async function scanOnce(
  adapter: TranscriptAdapter,
  idleMinutes: number,
  db: Database.Database,
  now: number = Date.now(),
): Promise<ReadonlyArray<ScanResult>> {
  const idleThresholdMs = idleMinutes * 60 * 1000;

  const knownRows = db
    .prepare<[string], AdapterStateRow>(
      "SELECT source_path, file_size, session_id, COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ?",
    )
    .all(adapter.name);
  const stateByPath = new Map<string, AdapterStateRow>();
  for (const row of knownRows) stateByPath.set(row.source_path, row);

  const results: ScanResult[] = [];
  for (const path of await adapter.discover()) {
    let fileSize: number;
    let modifiedAt: number;
    try {
      const stats = statSync(path);
      fileSize = stats.size;
      modifiedAt = stats.mtimeMs;
    } catch {
      // File cannot be stat'ed — ignore it for this pass.
      continue;
    }

    // Modified too recently: the session may still be in progress.
    if (now - modifiedAt < idleThresholdMs) continue;

    const prior = stateByPath.get(path);
    if (prior !== undefined) {
      // Same size as last time — nothing new to process, whether the last
      // attempt succeeded or failed.
      if ((prior.file_size ?? 0) === fileSize) continue;

      // Size changed on a file that had hit the failure ceiling: clear the
      // counter before it is processed again.
      if (prior.failure_count >= MAX_CLASSIFY_FAILURES) {
        db.prepare(
          "UPDATE adapter_state SET failure_count = 0 WHERE adapter_name = ? AND source_path = ?",
        ).run(adapter.name, path);
      }
    }

    const chunk = await adapter.parseSession(path);
    if (!chunk) continue;
    results.push({ chunk, supersedes: prior ? prior.session_id : null });
  }
  return results;
}
96
-
97
/**
 * Upserts the adapter_state row for a file that was processed successfully.
 *
 * Stores the file's current size (as both last_offset and file_size) and the
 * session id, and sets failure_count to 0. Does nothing when the file cannot
 * be stat'ed.
 *
 * @param db Database holding the adapter_state table.
 * @param adapterName Adapter half of the row key.
 * @param sourcePath Path half of the row key; also the file that is stat'ed.
 * @param sessionId Session id to record for the file.
 */
export function recordClassified(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
  sessionId: string,
): void {
  let currentSize: number;
  try {
    currentSize = statSync(sourcePath).size;
  } catch {
    return;
  }

  const upsert = db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, ?, 0, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       last_offset = excluded.last_offset,
       file_size = excluded.file_size,
       session_id = excluded.session_id,
       failure_count = 0,
       last_processed_at = excluded.last_processed_at`,
  );
  upsert.run(adapterName, sourcePath, currentSize, currentSize, sessionId);
}
121
-
122
/**
 * Records a failed processing attempt for a file in adapter_state.
 *
 * Inserts a row with failure_count 1 and a NULL session id, or, when the row
 * already exists, refreshes file_size and last_processed_at and increments
 * failure_count; the existing session_id and last_offset are left untouched.
 * Does nothing when the file cannot be stat'ed.
 *
 * @param db Database holding the adapter_state table.
 * @param adapterName Adapter half of the row key.
 * @param sourcePath Path half of the row key; also the file that is stat'ed.
 */
export function recordFailed(
  db: Database.Database,
  adapterName: string,
  sourcePath: string,
): void {
  let currentSize: number;
  try {
    currentSize = statSync(sourcePath).size;
  } catch {
    return;
  }

  const upsert = db.prepare(
    `INSERT INTO adapter_state
       (adapter_name, source_path, last_offset, file_size, session_id, failure_count, last_processed_at)
     VALUES (?, ?, ?, ?, NULL, 1, datetime('now'))
     ON CONFLICT(adapter_name, source_path) DO UPDATE SET
       file_size = excluded.file_size,
       failure_count = failure_count + 1,
       last_processed_at = excluded.last_processed_at`,
  );
  upsert.run(adapterName, sourcePath, currentSize, currentSize);
}
@@ -1,225 +0,0 @@
1
- /**
2
- * ScanScheduler — periodic ingest loop. Ports `scheduler.py`.
3
- *
4
- * Each tick walks the registered adapters, runs scanOnce to discover idle
5
- * transcript files, classifies the resulting SessionChunks via the active
6
- * classifier, and persists them through SqliteSessionStore.insertSession
7
- * with the embedder. Records adapter_state after each successful insert
8
- * so the next tick is incremental.
9
- *
10
- * Single-process: the scheduler runs alongside the HTTP server (Phase D
11
- * wires it into `nlm start`). No worker thread; Node's event loop is
12
- * enough — adapter discovery is filesystem-bound and the per-chunk
13
- * classify call is async-awaited with a wall-clock timeout to keep the
14
- * tick loop responsive.
15
- *
16
- * Confidence floor of 0.3 mirrors Python: classifier outputs below that
17
- * are skipped rather than persisted as low-quality noise.
18
- */
19
-
20
- import type { LLMClient } from "@ports/llm-client.js";
21
- import type { TranscriptAdapter } from "@ports/transcript-adapter.js";
22
- import { extractFacts } from "@core/facts/extract-facts.js";
23
- import type { SqliteFactStore } from "@core/storage/sqlite-fact-store.js";
24
- import type {
25
- IngestRecord,
26
- SqliteSessionStore,
27
- } from "@core/storage/sqlite-session-store.js";
28
- import { MAX_CLASSIFY_FAILURES, recordClassified, recordFailed, scanOnce } from "./scan-once.js";
29
-
30
/** Delay between ticks when `intervalMs` is not supplied: 30 min, matches Python default. */
const DEFAULT_INTERVAL_MS = 30 * 60_000;

/** Wall-clock budget for one classify call when `classifyTimeoutMs` is not supplied. */
const DEFAULT_CLASSIFY_TIMEOUT_MS = 120_000;

/** Classifications with confidence below this are skipped when `confidenceFloor` is not supplied. */
const DEFAULT_CONFIDENCE_FLOOR = 0.3;

/** Idle threshold, in minutes, handed to `scanOnce` when `idleMinutes` is not supplied. */
const DEFAULT_IDLE_MINUTES = 15;

/** Upper bound on how much of a chunk's text is kept as the stored body (applied via `String.prototype.slice`). */
const BODY_CAP = 200_000;
35
-
36
/** Construction options for `ScanScheduler`. */
export interface SchedulerOptions {
  /** Session store; sessions are inserted through it and its raw database handle is used for adapter state. */
  readonly store: SqliteSessionStore;
  /** Adapters walked, in order, on every tick. */
  readonly adapters: readonly TranscriptAdapter[];
  /** Client whose `classify` is called with each chunk's text. */
  readonly classifier: LLMClient;
  /** Embedder handed to `insertSession`; treated as `null` when omitted. */
  readonly embedder?: LLMClient | null;
  /**
   * FactStore for Phase B.2 fact ingest. When supplied, facts are extracted
   * from each classify result and passed to `insertSession` together with
   * the session record. Optional: when omitted or null, sessions ingest as
   * before with no facts written (the backwards-compatible default for
   * tests not yet updated, and for callers that want facts off).
   */
  readonly factStore?: SqliteFactStore | null;
  /** Milliseconds between ticks. */
  readonly intervalMs?: number;
  /** Milliseconds allowed for a single classify call before it counts as timed out. */
  readonly classifyTimeoutMs?: number;
  /** Classifications with confidence below this value are skipped. */
  readonly confidenceFloor?: number;
  /** Idle threshold, in minutes, forwarded to `scanOnce`. */
  readonly idleMinutes?: number;
  /** Message sink; defaults to `console.error`. Set to a noop in tests. */
  readonly logger?: (msg: string) => void;
}
56
-
57
/** Counters summarising one `ScanScheduler.tick()` run. */
export interface TickReport {
  /** Chunks whose session insert succeeded. */
  readonly inserted: number;
  /** Chunks dropped because the classifier's confidence was below the floor. */
  readonly skippedLowConfidence: number;
  /** Chunks whose classify call threw or timed out. */
  readonly classifyFailures: number;
  /** Chunks whose session insert (or the state write after it) threw. */
  readonly storageFailures: number;
  /** Total chunks returned by the adapters' scans during the tick. */
  readonly chunksSeen: number;
}
64
-
65
/**
 * Periodic ingest loop over the registered transcript adapters.
 *
 * Each tick runs `scanOnce` for every adapter, classifies each returned
 * chunk (bounded by `classifyTimeoutMs`), skips results whose confidence is
 * below `confidenceFloor`, and persists the rest through
 * `store.insertSession`. Per-file progress is written with
 * `recordClassified` on success and `recordFailed` on classify or storage
 * failure.
 */
export class ScanScheduler {
  private readonly opts: Required<Omit<SchedulerOptions, "embedder" | "factStore">> & {
    readonly embedder: LLMClient | null;
    readonly factStore: SqliteFactStore | null;
  };
  /** True before the first `start()` and again after `stop()`. */
  private stopped = true;
  private timer: NodeJS.Timeout | null = null;
  /**
   * Incremented by every `start()`. A timer chain re-arms itself only while
   * the value it captured still matches, so a `stop()` + `start()` issued
   * while a tick is in flight cannot leave two chains running side by side.
   */
  private generation = 0;

  /** Stores the options, filling every omitted optional field with its default. */
  constructor(opts: SchedulerOptions) {
    this.opts = {
      store: opts.store,
      adapters: opts.adapters,
      classifier: opts.classifier,
      embedder: opts.embedder ?? null,
      factStore: opts.factStore ?? null,
      intervalMs: opts.intervalMs ?? DEFAULT_INTERVAL_MS,
      classifyTimeoutMs: opts.classifyTimeoutMs ?? DEFAULT_CLASSIFY_TIMEOUT_MS,
      confidenceFloor: opts.confidenceFloor ?? DEFAULT_CONFIDENCE_FLOOR,
      idleMinutes: opts.idleMinutes ?? DEFAULT_IDLE_MINUTES,
      logger: opts.logger ?? ((msg) => console.error(msg)),
    };
  }

  /** Starts the loop with an immediate first tick. No-op when already running. */
  start(): void {
    if (!this.stopped) return;
    this.stopped = false;
    this.generation += 1;
    this.scheduleNext(0, this.generation);
  }

  /**
   * Stops the loop and cancels the pending timer, if any. A tick that is
   * already executing is not interrupted; it simply will not be followed by
   * another one.
   */
  stop(): void {
    this.stopped = true;
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /**
   * Arms the timer for the next tick, then re-arms after that tick settles.
   *
   * @param delayMs    Milliseconds to wait before running the tick.
   * @param generation Value of `this.generation` owned by this timer chain.
   */
  private scheduleNext(delayMs: number, generation: number): void {
    if (this.stopped || generation !== this.generation) return;
    this.timer = setTimeout(() => {
      void this.tick()
        // tick() can still reject (e.g. a state write inside one of its own
        // catch blocks throws). Log it here so the rejection is handled and
        // the loop keeps going.
        .catch((e: unknown) => {
          this.opts.logger(`[scheduler] tick failed: ${ScanScheduler.describe(e)}`);
        })
        .finally(() => this.scheduleNext(this.opts.intervalMs, generation));
    }, delayMs);
  }

  /** Renders a thrown value for log output. */
  private static describe(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }

  /**
   * Runs one full pass over all adapters.
   *
   * Failures are isolated: a scan error skips that adapter, and a classify or
   * storage error skips that chunk; both are logged.
   *
   * @returns Counters describing what happened during the pass.
   */
  async tick(): Promise<TickReport> {
    let inserted = 0;
    let skippedLowConfidence = 0;
    let classifyFailures = 0;
    let storageFailures = 0;
    let chunksSeen = 0;

    for (const adapter of this.opts.adapters) {
      let results;
      try {
        results = await scanOnce(adapter, this.opts.idleMinutes, this.opts.store.rawDb());
      } catch (e) {
        this.opts.logger(
          `[scheduler] scanOnce error for ${adapter.name}: ${ScanScheduler.describe(e)}`,
        );
        continue;
      }

      for (const { chunk, supersedes } of results) {
        chunksSeen += 1;

        let classification;
        try {
          classification = await withTimeout(
            this.opts.classifier.classify(chunk.text),
            this.opts.classifyTimeoutMs,
          );
        } catch (e) {
          classifyFailures += 1;
          const reason =
            e instanceof TimeoutError ? "timed out" : `error: ${ScanScheduler.describe(e)}`;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          // Read the counter back so the log line can say how close this
          // file is to the retry ceiling.
          const failureRow = this.opts.store
            .rawDb()
            .prepare<[string, string], { failure_count: number }>(
              "SELECT COALESCE(failure_count, 0) AS failure_count FROM adapter_state WHERE adapter_name = ? AND source_path = ?",
            )
            .get(adapter.name, chunk.sourcePath);
          const count = failureRow?.failure_count ?? 1;
          const ceiling =
            count >= MAX_CLASSIFY_FAILURES
              ? ` (failure ${count}/${MAX_CLASSIFY_FAILURES} — will skip until file grows)`
              : ` (failure ${count}/${MAX_CLASSIFY_FAILURES})`;
          this.opts.logger(`[scheduler] classifier ${reason} for ${chunk.id}${ceiling}`);
          continue;
        }

        if (classification.confidence < this.opts.confidenceFloor) {
          skippedLowConfidence += 1;
          continue;
        }

        const record: IngestRecord = {
          id: chunk.id,
          runtime: chunk.runtime,
          runtimeSessionId: chunk.runtimeSessionId || null,
          startedAt: chunk.startedAt,
          endedAt: chunk.endedAt || null,
          durationMin: chunk.durationMin,
          label: classification.label,
          summary: classification.summary,
          body: chunk.text.slice(0, BODY_CAP),
          status: "closed",
          transcriptKind: adapter.transcriptKind,
          transcriptPath: chunk.sourcePath,
          transcriptOffset: chunk.byteRange[0],
          transcriptLength: chunk.byteRange[1],
          entities: classification.entities,
          decisions: classification.decisions,
          openQuestions: classification.open,
        };

        // Facts are extracted only when a fact store was configured.
        const factSink = this.opts.factStore
          ? {
              factStore: this.opts.factStore,
              facts: extractFacts(classification, chunk.id, chunk.startedAt),
            }
          : null;

        try {
          await this.opts.store.insertSession(
            record,
            this.opts.embedder,
            supersedes,
            factSink,
          );
          recordClassified(
            this.opts.store.rawDb(),
            adapter.name,
            chunk.sourcePath,
            chunk.id,
          );
          inserted += 1;
        } catch (e) {
          storageFailures += 1;
          recordFailed(this.opts.store.rawDb(), adapter.name, chunk.sourcePath);
          this.opts.logger(
            `[scheduler] storage error for ${chunk.id}: ${ScanScheduler.describe(e)}`,
          );
        }
      }
    }

    return { inserted, skippedLowConfidence, classifyFailures, storageFailures, chunksSeen };
  }
}
210
-
211
/** Rejection reason produced by `withTimeout` when the deadline elapses first. */
class TimeoutError extends Error {
  constructor(message?: string) {
    super(message);
    // Without this, instances report themselves as a plain "Error" in logs
    // and stack traces.
    this.name = "TimeoutError";
  }
}

/**
 * Settles with `promise`'s outcome unless `ms` milliseconds pass first, in
 * which case it rejects with a `TimeoutError`.
 *
 * The underlying operation is not cancelled on timeout; only its result is
 * ignored. The timer is always cleared once the race settles.
 *
 * @param promise Operation to wait for.
 * @param ms      Deadline in milliseconds.
 * @returns The value `promise` resolves with.
 * @throws TimeoutError when the deadline elapses before `promise` settles.
 */
async function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => reject(new TimeoutError(`timed out after ${ms}ms`)), ms);
  });
  try {
    return await Promise.race([promise, deadline]);
  } finally {
    if (timer !== undefined) clearTimeout(timer);
  }
}