claude-brain 0.30.2 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. package/README.md +241 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -29
  5. package/package.json +7 -3
  6. package/packs/backend/node.json +173 -173
  7. package/packs/core/javascript.json +176 -176
  8. package/packs/core/typescript.json +222 -222
  9. package/packs/frontend/react.json +254 -254
  10. package/packs/meta/testing.json +172 -172
  11. package/scripts/postinstall.mjs +531 -531
  12. package/src/automation/decision-detector.ts +452 -452
  13. package/src/automation/phase12-manager.ts +456 -456
  14. package/src/automation/proactive-recall.ts +373 -373
  15. package/src/automation/project-detector.ts +310 -310
  16. package/src/automation/repo-scanner.ts +210 -205
  17. package/src/cli/auto-setup.ts +75 -75
  18. package/src/cli/auto-start.ts +266 -266
  19. package/src/cli/bin.ts +264 -264
  20. package/src/cli/commands/autostart.ts +90 -90
  21. package/src/cli/commands/chroma.ts +578 -577
  22. package/src/cli/commands/export-training.ts +70 -70
  23. package/src/cli/commands/export.ts +130 -130
  24. package/src/cli/commands/git-hook.ts +183 -183
  25. package/src/cli/commands/hooks.ts +217 -217
  26. package/src/cli/commands/init.ts +123 -123
  27. package/src/cli/commands/install-mcp.ts +122 -111
  28. package/src/cli/commands/models.ts +979 -979
  29. package/src/cli/commands/pack.ts +200 -200
  30. package/src/cli/commands/refresh.ts +344 -339
  31. package/src/cli/commands/reindex.ts +120 -120
  32. package/src/cli/commands/serve.ts +466 -463
  33. package/src/cli/commands/start.ts +44 -44
  34. package/src/cli/commands/status.ts +220 -203
  35. package/src/cli/commands/uninstall-mcp.ts +45 -41
  36. package/src/cli/commands/update.ts +130 -124
  37. package/src/cli/migrate-chroma.ts +106 -106
  38. package/src/cli/ui/animations.ts +80 -80
  39. package/src/cli/ui/components.ts +82 -82
  40. package/src/cli/ui/index.ts +4 -4
  41. package/src/cli/ui/logo.ts +36 -36
  42. package/src/cli/ui/theme.ts +55 -55
  43. package/src/code-intelligence/indexer.ts +352 -352
  44. package/src/code-intelligence/linker.ts +178 -178
  45. package/src/code-intelligence/parser.ts +484 -484
  46. package/src/code-intelligence/query.ts +291 -291
  47. package/src/code-intelligence/schema.ts +83 -83
  48. package/src/code-intelligence/types.ts +95 -95
  49. package/src/config/defaults.ts +52 -52
  50. package/src/config/home.ts +56 -56
  51. package/src/config/index.ts +5 -5
  52. package/src/config/loader.ts +192 -192
  53. package/src/config/schema.ts +446 -415
  54. package/src/config/validator.ts +182 -182
  55. package/src/context/assembler.ts +407 -400
  56. package/src/context/index.ts +79 -79
  57. package/src/context/progress-tracker.ts +174 -174
  58. package/src/context/standards-manager.ts +287 -287
  59. package/src/context/validator.ts +58 -58
  60. package/src/diagnostics/index.ts +122 -121
  61. package/src/health/index.ts +233 -232
  62. package/src/hooks/brain-hook.ts +134 -131
  63. package/src/hooks/capture.ts +168 -168
  64. package/src/hooks/claude-code-mastery.md +112 -112
  65. package/src/hooks/context-hook.ts +260 -245
  66. package/src/hooks/deduplicator.ts +72 -72
  67. package/src/hooks/git-capture.ts +109 -109
  68. package/src/hooks/git-hook-installer.ts +211 -207
  69. package/src/hooks/index.ts +20 -20
  70. package/src/hooks/installer.ts +306 -288
  71. package/src/hooks/interceptor-hook.ts +204 -201
  72. package/src/hooks/passive-classifier.ts +397 -397
  73. package/src/hooks/queue.ts +160 -129
  74. package/src/hooks/session-tracker.ts +312 -312
  75. package/src/hooks/types.ts +52 -52
  76. package/src/index.ts +7 -7
  77. package/src/intelligence/cross-project/generalizer.ts +283 -283
  78. package/src/intelligence/cross-project/index.ts +7 -7
  79. package/src/intelligence/hf-downloader.ts +222 -222
  80. package/src/intelligence/hf-manifest.json +78 -78
  81. package/src/intelligence/index.ts +24 -24
  82. package/src/intelligence/inference-router.ts +762 -762
  83. package/src/intelligence/model-manager.ts +263 -245
  84. package/src/intelligence/optimization/index.ts +10 -10
  85. package/src/intelligence/optimization/precompute.ts +202 -202
  86. package/src/intelligence/optimization/semantic-cache.ts +213 -207
  87. package/src/intelligence/prediction/index.ts +7 -7
  88. package/src/intelligence/prediction/recommender.ts +276 -268
  89. package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
  90. package/src/intelligence/reasoning/index.ts +7 -7
  91. package/src/intelligence/temporal/evolution.ts +193 -197
  92. package/src/intelligence/temporal/index.ts +16 -16
  93. package/src/intelligence/temporal/query-processor.ts +190 -190
  94. package/src/intelligence/temporal/timeline.ts +272 -259
  95. package/src/intelligence/temporal/trends.ts +263 -263
  96. package/src/intelligence/tokenizer.ts +118 -118
  97. package/src/knowledge/entity-extractor.ts +447 -443
  98. package/src/knowledge/graph/builder.ts +185 -185
  99. package/src/knowledge/graph/linker.ts +201 -201
  100. package/src/knowledge/graph/memory-graph.ts +359 -359
  101. package/src/knowledge/graph/schema.ts +99 -99
  102. package/src/knowledge/graph/search.ts +166 -166
  103. package/src/knowledge/relationship-extractor.ts +108 -108
  104. package/src/memory/chroma/client.ts +211 -192
  105. package/src/memory/chroma/collection-manager.ts +92 -92
  106. package/src/memory/chroma/config.ts +57 -57
  107. package/src/memory/chroma/embeddings.ts +177 -175
  108. package/src/memory/chroma/index.ts +82 -82
  109. package/src/memory/chroma/migration.ts +270 -270
  110. package/src/memory/chroma/schemas.ts +69 -69
  111. package/src/memory/chroma/search.ts +319 -315
  112. package/src/memory/chroma/store.ts +755 -747
  113. package/src/memory/compression.ts +121 -121
  114. package/src/memory/consolidation/archiver.ts +162 -165
  115. package/src/memory/consolidation/merger.ts +182 -186
  116. package/src/memory/consolidation/scorer.ts +136 -136
  117. package/src/memory/database.ts +9 -0
  118. package/src/memory/dual-write.ts +145 -0
  119. package/src/memory/embeddings.ts +226 -226
  120. package/src/memory/episodic/detector.ts +108 -108
  121. package/src/memory/episodic/manager.ts +347 -351
  122. package/src/memory/episodic/summarizer.ts +179 -179
  123. package/src/memory/episodic/types.ts +52 -52
  124. package/src/memory/fts5-search.ts +692 -633
  125. package/src/memory/index.ts +943 -1060
  126. package/src/memory/migrations/add-fts5.ts +118 -108
  127. package/src/memory/patterns.ts +438 -438
  128. package/src/memory/pruning.ts +60 -60
  129. package/src/memory/schema.ts +88 -88
  130. package/src/memory/store.ts +911 -787
  131. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  132. package/src/packs/index.ts +9 -9
  133. package/src/packs/loader.ts +134 -134
  134. package/src/packs/manager.ts +204 -204
  135. package/src/packs/ranker.ts +78 -78
  136. package/src/packs/types.ts +81 -81
  137. package/src/phase12/index.ts +5 -5
  138. package/src/retrieval/bm25/index.ts +300 -297
  139. package/src/retrieval/bm25/tokenizer.ts +184 -184
  140. package/src/retrieval/feedback/adaptive.ts +221 -221
  141. package/src/retrieval/feedback/index.ts +16 -16
  142. package/src/retrieval/feedback/metrics.ts +221 -221
  143. package/src/retrieval/feedback/store.ts +283 -283
  144. package/src/retrieval/fusion/index.ts +194 -194
  145. package/src/retrieval/fusion/rrf.ts +165 -165
  146. package/src/retrieval/index.ts +12 -12
  147. package/src/retrieval/pipeline.ts +375 -375
  148. package/src/retrieval/query/expander.ts +203 -203
  149. package/src/retrieval/query/index.ts +27 -27
  150. package/src/retrieval/query/intent-classifier.ts +252 -252
  151. package/src/retrieval/query/temporal-parser.ts +295 -295
  152. package/src/retrieval/reranker/index.ts +189 -188
  153. package/src/retrieval/reranker/model.ts +99 -95
  154. package/src/retrieval/service.ts +125 -125
  155. package/src/retrieval/types.ts +162 -162
  156. package/src/routing/entity-extractor.ts +454 -454
  157. package/src/routing/handlers/exploration-handler.ts +369 -0
  158. package/src/routing/handlers/index.ts +19 -0
  159. package/src/routing/handlers/memory-handler.ts +273 -0
  160. package/src/routing/handlers/mutation-handler.ts +241 -0
  161. package/src/routing/handlers/recall-handler.ts +642 -0
  162. package/src/routing/handlers/shared.ts +515 -0
  163. package/src/routing/handlers/types.ts +48 -0
  164. package/src/routing/intent-classifier.ts +552 -552
  165. package/src/routing/response-filter.ts +399 -391
  166. package/src/routing/router.ts +245 -2193
  167. package/src/routing/search-engine.ts +521 -514
  168. package/src/routing/types.ts +104 -94
  169. package/src/scripts/health-check.ts +118 -118
  170. package/src/scripts/setup.ts +122 -122
  171. package/src/server/auto-updater.ts +283 -276
  172. package/src/server/handlers/call-tool.ts +159 -159
  173. package/src/server/handlers/list-tools.ts +35 -35
  174. package/src/server/handlers/tools/auto-remember.ts +165 -165
  175. package/src/server/handlers/tools/brain.ts +86 -86
  176. package/src/server/handlers/tools/create-project.ts +135 -135
  177. package/src/server/handlers/tools/get-code-standards.ts +123 -123
  178. package/src/server/handlers/tools/get-corrections.ts +152 -152
  179. package/src/server/handlers/tools/get-patterns.ts +156 -156
  180. package/src/server/handlers/tools/get-project-context.ts +75 -75
  181. package/src/server/handlers/tools/index.ts +30 -30
  182. package/src/server/handlers/tools/init-project.ts +756 -756
  183. package/src/server/handlers/tools/list-projects.ts +126 -126
  184. package/src/server/handlers/tools/recall-similar.ts +87 -87
  185. package/src/server/handlers/tools/recognize-pattern.ts +132 -132
  186. package/src/server/handlers/tools/record-correction.ts +131 -131
  187. package/src/server/handlers/tools/remember-decision.ts +168 -168
  188. package/src/server/handlers/tools/schemas.ts +179 -179
  189. package/src/server/handlers/tools/search-code.ts +122 -122
  190. package/src/server/handlers/tools/smart-context.ts +146 -146
  191. package/src/server/handlers/tools/update-progress.ts +131 -131
  192. package/src/server/http-api.ts +215 -1229
  193. package/src/server/mcp-proxy.ts +85 -84
  194. package/src/server/mcp-server.ts +285 -284
  195. package/src/server/middleware/auth.ts +39 -0
  196. package/src/server/middleware/error-handler.ts +37 -0
  197. package/src/server/middleware/rate-limit.ts +53 -0
  198. package/src/server/middleware/validate.ts +42 -0
  199. package/src/server/pid-manager.ts +137 -136
  200. package/src/server/providers/resources.ts +581 -581
  201. package/src/server/routes/code.ts +228 -0
  202. package/src/server/routes/context.ts +26 -0
  203. package/src/server/routes/health.ts +19 -0
  204. package/src/server/routes/helpers.ts +100 -0
  205. package/src/server/routes/hooks.ts +197 -0
  206. package/src/server/routes/mcp.ts +47 -0
  207. package/src/server/routes/memory.ts +397 -0
  208. package/src/server/routes/models.ts +96 -0
  209. package/src/server/routes/projects.ts +89 -0
  210. package/src/server/routes/types.ts +21 -0
  211. package/src/server/schemas/api-schemas.ts +202 -0
  212. package/src/server/services.ts +720 -720
  213. package/src/server/utils/memory-indicator.ts +84 -84
  214. package/src/server/utils/response-formatter.ts +129 -129
  215. package/src/server/web-viewer.ts +1145 -1115
  216. package/src/setup/index.ts +38 -38
  217. package/src/tools/registry.ts +115 -115
  218. package/src/tools/schemas.ts +666 -666
  219. package/src/tools/types.ts +412 -412
  220. package/src/training/data-store.ts +320 -298
  221. package/src/training/retrain-pipeline.ts +399 -394
  222. package/src/utils/error-handler.ts +136 -136
  223. package/src/utils/index.ts +58 -58
  224. package/src/utils/kill-port.ts +55 -53
  225. package/src/utils/phase12-helper.ts +56 -56
  226. package/src/utils/safe-path.ts +43 -0
  227. package/src/utils/timing.ts +47 -47
  228. package/src/utils/transaction.ts +63 -63
  229. package/src/vault/index.ts +4 -3
  230. package/src/vault/paths.ts +106 -106
  231. package/src/vault/query.ts +4 -1
  232. package/src/vault/reader.ts +44 -1
  233. package/src/vault/watcher.ts +24 -1
  234. package/src/vault/writer.ts +487 -413
  235. package/skills/persistent-memory/SKILL.md +0 -148
  236. package/skills/persistent-memory/references/tool-reference.md +0 -90
@@ -1,762 +1,762 @@
- /**
-  * Inference Router — SLM Upgrade Phase 4B
-  * Routes each classification task to model or regex fallback.
-  *
-  * For each task:
-  * 1. If config mode is 'regex'/'api' → use regex/API only
-  * 2. If config mode is 'model' → try model, fall back to regex on failure
-  * 3. If config mode is 'both' → run both, log comparison, use model output
-  *
-  * Confidence thresholding: if model confidence < threshold, use regex instead.
-  * Function signatures match the existing regex classifiers exactly.
-  */
-
- import type { Logger } from 'pino'
- import type { Config } from '@/config'
- import type { ModelManager, ModelTask } from './model-manager'
- import { logTrainingData, logModelFeedback } from '@/training/data-store'
- import { getTokenizer } from './tokenizer'
-
- // Import existing regex classifiers
- import { IntentClassifier as BrainIntentClassifier, type ClassificationResult } from '@/routing/intent-classifier'
- import type { Intent } from '@/routing/intent-classifier'
- import { EntityExtractor, type ExtractedEntity } from '@/knowledge/entity-extractor'
- import { classifyIntent as classifyQueryIntent } from '@/retrieval/query/intent-classifier'
- import type { QueryIntent } from '@/retrieval/types'
- import type { Pattern } from '@/memory/patterns'
-
- /** Default intent labels matching the model training order */
- const INTENT_LABELS: Intent[] = [
-   'session_start', 'context_needed', 'decision_made', 'store_this',
-   'pattern_found', 'mistake_learned', 'progress_update', 'question',
-   'comparison', 'exploration', 'list_all', 'update_memory',
-   'delete_memory', 'detail_request', 'timeline', 'no_action'
- ]
-
- /** BIO labels for token-level entity extraction */
- const ENTITY_BIO_LABELS = ['O', 'B-TECH', 'I-TECH', 'B-PROJECT', 'I-PROJECT', 'B-CONCEPT', 'I-CONCEPT']
-
- /** Query intent labels matching model training order.
-  * Model was trained with 'procedural'/'comparative' which map to code's 'pattern'/'comparison'. */
- const QUERY_LABELS = ['factual', 'procedural', 'comparative', 'temporal', 'exploratory', 'decision'] as const
-
- /** Map model query labels → QueryIntent['type'] (handles training label renames) */
- const QUERY_LABEL_MAP: Record<string, QueryIntent['type']> = {
-   factual: 'factual',
-   procedural: 'pattern',
-   comparative: 'comparison',
-   temporal: 'temporal',
-   exploratory: 'exploratory',
-   decision: 'decision',
- }
-
- /** Pattern type labels */
- const PATTERN_LABELS: Pattern['type'][] = ['solution', 'anti-pattern', 'best-practice', 'common-issue']
-
- /** Knowledge type labels */
- const KNOWLEDGE_LABELS = ['fact', 'preference', 'constraint', 'goal', 'definition']
-
- /** Map BIO entity tag prefix to EntityType */
- const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
-   'TECH': 'technology',
-   'PROJECT': 'project',
-   'CONCEPT': 'concept',
- }
-
- /** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O) */
- const ENTITY_TOKEN_THRESHOLD = 0.25
-
- /** Common words that should never be entities (false positive filter) */
- const ENTITY_STOPWORDS = new Set([
-   'i', 'we', 'you', 'he', 'she', 'it', 'they', 'me', 'us', 'him', 'her', 'them',
-   'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
-   'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
-   'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
-   'not', 'no', 'yes', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
-   'use', 'using', 'used', 'make', 'like', 'just', 'also', 'very', 'much',
- ])
-
- /** EOS token ID for greedy decode (GPT-2 uses 50256) */
- const EOS_TOKEN_ID = 50256
-
- /** Max tokens to generate for compression */
- const COMPRESS_MAX_TOKENS = 128
-
- /** Inference mode per task */
- type TaskMode = 'model' | 'regex' | 'both' | 'api'
-
- export class InferenceRouter {
-   private logger: Logger
-   private config: Config
-   private modelManager: ModelManager
-   private confidenceThreshold: number
-
-   // Regex fallback instances
-   private intentClassifier: BrainIntentClassifier
-   private entityExtractor: EntityExtractor
-
-   constructor(logger: Logger, config: Config, modelManager: ModelManager) {
-     this.logger = logger.child({ component: 'inference-router' })
-     this.config = config
-     this.modelManager = modelManager
-     this.confidenceThreshold = config.slm?.confidenceThreshold ?? 0.7
-
-     // Initialize regex fallbacks
-     this.intentClassifier = new BrainIntentClassifier()
-     this.entityExtractor = new EntityExtractor()
-   }
-
-   /**
-    * Check if SLM is enabled globally
-    */
-   get enabled(): boolean {
-     return this.config.slm?.enabled ?? false
-   }
-
-   /**
-    * Get the mode for a specific task
-    */
-   private getTaskMode(task: ModelTask): TaskMode {
-     if (!this.enabled) return task === 'compress' ? 'api' : 'regex'
-     const taskConfig = this.config.slm?.tasks
-     if (!taskConfig) return task === 'compress' ? 'api' : 'regex'
-
-     switch (task) {
-       case 'intent': return taskConfig.intent ?? 'regex'
-       case 'entity': return taskConfig.entity ?? 'regex'
-       case 'query': return taskConfig.query ?? 'regex'
-       case 'knowledge': return taskConfig.knowledge ?? 'regex'
-       case 'compress': return taskConfig.compress ?? 'api'
-       case 'pattern': return taskConfig.pattern ?? 'regex'
-       default: return 'regex'
-     }
-   }
-
-   /**
-    * Softmax over logits array (used when ONNX models are available)
-    */
-   softmax(logits: Float32Array): number[] {
-     const max = Math.max(...logits)
-     const exps = Array.from(logits).map(x => Math.exp(x - max))
-     const sum = exps.reduce((a, b) => a + b, 0)
-     return exps.map(e => e / sum)
-   }
-
-   // ── Intent Classification ──────────────────────────────────────────
-
-   /**
-    * Classify brain() message intent.
-    * Drop-in replacement for IntentClassifier.classify().
-    * Async to support model inference when ONNX models are available.
-    */
-   async classifyIntent(message: string): Promise<ClassificationResult> {
-     const mode = this.getTaskMode('intent')
-
-     if (mode === 'regex') {
-       return this.intentClassifier.classify(message)
-     }
-
-     // Try model
-     const modelResult = await this.tryModelClassifyIntent(message)
-     const regexResult = this.intentClassifier.classify(message)
-
-     if (mode === 'both') {
-       this.logComparison('intent', message, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     // mode === 'model': use model if available and confident
-     if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
-       return modelResult
-     }
-     return regexResult
-   }
-
-   /**
-    * Attempt model-based intent classification. Returns null if model unavailable.
-    * Tokenizes message, runs ONNX inference, and maps logits to ClassificationResult.
-    */
-   private async tryModelClassifyIntent(message: string): Promise<ClassificationResult | null> {
-     if (!this.modelManager.hasModel('intent')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const { inputIds, attentionMask } = tokenizer.encode(message, 128)
-       const logits = await this.modelManager.infer('intent', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('intent')
-       const labels = (manifest?.labels as Intent[] | undefined) ?? INTENT_LABELS
-
-       // Find top prediction
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       // Find secondary intents (above a lower threshold)
-       const secondary: Intent[] = []
-       for (let i = 0; i < probs.length; i++) {
-         if (i !== maxIdx && probs[i]! > 0.15) {
-           secondary.push(labels[i]!)
-         }
-       }
-
-       return {
-         primary: labels[maxIdx]!,
-         confidence: maxProb,
-         secondary,
-       }
-     } catch (error) {
-       this.logger.warn({ error }, 'Model intent classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Entity Extraction ──────────────────────────────────────────────
-
-   /**
-    * Extract entities from text.
-    * Drop-in replacement for EntityExtractor.extract().
-    * Now async to support model inference.
-    */
-   async extractEntities(text: string): Promise<ExtractedEntity[]> {
-     const mode = this.getTaskMode('entity')
-
-     if (mode === 'regex') {
-       return this.entityExtractor.extract(text)
-     }
-
-     const modelResult = await this.tryModelExtractEntities(text)
-     const regexResult = this.entityExtractor.extract(text)
-
-     if (mode === 'both') {
-       this.logComparison('entity', text, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     return modelResult ?? regexResult
-   }
-
-   /**
-    * Attempt model-based entity extraction using BIO sequence labeling.
-    * Output is per-token logits [numTokens * numLabels].
-    * Groups consecutive B-/I- tags into ExtractedEntity objects.
-    * Applies softmax per-token and filters low-confidence/garbage entities.
-    */
-   private async tryModelExtractEntities(text: string): Promise<ExtractedEntity[] | null> {
-     if (!this.modelManager.hasModel('entity')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('entity')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
-       const logits = await this.modelManager.infer('entity', inputIds, attentionMask)
-       if (!logits) return null
-
-       const manifest = this.modelManager.getManifestEntry('entity')
-       const labels = manifest?.labels ?? ENTITY_BIO_LABELS
-       const numLabels = labels.length
-
-       // Count active tokens (non-padding)
-       const numTokens = attentionMask.filter(m => m === 1).length
-
-       // Decode per-token BIO tags with softmax probabilities
-       const tokenTags: { tag: string; prob: number }[] = []
-       for (let t = 0; t < numTokens; t++) {
-         // Extract logits for this token and apply softmax
-         const tokenLogits = new Float32Array(numLabels)
-         for (let l = 0; l < numLabels; l++) {
-           tokenLogits[l] = logits[t * numLabels + l]!
-         }
-         const probs = this.softmax(tokenLogits)
-
-         let bestIdx = 0
-         let bestProb = probs[0]!
-         for (let l = 1; l < numLabels; l++) {
-           if (probs[l]! > bestProb) {
-             bestProb = probs[l]!
-             bestIdx = l
-           }
-         }
-
-         // Only accept B-/I- tags if softmax probability exceeds entity threshold
-         const tag = labels[bestIdx]!
-         const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
-         if (isEntityTag && bestProb < ENTITY_TOKEN_THRESHOLD) {
-           tokenTags.push({ tag: 'O', prob: bestProb })
-         } else {
-           tokenTags.push({ tag, prob: bestProb })
-         }
-       }
-
-       // Group consecutive B-/I- tags into entities
-       const entities: ExtractedEntity[] = []
-       let currentType: string | null = null
-       let currentTokenIds: number[] = []
-       let currentProbs: number[] = []
-       let startPos = 0
-
-       const flushEntity = () => {
-         if (currentType && currentTokenIds.length > 0) {
-           const entityType = BIO_TYPE_MAP[currentType]
-           if (entityType) {
-             const name = tokenizer.decode(currentTokenIds).trim()
-             const avgProb = currentProbs.reduce((a, b) => a + b, 0) / currentProbs.length
-
-             // Filter: minimum 2 chars, not a stopword, decent average confidence
-             if (name.length >= 2 && avgProb >= ENTITY_TOKEN_THRESHOLD && !ENTITY_STOPWORDS.has(name.toLowerCase())) {
-               entities.push({
-                 name,
-                 normalizedName: name.toLowerCase(),
-                 type: entityType,
-                 confidence: avgProb,
-                 source: 'model',
-                 positions: [startPos],
-               })
-             }
-           }
-         }
-         currentType = null
-         currentTokenIds = []
-         currentProbs = []
-       }
-
-       for (let i = 0; i < tokenTags.length; i++) {
-         const { tag, prob } = tokenTags[i]!
-
-         if (tag.startsWith('B-')) {
-           flushEntity()
-           currentType = tag.slice(2)
-           currentTokenIds = [inputIds[i]!]
-           currentProbs = [prob]
-           startPos = i
-         } else if (tag.startsWith('I-') && currentType === tag.slice(2)) {
-           currentTokenIds.push(inputIds[i]!)
-           currentProbs.push(prob)
-         } else {
-           flushEntity()
-         }
-       }
-       flushEntity()
-
-       return entities.length > 0 ? entities : null
-     } catch (error) {
-       this.logger.warn({ error }, 'Model entity extraction failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Query Intent Classification ────────────────────────────────────
-
-   /**
-    * Classify search query intent.
-    * Drop-in replacement for classifyIntent() in retrieval/query/intent-classifier.
-    * Now async to support model inference.
-    */
-   async classifyQueryIntent(query: string): Promise<QueryIntent> {
-     const mode = this.getTaskMode('query')
-
-     if (mode === 'regex') {
-       return classifyQueryIntent(query)
-     }
-
-     const modelResult = await this.tryModelClassifyQuery(query)
-     const regexResult = classifyQueryIntent(query)
-
-     if (mode === 'both') {
-       this.logComparison('query', query, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
-       return modelResult
-     }
-     return regexResult
-   }
-
-   /**
-    * Attempt model-based query intent classification.
-    * 6-class classifier. Model uses training labels (procedural, comparative)
-    * which get mapped to code labels (pattern, comparison) via QUERY_LABEL_MAP.
-    */
-   private async tryModelClassifyQuery(query: string): Promise<QueryIntent | null> {
-     if (!this.modelManager.hasModel('query')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('query')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(query, maxLen)
-       const logits = await this.modelManager.infer('query', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('query')
-       const labels = manifest?.labels ?? [...QUERY_LABELS]
-
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       // Map model label to QueryIntent type (handles training label renames)
-       const rawLabel = labels[maxIdx]!
-       const mappedType = QUERY_LABEL_MAP[rawLabel] ?? (rawLabel as QueryIntent['type'])
-
-       return {
-         type: mappedType,
-         confidence: maxProb,
-       }
-     } catch (error) {
-       this.logger.warn({ error }, 'Model query classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Pattern Classification ─────────────────────────────────────────
-
-   /**
-    * Classify pattern type.
-    * Drop-in for PatternRecognizer.determinePatternType().
-    * Now async to support model inference.
-    */
-   async classifyPatternType(description: string): Promise<Pattern['type']> {
-     const mode = this.getTaskMode('pattern')
-
-     if (mode === 'regex') {
-       return this.regexClassifyPattern(description)
-     }
-
-     const modelResult = await this.tryModelClassifyPattern(description)
-     const regexResult = this.regexClassifyPattern(description)
-
-     if (mode === 'both') {
-       this.logComparison('pattern', description, modelResult, regexResult)
-       return modelResult ?? regexResult
-     }
-
-     return modelResult ?? regexResult
-   }
-
-   /**
-    * Regex fallback for pattern classification (extracted from PatternRecognizer)
-    */
-   private regexClassifyPattern(description: string): Pattern['type'] {
-     const lower = description.toLowerCase()
-     if (lower.includes('avoid') || lower.includes('anti-pattern') || lower.includes("don't use")) {
-       return 'anti-pattern'
-     }
-     if (lower.includes('always') || lower.includes('best practice') || lower.includes('standard')) {
-       return 'best-practice'
-     }
-     if (lower.includes('issue') || lower.includes('bug') || lower.includes('fix')) {
-       return 'common-issue'
-     }
-     return 'solution'
-   }
-
-   /**
-    * Attempt model-based pattern type classification.
-    * 4-class classifier: solution, anti-pattern, best-practice, common-issue.
-    */
-   private async tryModelClassifyPattern(description: string): Promise<Pattern['type'] | null> {
-     if (!this.modelManager.hasModel('pattern')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('pattern')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(description, maxLen)
-       const logits = await this.modelManager.infer('pattern', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('pattern')
-       const labels = (manifest?.labels as Pattern['type'][] | undefined) ?? PATTERN_LABELS
-
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       // Only return if confidence exceeds threshold
-       if (maxProb < this.confidenceThreshold) return null
-
-       return labels[maxIdx]!
-     } catch (error) {
-       this.logger.warn({ error }, 'Model pattern classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Knowledge Type Classification ──────────────────────────────────
-
-   /**
-    * Classify knowledge type from text.
-    * 5-class classifier: fact, preference, constraint, goal, definition.
-    */
-   async classifyKnowledgeType(text: string): Promise<string> {
-     const mode = this.getTaskMode('knowledge')
-
-     if (mode === 'regex') {
-       return this.regexClassifyKnowledge(text)
-     }
-
-     const modelResult = await this.tryModelClassifyKnowledge(text)
-     const regexResult = this.regexClassifyKnowledge(text)
-
-     if (mode === 'both') {
-       this.logComparison('knowledge', text, modelResult, regexResult)
-     }
-
-     // Cross-check: if regex found a keyword match (non-'fact') and model disagrees, prefer regex.
-     // Regex only returns non-'fact' when it finds a definitive keyword (prefer, must, goal, etc.)
-     // so it's more trustworthy than an undertrained model for those cases.
-     if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
-       this.logger.debug({ modelResult, regexResult }, 'Knowledge cross-check: regex keyword wins over model')
-       return regexResult
-     }
-
-     if (modelResult) {
-       return modelResult
-     }
-     return regexResult
-   }
-
-   /**
-    * Regex fallback for knowledge type classification.
-    */
-   private regexClassifyKnowledge(text: string): string {
-     const lower = text.toLowerCase()
-     if (lower.includes('prefer') || lower.includes('like') || lower.includes('want') || lower.includes('favorite')) {
-       return 'preference'
-     }
-     if (lower.includes('must') || lower.includes('require') || lower.includes('constraint') || lower.includes('cannot') || lower.includes("can't") || lower.includes('never')) {
-       return 'constraint'
-     }
-     if (lower.includes('goal') || lower.includes('plan') || lower.includes('aim') || lower.includes('target') || lower.includes('objective')) {
-       return 'goal'
-     }
-     if (lower.includes('define') || lower.includes('definition') || lower.includes('means') || lower.includes('refers to') || lower.includes('is a')) {
-       return 'definition'
-     }
-     return 'fact'
-   }
-
-   /**
-    * Attempt model-based knowledge type classification.
-    * 5-class classifier: fact, preference, constraint, goal, definition.
-    */
-   private async tryModelClassifyKnowledge(text: string): Promise<string | null> {
-     if (!this.modelManager.hasModel('knowledge')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('knowledge')?.maxSeqLen ?? 128
-       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
-       const logits = await this.modelManager.infer('knowledge', inputIds, attentionMask)
-       if (!logits) return null
-
-       const probs = this.softmax(logits)
-       const manifest = this.modelManager.getManifestEntry('knowledge')
-       const labels = manifest?.labels ?? KNOWLEDGE_LABELS
-
-       let maxIdx = 0
-       let maxProb = probs[0]!
-       for (let i = 1; i < probs.length; i++) {
-         if (probs[i]! > maxProb) {
-           maxProb = probs[i]!
-           maxIdx = i
-         }
-       }
-
-       if (maxProb < this.confidenceThreshold) return null
-
-       return labels[maxIdx]!
-     } catch (error) {
-       this.logger.warn({ error }, 'Model knowledge classification failed, falling back to regex')
-       return null
-     }
-   }
-
-   // ── Local Compression ──────────────────────────────────────────────
-
-   /**
-    * Compress text using local model or return original if unavailable.
-    * Uses greedy autoregressive decoding for seq2seq generation.
-    */
-   async compress(text: string): Promise<string> {
-     const mode = this.getTaskMode('compress')
-
-     // In 'api' mode or 'regex' mode, no local compression available
-     if (mode === 'api' || mode === 'regex') {
-       return text
-     }
-
-     const modelResult = await this.tryModelCompress(text)
-
-     if (mode === 'both' && modelResult) {
-       this.logger.debug({
-         originalLen: text.length,
-         compressedLen: modelResult.length,
-         ratio: (modelResult.length / text.length).toFixed(2),
-       }, 'Compression comparison')
-     }
-
-     return modelResult ?? text
-   }
-
-   /**
-    * Attempt model-based text compression using greedy autoregressive decoding.
-    * Generates tokens one at a time until EOS or max length.
-    */
-   private async tryModelCompress(text: string): Promise<string | null> {
-     if (!this.modelManager.hasModel('compress')) return null
-
-     try {
-       const tokenizer = await getTokenizer(this.logger)
-       const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
-       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
-
-       // Keep the padded arrays (fixed shape [1, maxLen]) and track active length.
-       // The ONNX model expects fixed input shape [1, maxLen]; stripping padding
-       // would create variable-length tensors that the model rejects.
-       let activeLen = attentionMask.filter(m => m === 1).length
-       const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
-       const inputLen = activeLen
-
-       for (let step = 0; step < maxGenTokens; step++) {
-         if (activeLen >= maxLen) break
-
-         const logits = await this.modelManager.infer('compress', [...inputIds], [...attentionMask])
-         if (!logits) break
-
-         // vocabSize = total logits / sequence length (maxLen, the padded dimension)
-         const vocabSize = logits.length / maxLen
-         const lastActiveOffset = (activeLen - 1) * vocabSize
-         const lastTokenLogits = logits.slice(lastActiveOffset, lastActiveOffset + vocabSize)
-
-         // Find argmax (greedy decode)
-         let bestId = 0
-         let bestVal = -Infinity
-         for (let v = 0; v < lastTokenLogits.length; v++) {
-           if (lastTokenLogits[v]! > bestVal) {
-             bestVal = lastTokenLogits[v]!
-             bestId = v
-           }
-         }
-
-         // Stop on EOS
-         if (bestId === EOS_TOKEN_ID || bestId === 0) break
-
-         // Write new token into the next padding slot
-         inputIds[activeLen] = bestId
-         attentionMask[activeLen] = 1
-         activeLen++
-       }
-
-       // Decode only the generated tokens (after input)
-       const outputIds = inputIds.slice(inputLen, activeLen)
-
-       if (outputIds.length === 0) return null
-
-       const decoded = tokenizer.decode(outputIds).trim()
-       return decoded.length > 0 ? decoded : null
-     } catch (err) {
-       const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
-       this.logger.warn({ error }, 'Model compression failed, returning original text')
-       return null
-     }
-   }
-
-   // ── Comparison Logging ─────────────────────────────────────────────
-
-   /**
-    * Log model vs regex comparison for "both" mode validation.
-    * Stored in model_feedback table for later analysis.
-    */
-   private logComparison(task: ModelTask, input: string, modelResult: any, regexResult: any): void {
-     try {
-       const modelLabel = this.extractLabel(modelResult)
-       const regexLabel = this.extractLabel(regexResult)
-       const agree = modelLabel === regexLabel
-
-       this.logger.debug({
-         task,
-         modelLabel,
-         regexLabel,
-         agree,
-       }, 'Model vs regex comparison')
-
-       // Always log to feedback table (both agreements and disagreements)
-       logModelFeedback({
-         task,
-         input,
-         modelPrediction: modelLabel ?? 'null',
-         modelConfidence: this.extractConfidence(modelResult),
-         regexPrediction: regexLabel ?? 'null',
-       })
-
-       if (!agree) {
-         // Also log disagreement to training data for review
-         logTrainingData({
-           task,
-           input,
-           output: JSON.stringify({ model: modelLabel, regex: regexLabel, agreed: false }),
-           metadata: JSON.stringify({ mode: 'both', comparison: true }),
-         })
-       }
-     } catch {
-       // Non-critical
-     }
-   }
-
-   /**
-    * Extract confidence from a model result object.
-    */
-   private extractConfidence(result: any): number {
-     if (!result) return 0
-     if (typeof result.confidence === 'number') return result.confidence
-     return 0
-   }
-
-   private extractLabel(result: any): string | null {
-     if (!result) return null
-     if (typeof result === 'string') return result
-     if (result.primary) return result.primary
-     if (result.type) return result.type
-     return JSON.stringify(result).slice(0, 50)
-   }
-
-   // ── Status ─────────────────────────────────────────────────────────
-
-   /**
-    * Get inference routing status for all tasks
-    */
-   getStatus(): Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }> {
-     const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
-     const modelStatus = this.modelManager.getStatus()
-     const status = {} as Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }>
-
-     for (const task of tasks) {
-       status[task] = {
-         mode: this.getTaskMode(task),
-         modelAvailable: modelStatus[task].available,
-         modelLoaded: modelStatus[task].loaded,
-       }
-     }
-     return status
-   }
- }
1
+ /**
2
+ * Inference Router — SLM Upgrade Phase 4B
3
+ * Routes each classification task to model or regex fallback.
4
+ *
5
+ * For each task:
6
+ * 1. If config mode is 'regex'/'api' → use regex/API only
7
+ * 2. If config mode is 'model' → try model, fall back to regex on failure
8
+ * 3. If config mode is 'both' → run both, log comparison, use model output
9
+ *
10
+ * Confidence thresholding: if model confidence < threshold, use regex instead.
11
+ * Function signatures match the existing regex classifiers exactly.
12
+ */
13
+
14
+ import type { Logger } from 'pino'
15
+ import type { Config } from '@/config'
16
+ import type { ModelManager, ModelTask } from './model-manager'
17
+ import { logTrainingData, logModelFeedback } from '@/training/data-store'
18
+ import { getTokenizer } from './tokenizer'
19
+
20
+ // Import existing regex classifiers
21
+ import { IntentClassifier as BrainIntentClassifier, type ClassificationResult } from '@/routing/intent-classifier'
22
+ import type { Intent } from '@/routing/intent-classifier'
23
+ import { EntityExtractor, type ExtractedEntity } from '@/knowledge/entity-extractor'
24
+ import { classifyIntent as classifyQueryIntent } from '@/retrieval/query/intent-classifier'
25
+ import type { QueryIntent } from '@/retrieval/types'
26
+ import type { Pattern } from '@/memory/patterns'
27
+
28
+ /** Default intent labels matching the model training order */
29
+ const INTENT_LABELS: Intent[] = [
30
+ 'session_start', 'context_needed', 'decision_made', 'store_this',
31
+ 'pattern_found', 'mistake_learned', 'progress_update', 'question',
32
+ 'comparison', 'exploration', 'list_all', 'update_memory',
33
+ 'delete_memory', 'detail_request', 'timeline', 'no_action'
34
+ ]
35
+
36
+ /** BIO labels for token-level entity extraction */
37
+ const ENTITY_BIO_LABELS = ['O', 'B-TECH', 'I-TECH', 'B-PROJECT', 'I-PROJECT', 'B-CONCEPT', 'I-CONCEPT']
38
+
39
+ /** Query intent labels matching model training order.
40
+ * Model was trained with 'procedural'/'comparative' which map to code's 'pattern'/'comparison'. */
41
+ const QUERY_LABELS = ['factual', 'procedural', 'comparative', 'temporal', 'exploratory', 'decision'] as const
42
+
43
+ /** Map model query labels → QueryIntent['type'] (handles training label renames) */
44
+ const QUERY_LABEL_MAP: Record<string, QueryIntent['type']> = {
45
+ factual: 'factual',
46
+ procedural: 'pattern',
47
+ comparative: 'comparison',
48
+ temporal: 'temporal',
49
+ exploratory: 'exploratory',
50
+ decision: 'decision',
51
+ }
52
+
53
+ /** Pattern type labels */
54
+ const PATTERN_LABELS: Pattern['type'][] = ['solution', 'anti-pattern', 'best-practice', 'common-issue']
55
+
56
+ /** Knowledge type labels */
57
+ const KNOWLEDGE_LABELS = ['fact', 'preference', 'constraint', 'goal', 'definition']
58
+
59
+ /** Map BIO entity tag prefix to EntityType */
60
+ const BIO_TYPE_MAP: Record<string, ExtractedEntity['type']> = {
61
+ 'TECH': 'technology',
62
+ 'PROJECT': 'project',
63
+ 'CONCEPT': 'concept',
64
+ }
65
+
66
+ /** Minimum softmax probability to accept a B-/I- entity tag (vs forcing O) */
67
+ const ENTITY_TOKEN_THRESHOLD = 0.25
68
+
69
+ /** Common words that should never be entities (false positive filter) */
70
+ const ENTITY_STOPWORDS = new Set([
71
+ 'i', 'we', 'you', 'he', 'she', 'it', 'they', 'me', 'us', 'him', 'her', 'them',
72
+ 'a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
73
+ 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had',
74
+ 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
75
+ 'not', 'no', 'yes', 'this', 'that', 'these', 'those', 'my', 'your', 'our', 'their',
76
+ 'use', 'using', 'used', 'make', 'like', 'just', 'also', 'very', 'much',
77
+ ])
78
+
79
+ /** EOS token ID for greedy decode (GPT-2 uses 50256) */
80
+ const EOS_TOKEN_ID = 50256
81
+
82
+ /** Max tokens to generate for compression */
83
+ const COMPRESS_MAX_TOKENS = 128
84
+
85
+ /** Inference mode per task */
86
+ type TaskMode = 'model' | 'regex' | 'both' | 'api'
87
+
88
+ export class InferenceRouter {
89
+ private logger: Logger
90
+ private config: Config
91
+ private modelManager: ModelManager
92
+ private confidenceThreshold: number
93
+
94
+ // Regex fallback instances
95
+ private intentClassifier: BrainIntentClassifier
96
+ private entityExtractor: EntityExtractor
97
+
98
+ constructor(logger: Logger, config: Config, modelManager: ModelManager) {
99
+ this.logger = logger.child({ component: 'inference-router' })
100
+ this.config = config
101
+ this.modelManager = modelManager
102
+ this.confidenceThreshold = config.slm?.confidenceThreshold ?? 0.7
103
+
104
+ // Initialize regex fallbacks
105
+ this.intentClassifier = new BrainIntentClassifier()
106
+ this.entityExtractor = new EntityExtractor()
107
+ }
108
+
109
+ /**
110
+ * Check if SLM is enabled globally
111
+ */
112
+ get enabled(): boolean {
113
+ return this.config.slm?.enabled ?? false
114
+ }
115
+
116
+ /**
117
+ * Get the mode for a specific task
118
+ */
119
+ private getTaskMode(task: ModelTask): TaskMode {
120
+ if (!this.enabled) return task === 'compress' ? 'api' : 'regex'
121
+ const taskConfig = this.config.slm?.tasks
122
+ if (!taskConfig) return task === 'compress' ? 'api' : 'regex'
123
+
124
+ switch (task) {
125
+ case 'intent': return taskConfig.intent ?? 'regex'
126
+ case 'entity': return taskConfig.entity ?? 'regex'
127
+ case 'query': return taskConfig.query ?? 'regex'
128
+ case 'knowledge': return taskConfig.knowledge ?? 'regex'
129
+ case 'compress': return taskConfig.compress ?? 'api'
130
+ case 'pattern': return taskConfig.pattern ?? 'regex'
131
+ default: return 'regex'
132
+ }
133
+ }
134
+
135
+ /**
136
+ * Softmax over logits array (used when ONNX models are available)
137
+ */
138
+ softmax(logits: Float32Array): number[] {
139
+ const max = Math.max(...logits)
140
+ const exps = Array.from(logits).map(x => Math.exp(x - max))
141
+ const sum = exps.reduce((a, b) => a + b, 0)
142
+ return exps.map(e => e / sum)
143
+ }
144
+
145
+ // ── Intent Classification ──────────────────────────────────────────
146
+
147
+ /**
148
+ * Classify brain() message intent.
149
+ * Drop-in replacement for IntentClassifier.classify().
150
+ * Async to support model inference when ONNX models are available.
151
+ */
152
+ async classifyIntent(message: string): Promise<ClassificationResult> {
153
+ const mode = this.getTaskMode('intent')
154
+
155
+ if (mode === 'regex') {
156
+ return this.intentClassifier.classify(message)
157
+ }
158
+
159
+ // Try model
160
+ const modelResult = await this.tryModelClassifyIntent(message)
161
+ const regexResult = this.intentClassifier.classify(message)
162
+
163
+ if (mode === 'both') {
164
+ this.logComparison('intent', message, modelResult, regexResult)
165
+ return modelResult ?? regexResult
166
+ }
167
+
168
+ // mode === 'model': use model if available and confident
169
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
170
+ return modelResult
171
+ }
172
+ return regexResult
173
+ }
174
+
175
+ /**
176
+ * Attempt model-based intent classification. Returns null if model unavailable.
177
+ * Tokenizes message, runs ONNX inference, and maps logits to ClassificationResult.
178
+ */
179
+ private async tryModelClassifyIntent(message: string): Promise<ClassificationResult | null> {
180
+ if (!this.modelManager.hasModel('intent')) return null
181
+
182
+ try {
183
+ const tokenizer = await getTokenizer(this.logger)
184
+ const { inputIds, attentionMask } = tokenizer.encode(message, 128)
185
+ const logits = await this.modelManager.infer('intent', inputIds, attentionMask)
186
+ if (!logits) return null
187
+
188
+ const probs = this.softmax(logits)
189
+ const manifest = this.modelManager.getManifestEntry('intent')
190
+ const labels = (manifest?.labels as Intent[] | undefined) ?? INTENT_LABELS
191
+
192
+ // Find top prediction
193
+ let maxIdx = 0
194
+ let maxProb = probs[0]!
195
+ for (let i = 1; i < probs.length; i++) {
196
+ if (probs[i]! > maxProb) {
197
+ maxProb = probs[i]!
198
+ maxIdx = i
199
+ }
200
+ }
201
+
202
+ // Find secondary intents (above a lower threshold)
203
+ const secondary: Intent[] = []
204
+ for (let i = 0; i < probs.length; i++) {
205
+ if (i !== maxIdx && probs[i]! > 0.15) {
206
+ secondary.push(labels[i]!)
207
+ }
208
+ }
209
+
210
+ return {
211
+ primary: labels[maxIdx]!,
212
+ confidence: maxProb,
213
+ secondary,
214
+ }
215
+ } catch (error) {
216
+ this.logger.warn({ error }, 'Model intent classification failed, falling back to regex')
217
+ return null
218
+ }
219
+ }
220
+
221
+ // ── Entity Extraction ──────────────────────────────────────────────
222
+
223
+ /**
224
+ * Extract entities from text.
225
+ * Drop-in replacement for EntityExtractor.extract().
226
+ * Now async to support model inference.
227
+ */
228
+ async extractEntities(text: string): Promise<ExtractedEntity[]> {
229
+ const mode = this.getTaskMode('entity')
230
+
231
+ if (mode === 'regex') {
232
+ return this.entityExtractor.extract(text)
233
+ }
234
+
235
+ const modelResult = await this.tryModelExtractEntities(text)
236
+ const regexResult = this.entityExtractor.extract(text)
237
+
238
+ if (mode === 'both') {
239
+ this.logComparison('entity', text, modelResult, regexResult)
240
+ return modelResult ?? regexResult
241
+ }
242
+
243
+ return modelResult ?? regexResult
244
+ }
245
+
246
+ /**
247
+ * Attempt model-based entity extraction using BIO sequence labeling.
248
+ * Output is per-token logits [numTokens * numLabels].
249
+ * Groups consecutive B-/I- tags into ExtractedEntity objects.
250
+ * Applies softmax per-token and filters low-confidence/garbage entities.
251
+ */
252
+ private async tryModelExtractEntities(text: string): Promise<ExtractedEntity[] | null> {
253
+ if (!this.modelManager.hasModel('entity')) return null
254
+
255
+ try {
256
+ const tokenizer = await getTokenizer(this.logger)
257
+ const maxLen = this.modelManager.getManifestEntry('entity')?.maxSeqLen ?? 128
258
+ const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
259
+ const logits = await this.modelManager.infer('entity', inputIds, attentionMask)
260
+ if (!logits) return null
261
+
262
+ const manifest = this.modelManager.getManifestEntry('entity')
263
+ const labels = manifest?.labels ?? ENTITY_BIO_LABELS
264
+ const numLabels = labels.length
265
+
266
+ // Count active tokens (non-padding)
267
+ const numTokens = attentionMask.filter(m => m === 1).length
268
+
269
+ // Decode per-token BIO tags with softmax probabilities
270
+ const tokenTags: { tag: string; prob: number }[] = []
271
+ for (let t = 0; t < numTokens; t++) {
272
+ // Extract logits for this token and apply softmax
273
+ const tokenLogits = new Float32Array(numLabels)
274
+ for (let l = 0; l < numLabels; l++) {
275
+ tokenLogits[l] = logits[t * numLabels + l]!
276
+ }
277
+ const probs = this.softmax(tokenLogits)
278
+
279
+ let bestIdx = 0
280
+ let bestProb = probs[0]!
281
+ for (let l = 1; l < numLabels; l++) {
282
+ if (probs[l]! > bestProb) {
283
+ bestProb = probs[l]!
284
+ bestIdx = l
285
+ }
286
+ }
287
+
288
+ // Only accept B-/I- tags if softmax probability exceeds entity threshold
289
+ const tag = labels[bestIdx]!
290
+ const isEntityTag = tag.startsWith('B-') || tag.startsWith('I-')
291
+ if (isEntityTag && bestProb < ENTITY_TOKEN_THRESHOLD) {
292
+ tokenTags.push({ tag: 'O', prob: bestProb })
293
+ } else {
294
+ tokenTags.push({ tag, prob: bestProb })
295
+ }
296
+ }
297
+
298
+ // Group consecutive B-/I- tags into entities
299
+ const entities: ExtractedEntity[] = []
300
+ let currentType: string | null = null
301
+ let currentTokenIds: number[] = []
302
+ let currentProbs: number[] = []
303
+ let startPos = 0
304
+
305
+ const flushEntity = () => {
306
+ if (currentType && currentTokenIds.length > 0) {
307
+ const entityType = BIO_TYPE_MAP[currentType]
308
+ if (entityType) {
309
+ const name = tokenizer.decode(currentTokenIds).trim()
310
+ const avgProb = currentProbs.reduce((a, b) => a + b, 0) / currentProbs.length
311
+
312
+ // Filter: minimum 2 chars, not a stopword, decent average confidence
313
+ if (name.length >= 2 && avgProb >= ENTITY_TOKEN_THRESHOLD && !ENTITY_STOPWORDS.has(name.toLowerCase())) {
314
+ entities.push({
315
+ name,
316
+ normalizedName: name.toLowerCase(),
317
+ type: entityType,
318
+ confidence: avgProb,
319
+ source: 'model',
320
+ positions: [startPos],
321
+ })
322
+ }
323
+ }
324
+ }
325
+ currentType = null
326
+ currentTokenIds = []
327
+ currentProbs = []
328
+ }
329
+
330
+ for (let i = 0; i < tokenTags.length; i++) {
331
+ const { tag, prob } = tokenTags[i]!
332
+
333
+ if (tag.startsWith('B-')) {
334
+ flushEntity()
335
+ currentType = tag.slice(2)
336
+ currentTokenIds = [inputIds[i]!]
337
+ currentProbs = [prob]
338
+ startPos = i
339
+ } else if (tag.startsWith('I-') && currentType === tag.slice(2)) {
340
+ currentTokenIds.push(inputIds[i]!)
341
+ currentProbs.push(prob)
342
+ } else {
343
+ flushEntity()
344
+ }
345
+ }
346
+ flushEntity()
347
+
348
+ return entities.length > 0 ? entities : null
349
+ } catch (error) {
350
+ this.logger.warn({ error }, 'Model entity extraction failed, falling back to regex')
351
+ return null
352
+ }
353
+ }
354
+
355
+ // ── Query Intent Classification ────────────────────────────────────
356
+
357
+ /**
358
+ * Classify search query intent.
359
+ * Drop-in replacement for classifyIntent() in retrieval/query/intent-classifier.
360
+ * Now async to support model inference.
361
+ */
362
+ async classifyQueryIntent(query: string): Promise<QueryIntent> {
363
+ const mode = this.getTaskMode('query')
364
+
365
+ if (mode === 'regex') {
366
+ return classifyQueryIntent(query)
367
+ }
368
+
369
+ const modelResult = await this.tryModelClassifyQuery(query)
370
+ const regexResult = classifyQueryIntent(query)
371
+
372
+ if (mode === 'both') {
373
+ this.logComparison('query', query, modelResult, regexResult)
374
+ return modelResult ?? regexResult
375
+ }
376
+
377
+ if (modelResult && modelResult.confidence >= this.confidenceThreshold) {
378
+ return modelResult
379
+ }
380
+ return regexResult
381
+ }
382
+
383
+ /**
384
+ * Attempt model-based query intent classification.
385
+ * 6-class classifier. Model uses training labels (procedural, comparative)
386
+ * which get mapped to code labels (pattern, comparison) via QUERY_LABEL_MAP.
387
+ */
388
+ private async tryModelClassifyQuery(query: string): Promise<QueryIntent | null> {
389
+ if (!this.modelManager.hasModel('query')) return null
390
+
391
+ try {
392
+ const tokenizer = await getTokenizer(this.logger)
393
+ const maxLen = this.modelManager.getManifestEntry('query')?.maxSeqLen ?? 128
394
+ const { inputIds, attentionMask } = tokenizer.encode(query, maxLen)
395
+ const logits = await this.modelManager.infer('query', inputIds, attentionMask)
396
+ if (!logits) return null
397
+
398
+ const probs = this.softmax(logits)
399
+ const manifest = this.modelManager.getManifestEntry('query')
400
+ const labels = manifest?.labels ?? [...QUERY_LABELS]
401
+
402
+ let maxIdx = 0
403
+ let maxProb = probs[0]!
404
+ for (let i = 1; i < probs.length; i++) {
405
+ if (probs[i]! > maxProb) {
406
+ maxProb = probs[i]!
407
+ maxIdx = i
408
+ }
409
+ }
410
+
411
+ // Map model label to QueryIntent type (handles training label renames)
412
+ const rawLabel = labels[maxIdx]!
413
+ const mappedType = QUERY_LABEL_MAP[rawLabel] ?? (rawLabel as QueryIntent['type'])
414
+
415
+ return {
416
+ type: mappedType,
417
+ confidence: maxProb,
418
+ }
419
+ } catch (error) {
420
+ this.logger.warn({ error }, 'Model query classification failed, falling back to regex')
421
+ return null
422
+ }
423
+ }
+
+   // ── Pattern Classification ─────────────────────────────────────────
+
+   /**
+    * Classify pattern type.
+    * Drop-in for PatternRecognizer.determinePatternType().
+    * Now async to support model inference.
+    */
+   async classifyPatternType(description: string): Promise<Pattern['type']> {
+     const mode = this.getTaskMode('pattern')
+
+     if (mode === 'regex') {
+       return this.regexClassifyPattern(description)
+     }
+
+     const modelResult = await this.tryModelClassifyPattern(description)
+     const regexResult = this.regexClassifyPattern(description)
+
+     if (mode === 'both') {
+       this.logComparison('pattern', description, modelResult, regexResult)
+       return modelResult ?? regexResult
+     }
+
+     return modelResult ?? regexResult
+   }
+
+   /**
+    * Regex fallback for pattern classification (extracted from PatternRecognizer).
+    */
+   private regexClassifyPattern(description: string): Pattern['type'] {
+     const lower = description.toLowerCase()
+     if (lower.includes('avoid') || lower.includes('anti-pattern') || lower.includes("don't use")) {
+       return 'anti-pattern'
+     }
+     if (lower.includes('always') || lower.includes('best practice') || lower.includes('standard')) {
+       return 'best-practice'
+     }
+     if (lower.includes('issue') || lower.includes('bug') || lower.includes('fix')) {
+       return 'common-issue'
+     }
+     return 'solution'
+   }
+
+   /**
+    * Attempt model-based pattern type classification.
+    * 4-class classifier: solution, anti-pattern, best-practice, common-issue.
+    */
+   private async tryModelClassifyPattern(description: string): Promise<Pattern['type'] | null> {
+     if (!this.modelManager.hasModel('pattern')) return null
+
+     try {
+       const tokenizer = await getTokenizer(this.logger)
+       const maxLen = this.modelManager.getManifestEntry('pattern')?.maxSeqLen ?? 128
+       const { inputIds, attentionMask } = tokenizer.encode(description, maxLen)
+       const logits = await this.modelManager.infer('pattern', inputIds, attentionMask)
+       if (!logits) return null
+
+       const probs = this.softmax(logits)
+       const manifest = this.modelManager.getManifestEntry('pattern')
+       const labels = (manifest?.labels as Pattern['type'][] | undefined) ?? PATTERN_LABELS
+
+       let maxIdx = 0
+       let maxProb = probs[0]!
+       for (let i = 1; i < probs.length; i++) {
+         if (probs[i]! > maxProb) {
+           maxProb = probs[i]!
+           maxIdx = i
+         }
+       }
+
+       // Only return a result when confidence meets the threshold
+       if (maxProb < this.confidenceThreshold) return null
+
+       return labels[maxIdx]!
+     } catch (error) {
+       this.logger.warn({ error }, 'Model pattern classification failed, falling back to regex')
+       return null
+     }
+   }
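// Editor's note: this argmax-over-softmax loop appears verbatim in the query,
// pattern, and knowledge classifiers above. A minimal extraction — a sketch,
// assuming the same numeric-array logits the code already passes around:
function argmax(probs: ArrayLike<number>): { index: number; prob: number } {
  let index = 0
  let prob = probs[0]!
  for (let i = 1; i < probs.length; i++) {
    if (probs[i]! > prob) {
      prob = probs[i]!
      index = i
    }
  }
  return { index, prob }
}
// Usage inside any of the try* helpers:
//   const { index: maxIdx, prob: maxProb } = argmax(this.softmax(logits))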
+
+   // ── Knowledge Type Classification ──────────────────────────────────
+
+   /**
+    * Classify knowledge type from text.
+    * 5-class classifier: fact, preference, constraint, goal, definition.
+    */
+   async classifyKnowledgeType(text: string): Promise<string> {
+     const mode = this.getTaskMode('knowledge')
+
+     if (mode === 'regex') {
+       return this.regexClassifyKnowledge(text)
+     }
+
+     const modelResult = await this.tryModelClassifyKnowledge(text)
+     const regexResult = this.regexClassifyKnowledge(text)
+
+     if (mode === 'both') {
+       this.logComparison('knowledge', text, modelResult, regexResult)
+     }
+
+     // Cross-check: if regex found a keyword match (non-'fact') and the model
+     // disagrees, prefer regex. Regex only returns non-'fact' when it finds a
+     // definitive keyword (prefer, must, goal, etc.), so it is more trustworthy
+     // than an undertrained model for those cases.
+     if (modelResult && regexResult !== 'fact' && modelResult !== regexResult) {
+       this.logger.debug({ modelResult, regexResult }, 'Knowledge cross-check: regex keyword wins over model')
+       return regexResult
+     }
+
+     if (modelResult) {
+       return modelResult
+     }
+     return regexResult
+   }
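// Editor's worked example of the cross-check above (classification values
// invented; `inference` is a hypothetical instance):
//
//   await inference.classifyKnowledgeType('we must use pnpm')
//   // regex  → 'constraint' (keyword "must")
//   // model  → 'fact' (disagrees)
//   // result → 'constraint': a non-'fact' regex hit overrides the model
//
//   await inference.classifyKnowledgeType('the sky is blue today')
//   // regex  → 'fact' (no keyword), so whatever the model says stands.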
+
+   /**
+    * Regex fallback for knowledge type classification.
+    */
+   private regexClassifyKnowledge(text: string): string {
+     const lower = text.toLowerCase()
+     if (lower.includes('prefer') || lower.includes('like') || lower.includes('want') || lower.includes('favorite')) {
+       return 'preference'
+     }
+     if (lower.includes('must') || lower.includes('require') || lower.includes('constraint') || lower.includes('cannot') || lower.includes("can't") || lower.includes('never')) {
+       return 'constraint'
+     }
+     if (lower.includes('goal') || lower.includes('plan') || lower.includes('aim') || lower.includes('target') || lower.includes('objective')) {
+       return 'goal'
+     }
+     if (lower.includes('define') || lower.includes('definition') || lower.includes('means') || lower.includes('refers to') || lower.includes('is a')) {
+       return 'definition'
+     }
+     return 'fact'
+   }
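// Editor's note: the substring checks above are fast but loose — 'like' also
// matches "unlike"/"likely", and 'is a' matches "is always". A word-boundary
// variant under the same fallback contract (a sketch, not shipped behavior;
// stemmed forms such as "preferred" would need explicit handling):
const PREFERENCE_RE = /\b(prefer|like|want|favorite)\b/i

function isPreferenceStrict(text: string): boolean {
  return PREFERENCE_RE.test(text)
}
// isPreferenceStrict('unlikely')     → false (includes('like') returns true)
// isPreferenceStrict('I like tabs')  → true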
+
+   /**
+    * Attempt model-based knowledge type classification.
+    * 5-class classifier: fact, preference, constraint, goal, definition.
+    */
+   private async tryModelClassifyKnowledge(text: string): Promise<string | null> {
+     if (!this.modelManager.hasModel('knowledge')) return null
+
+     try {
+       const tokenizer = await getTokenizer(this.logger)
+       const maxLen = this.modelManager.getManifestEntry('knowledge')?.maxSeqLen ?? 128
+       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
+       const logits = await this.modelManager.infer('knowledge', inputIds, attentionMask)
+       if (!logits) return null
+
+       const probs = this.softmax(logits)
+       const manifest = this.modelManager.getManifestEntry('knowledge')
+       const labels = manifest?.labels ?? KNOWLEDGE_LABELS
+
+       let maxIdx = 0
+       let maxProb = probs[0]!
+       for (let i = 1; i < probs.length; i++) {
+         if (probs[i]! > maxProb) {
+           maxProb = probs[i]!
+           maxIdx = i
+         }
+       }
+
+       if (maxProb < this.confidenceThreshold) return null
+
+       return labels[maxIdx]!
+     } catch (error) {
+       this.logger.warn({ error }, 'Model knowledge classification failed, falling back to regex')
+       return null
+     }
+   }
+
+   // ── Local Compression ──────────────────────────────────────────────
+
+   /**
+    * Compress text using the local model, or return the original if unavailable.
+    * Uses greedy autoregressive decoding for seq2seq generation.
+    */
+   async compress(text: string): Promise<string> {
+     const mode = this.getTaskMode('compress')
+
+     // In 'api' or 'regex' mode, no local compression is available
+     if (mode === 'api' || mode === 'regex') {
+       return text
+     }
+
+     const modelResult = await this.tryModelCompress(text)
+
+     if (mode === 'both' && modelResult) {
+       this.logger.debug({
+         originalLen: text.length,
+         compressedLen: modelResult.length,
+         ratio: (modelResult.length / text.length).toFixed(2),
+       }, 'Compression comparison')
+     }
+
+     return modelResult ?? text
+   }
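// Editor's usage sketch (the `inference` instance is hypothetical): compress()
// is deliberately non-throwing — every failure path degrades to the input.
//
//   const summary = await inference.compress(longObservation)
//   // summary === longObservation when the compress model is absent, the mode
//   // is 'api'/'regex', decoding produces nothing, or inference throws.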
+
+   /**
+    * Attempt model-based text compression using greedy autoregressive decoding.
+    * Generates tokens one at a time until EOS or max length.
+    */
+   private async tryModelCompress(text: string): Promise<string | null> {
+     if (!this.modelManager.hasModel('compress')) return null
+
+     try {
+       const tokenizer = await getTokenizer(this.logger)
+       const maxLen = this.modelManager.getManifestEntry('compress')?.maxSeqLen ?? 256
+       const { inputIds, attentionMask } = tokenizer.encode(text, maxLen)
+
+       // Keep the padded arrays (fixed shape [1, maxLen]) and track active length.
+       // The ONNX model expects fixed input shape [1, maxLen]; stripping padding
+       // would create variable-length tensors that the model rejects.
+       let activeLen = attentionMask.filter(m => m === 1).length
+       const maxGenTokens = Math.min(COMPRESS_MAX_TOKENS, maxLen)
+       const inputLen = activeLen
+
+       for (let step = 0; step < maxGenTokens; step++) {
+         if (activeLen >= maxLen) break
+
+         const logits = await this.modelManager.infer('compress', [...inputIds], [...attentionMask])
+         if (!logits) break
+
+         // vocabSize = total logits / sequence length (maxLen, the padded dimension)
+         const vocabSize = logits.length / maxLen
+         const lastActiveOffset = (activeLen - 1) * vocabSize
+         const lastTokenLogits = logits.slice(lastActiveOffset, lastActiveOffset + vocabSize)
+
+         // Find argmax (greedy decode)
+         let bestId = 0
+         let bestVal = -Infinity
+         for (let v = 0; v < lastTokenLogits.length; v++) {
+           if (lastTokenLogits[v]! > bestVal) {
+             bestVal = lastTokenLogits[v]!
+             bestId = v
+           }
+         }
+
+         // Stop on EOS (or on id 0, the padding token)
+         if (bestId === EOS_TOKEN_ID || bestId === 0) break
+
+         // Write the new token into the next padding slot
+         inputIds[activeLen] = bestId
+         attentionMask[activeLen] = 1
+         activeLen++
+       }
+
+       // Decode only the generated tokens (after the input)
+       const outputIds = inputIds.slice(inputLen, activeLen)
+
+       if (outputIds.length === 0) return null
+
+       const decoded = tokenizer.decode(outputIds).trim()
+       return decoded.length > 0 ? decoded : null
+     } catch (err) {
+       const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
+       this.logger.warn({ error }, 'Model compression failed, returning original text')
+       return null
+     }
+   }
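// Editor's illustration of the in-place buffer bookkeeping above, with a toy
// maxLen of 8 (all token ids invented). The tensor shape never changes; only
// the attention mask and activeLen advance:
//
//   step 0: inputIds      [101, 7592, 2088, 102,    0, 0, 0, 0]   activeLen = 4
//           attentionMask [  1,    1,    1,   1,    0, 0, 0, 0]
//   step 1: greedy token 2054 written into slot 4:
//           inputIds      [101, 7592, 2088, 102, 2054, 0, 0, 0]   activeLen = 5
//           attentionMask [  1,    1,    1,   1,    1, 0, 0, 0]
//
// Decoding then slices [inputLen, activeLen) so only generated ids reach
// tokenizer.decode().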
+
+   // ── Comparison Logging ─────────────────────────────────────────────
+
+   /**
+    * Log model vs regex comparison for "both" mode validation.
+    * Stored in the model_feedback table for later analysis.
+    */
+   private logComparison(task: ModelTask, input: string, modelResult: Record<string, unknown> | string | null, regexResult: Record<string, unknown> | string | null): void {
+     try {
+       const modelLabel = this.extractLabel(modelResult)
+       const regexLabel = this.extractLabel(regexResult)
+       const agree = modelLabel === regexLabel
+
+       this.logger.debug({
+         task,
+         modelLabel,
+         regexLabel,
+         agree,
+       }, 'Model vs regex comparison')
+
+       // Always log to the feedback table (agreements and disagreements alike)
+       logModelFeedback({
+         task,
+         input,
+         modelPrediction: modelLabel ?? 'null',
+         modelConfidence: this.extractConfidence(modelResult),
+         regexPrediction: regexLabel ?? 'null',
+       })
+
+       if (!agree) {
+         // Also log the disagreement to training data for review
+         logTrainingData({
+           task,
+           input,
+           output: JSON.stringify({ model: modelLabel, regex: regexLabel, agreed: false }),
+           metadata: JSON.stringify({ mode: 'both', comparison: true }),
+         })
+       }
+     } catch {
+       // Non-critical: comparison logging must never break classification
+     }
+   }
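// Editor's sketch of a disagreement as the two sinks above would record it
// (field names taken from the calls in logComparison; values invented):
const exampleFeedbackRow = {
  task: 'knowledge',
  input: 'we must use pnpm',
  modelPrediction: 'fact',
  modelConfidence: 0.44,
  regexPrediction: 'constraint',
}
// …and the paired training-data row written only on disagreement:
// {
//   task: 'knowledge',
//   input: 'we must use pnpm',
//   output: '{"model":"fact","regex":"constraint","agreed":false}',
//   metadata: '{"mode":"both","comparison":true}',
// }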
+
+   /**
+    * Extract the confidence value from a model result object.
+    */
+   private extractConfidence(result: Record<string, unknown> | string | null): number {
+     if (!result || typeof result === 'string') return 0
+     if (typeof result.confidence === 'number') return result.confidence
+     return 0
+   }
+
+   /**
+    * Extract a comparable label from a model or regex result.
+    */
+   private extractLabel(result: Record<string, unknown> | string | null): string | null {
+     if (!result) return null
+     if (typeof result === 'string') return result
+     if (result.primary) return String(result.primary)
+     if (result.type) return String(result.type)
+     return JSON.stringify(result).slice(0, 50)
+   }
+
+   // ── Status ─────────────────────────────────────────────────────────
+
+   /**
+    * Get inference routing status for all tasks.
+    */
+   getStatus(): Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }> {
+     const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
+     const modelStatus = this.modelManager.getStatus()
+     const status = {} as Record<ModelTask, { mode: TaskMode; modelAvailable: boolean; modelLoaded: boolean }>
+
+     for (const task of tasks) {
+       status[task] = {
+         mode: this.getTaskMode(task),
+         modelAvailable: modelStatus[task].available,
+         modelLoaded: modelStatus[task].loaded,
+       }
+     }
+     return status
+   }
+ }
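// Editor's usage sketch (shape from the signature above; values invented):
//
//   const status = inference.getStatus()
//   // status.query → { mode: 'both', modelAvailable: true, modelLoaded: false }
//
// Handy for a CLI status command or health check: it shows, per task, whether
// routing will hit the local model or fall back to regex.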