claude-brain 0.30.2 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/README.md +241 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -29
  5. package/package.json +7 -3
  6. package/packs/backend/node.json +173 -173
  7. package/packs/core/javascript.json +176 -176
  8. package/packs/core/typescript.json +222 -222
  9. package/packs/frontend/react.json +254 -254
  10. package/packs/meta/testing.json +172 -172
  11. package/scripts/postinstall.mjs +531 -531
  12. package/src/automation/decision-detector.ts +452 -452
  13. package/src/automation/phase12-manager.ts +456 -456
  14. package/src/automation/proactive-recall.ts +373 -373
  15. package/src/automation/project-detector.ts +310 -310
  16. package/src/automation/repo-scanner.ts +210 -205
  17. package/src/cli/auto-setup.ts +75 -75
  18. package/src/cli/auto-start.ts +266 -266
  19. package/src/cli/bin.ts +264 -264
  20. package/src/cli/commands/autostart.ts +90 -90
  21. package/src/cli/commands/chroma.ts +578 -577
  22. package/src/cli/commands/export-training.ts +70 -70
  23. package/src/cli/commands/export.ts +130 -130
  24. package/src/cli/commands/git-hook.ts +183 -183
  25. package/src/cli/commands/hooks.ts +217 -217
  26. package/src/cli/commands/init.ts +123 -123
  27. package/src/cli/commands/install-mcp.ts +122 -111
  28. package/src/cli/commands/models.ts +979 -979
  29. package/src/cli/commands/pack.ts +200 -200
  30. package/src/cli/commands/refresh.ts +344 -339
  31. package/src/cli/commands/reindex.ts +120 -120
  32. package/src/cli/commands/serve.ts +466 -463
  33. package/src/cli/commands/start.ts +44 -44
  34. package/src/cli/commands/status.ts +220 -203
  35. package/src/cli/commands/uninstall-mcp.ts +45 -41
  36. package/src/cli/commands/update.ts +130 -124
  37. package/src/cli/migrate-chroma.ts +106 -106
  38. package/src/cli/ui/animations.ts +80 -80
  39. package/src/cli/ui/components.ts +82 -82
  40. package/src/cli/ui/index.ts +4 -4
  41. package/src/cli/ui/logo.ts +36 -36
  42. package/src/cli/ui/theme.ts +55 -55
  43. package/src/code-intelligence/indexer.ts +352 -352
  44. package/src/code-intelligence/linker.ts +178 -178
  45. package/src/code-intelligence/parser.ts +484 -484
  46. package/src/code-intelligence/query.ts +291 -291
  47. package/src/code-intelligence/schema.ts +83 -83
  48. package/src/code-intelligence/types.ts +95 -95
  49. package/src/config/defaults.ts +52 -52
  50. package/src/config/home.ts +56 -56
  51. package/src/config/index.ts +5 -5
  52. package/src/config/loader.ts +192 -192
  53. package/src/config/schema.ts +446 -415
  54. package/src/config/validator.ts +182 -182
  55. package/src/context/assembler.ts +407 -400
  56. package/src/context/index.ts +79 -79
  57. package/src/context/progress-tracker.ts +174 -174
  58. package/src/context/standards-manager.ts +287 -287
  59. package/src/context/validator.ts +58 -58
  60. package/src/diagnostics/index.ts +122 -121
  61. package/src/health/index.ts +233 -232
  62. package/src/hooks/brain-hook.ts +134 -131
  63. package/src/hooks/capture.ts +168 -168
  64. package/src/hooks/claude-code-mastery.md +112 -112
  65. package/src/hooks/context-hook.ts +260 -245
  66. package/src/hooks/deduplicator.ts +72 -72
  67. package/src/hooks/git-capture.ts +109 -109
  68. package/src/hooks/git-hook-installer.ts +211 -207
  69. package/src/hooks/index.ts +20 -20
  70. package/src/hooks/installer.ts +306 -288
  71. package/src/hooks/interceptor-hook.ts +204 -201
  72. package/src/hooks/passive-classifier.ts +397 -397
  73. package/src/hooks/queue.ts +160 -129
  74. package/src/hooks/session-tracker.ts +312 -312
  75. package/src/hooks/types.ts +52 -52
  76. package/src/index.ts +7 -7
  77. package/src/intelligence/cross-project/generalizer.ts +283 -283
  78. package/src/intelligence/cross-project/index.ts +7 -7
  79. package/src/intelligence/hf-downloader.ts +222 -222
  80. package/src/intelligence/hf-manifest.json +78 -78
  81. package/src/intelligence/index.ts +24 -24
  82. package/src/intelligence/inference-router.ts +762 -762
  83. package/src/intelligence/model-manager.ts +263 -245
  84. package/src/intelligence/optimization/index.ts +10 -10
  85. package/src/intelligence/optimization/precompute.ts +202 -202
  86. package/src/intelligence/optimization/semantic-cache.ts +213 -207
  87. package/src/intelligence/prediction/index.ts +7 -7
  88. package/src/intelligence/prediction/recommender.ts +276 -268
  89. package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
  90. package/src/intelligence/reasoning/index.ts +7 -7
  91. package/src/intelligence/temporal/evolution.ts +193 -197
  92. package/src/intelligence/temporal/index.ts +16 -16
  93. package/src/intelligence/temporal/query-processor.ts +190 -190
  94. package/src/intelligence/temporal/timeline.ts +272 -259
  95. package/src/intelligence/temporal/trends.ts +263 -263
  96. package/src/intelligence/tokenizer.ts +118 -118
  97. package/src/knowledge/entity-extractor.ts +447 -443
  98. package/src/knowledge/graph/builder.ts +185 -185
  99. package/src/knowledge/graph/linker.ts +201 -201
  100. package/src/knowledge/graph/memory-graph.ts +359 -359
  101. package/src/knowledge/graph/schema.ts +99 -99
  102. package/src/knowledge/graph/search.ts +166 -166
  103. package/src/knowledge/relationship-extractor.ts +108 -108
  104. package/src/memory/chroma/client.ts +211 -192
  105. package/src/memory/chroma/collection-manager.ts +92 -92
  106. package/src/memory/chroma/config.ts +57 -57
  107. package/src/memory/chroma/embeddings.ts +177 -175
  108. package/src/memory/chroma/index.ts +82 -82
  109. package/src/memory/chroma/migration.ts +270 -270
  110. package/src/memory/chroma/schemas.ts +69 -69
  111. package/src/memory/chroma/search.ts +319 -315
  112. package/src/memory/chroma/store.ts +755 -747
  113. package/src/memory/compression.ts +121 -121
  114. package/src/memory/consolidation/archiver.ts +162 -165
  115. package/src/memory/consolidation/merger.ts +182 -186
  116. package/src/memory/consolidation/scorer.ts +136 -136
  117. package/src/memory/database.ts +9 -0
  118. package/src/memory/dual-write.ts +145 -0
  119. package/src/memory/embeddings.ts +226 -226
  120. package/src/memory/episodic/detector.ts +108 -108
  121. package/src/memory/episodic/manager.ts +347 -351
  122. package/src/memory/episodic/summarizer.ts +179 -179
  123. package/src/memory/episodic/types.ts +52 -52
  124. package/src/memory/fts5-search.ts +692 -633
  125. package/src/memory/index.ts +943 -1060
  126. package/src/memory/migrations/add-fts5.ts +118 -108
  127. package/src/memory/patterns.ts +438 -438
  128. package/src/memory/pruning.ts +60 -60
  129. package/src/memory/schema.ts +88 -88
  130. package/src/memory/store.ts +911 -787
  131. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  132. package/src/packs/index.ts +9 -9
  133. package/src/packs/loader.ts +134 -134
  134. package/src/packs/manager.ts +204 -204
  135. package/src/packs/ranker.ts +78 -78
  136. package/src/packs/types.ts +81 -81
  137. package/src/phase12/index.ts +5 -5
  138. package/src/retrieval/bm25/index.ts +300 -297
  139. package/src/retrieval/bm25/tokenizer.ts +184 -184
  140. package/src/retrieval/feedback/adaptive.ts +221 -221
  141. package/src/retrieval/feedback/index.ts +16 -16
  142. package/src/retrieval/feedback/metrics.ts +221 -221
  143. package/src/retrieval/feedback/store.ts +283 -283
  144. package/src/retrieval/fusion/index.ts +194 -194
  145. package/src/retrieval/fusion/rrf.ts +165 -165
  146. package/src/retrieval/index.ts +12 -12
  147. package/src/retrieval/pipeline.ts +375 -375
  148. package/src/retrieval/query/expander.ts +203 -203
  149. package/src/retrieval/query/index.ts +27 -27
  150. package/src/retrieval/query/intent-classifier.ts +252 -252
  151. package/src/retrieval/query/temporal-parser.ts +295 -295
  152. package/src/retrieval/reranker/index.ts +189 -188
  153. package/src/retrieval/reranker/model.ts +99 -95
  154. package/src/retrieval/service.ts +125 -125
  155. package/src/retrieval/types.ts +162 -162
  156. package/src/routing/entity-extractor.ts +454 -454
  157. package/src/routing/handlers/exploration-handler.ts +369 -0
  158. package/src/routing/handlers/index.ts +19 -0
  159. package/src/routing/handlers/memory-handler.ts +273 -0
  160. package/src/routing/handlers/mutation-handler.ts +241 -0
  161. package/src/routing/handlers/recall-handler.ts +642 -0
  162. package/src/routing/handlers/shared.ts +515 -0
  163. package/src/routing/handlers/types.ts +48 -0
  164. package/src/routing/intent-classifier.ts +552 -552
  165. package/src/routing/response-filter.ts +399 -391
  166. package/src/routing/router.ts +245 -2193
  167. package/src/routing/search-engine.ts +521 -514
  168. package/src/routing/types.ts +104 -94
  169. package/src/scripts/health-check.ts +118 -118
  170. package/src/scripts/setup.ts +122 -122
  171. package/src/server/auto-updater.ts +283 -276
  172. package/src/server/handlers/call-tool.ts +159 -159
  173. package/src/server/handlers/list-tools.ts +35 -35
  174. package/src/server/handlers/tools/auto-remember.ts +165 -165
  175. package/src/server/handlers/tools/brain.ts +86 -86
  176. package/src/server/handlers/tools/create-project.ts +135 -135
  177. package/src/server/handlers/tools/get-code-standards.ts +123 -123
  178. package/src/server/handlers/tools/get-corrections.ts +152 -152
  179. package/src/server/handlers/tools/get-patterns.ts +156 -156
  180. package/src/server/handlers/tools/get-project-context.ts +75 -75
  181. package/src/server/handlers/tools/index.ts +30 -30
  182. package/src/server/handlers/tools/init-project.ts +756 -756
  183. package/src/server/handlers/tools/list-projects.ts +126 -126
  184. package/src/server/handlers/tools/recall-similar.ts +87 -87
  185. package/src/server/handlers/tools/recognize-pattern.ts +132 -132
  186. package/src/server/handlers/tools/record-correction.ts +131 -131
  187. package/src/server/handlers/tools/remember-decision.ts +168 -168
  188. package/src/server/handlers/tools/schemas.ts +179 -179
  189. package/src/server/handlers/tools/search-code.ts +122 -122
  190. package/src/server/handlers/tools/smart-context.ts +146 -146
  191. package/src/server/handlers/tools/update-progress.ts +131 -131
  192. package/src/server/http-api.ts +215 -1229
  193. package/src/server/mcp-proxy.ts +85 -84
  194. package/src/server/mcp-server.ts +285 -284
  195. package/src/server/middleware/auth.ts +39 -0
  196. package/src/server/middleware/error-handler.ts +37 -0
  197. package/src/server/middleware/rate-limit.ts +53 -0
  198. package/src/server/middleware/validate.ts +42 -0
  199. package/src/server/pid-manager.ts +137 -136
  200. package/src/server/providers/resources.ts +581 -581
  201. package/src/server/routes/code.ts +228 -0
  202. package/src/server/routes/context.ts +26 -0
  203. package/src/server/routes/health.ts +19 -0
  204. package/src/server/routes/helpers.ts +100 -0
  205. package/src/server/routes/hooks.ts +197 -0
  206. package/src/server/routes/mcp.ts +47 -0
  207. package/src/server/routes/memory.ts +397 -0
  208. package/src/server/routes/models.ts +96 -0
  209. package/src/server/routes/projects.ts +89 -0
  210. package/src/server/routes/types.ts +21 -0
  211. package/src/server/schemas/api-schemas.ts +202 -0
  212. package/src/server/services.ts +720 -720
  213. package/src/server/utils/memory-indicator.ts +84 -84
  214. package/src/server/utils/response-formatter.ts +129 -129
  215. package/src/server/web-viewer.ts +1145 -1115
  216. package/src/setup/index.ts +38 -38
  217. package/src/tools/registry.ts +115 -115
  218. package/src/tools/schemas.ts +666 -666
  219. package/src/tools/types.ts +412 -412
  220. package/src/training/data-store.ts +320 -298
  221. package/src/training/retrain-pipeline.ts +399 -394
  222. package/src/utils/error-handler.ts +136 -136
  223. package/src/utils/index.ts +58 -58
  224. package/src/utils/kill-port.ts +55 -53
  225. package/src/utils/phase12-helper.ts +56 -56
  226. package/src/utils/safe-path.ts +43 -0
  227. package/src/utils/timing.ts +47 -47
  228. package/src/utils/transaction.ts +63 -63
  229. package/src/vault/index.ts +4 -3
  230. package/src/vault/paths.ts +106 -106
  231. package/src/vault/query.ts +4 -1
  232. package/src/vault/reader.ts +44 -1
  233. package/src/vault/watcher.ts +24 -1
  234. package/src/vault/writer.ts +487 -413
  235. package/skills/persistent-memory/SKILL.md +0 -148
  236. package/skills/persistent-memory/references/tool-reference.md +0 -90
@@ -1,245 +1,263 @@
1
- /**
2
- * Model Manager — SLM Upgrade Phase 4A
3
- * Discovers, loads, and caches ONNX models from ~/.claude-brain/models/
4
- * Lazy loading: models load on first use, not at startup.
5
- */
6
-
7
- import { existsSync, readFileSync } from 'node:fs'
8
- import { join } from 'node:path'
9
- import type { Logger } from 'pino'
10
- import { getHomePaths } from '@/config/home'
11
-
12
- export type ModelTask = 'intent' | 'entity' | 'query' | 'knowledge' | 'compress' | 'pattern'
13
-
14
- export interface ModelManifestEntry {
15
- version: string
16
- file: string
17
- sha256?: string
18
- params?: string
19
- accuracy?: number
20
- labels?: string[]
21
- maxSeqLen?: number
22
- }
23
-
24
- export interface ModelManifest {
25
- models: Partial<Record<ModelTask, ModelManifestEntry>>
26
- }
27
-
28
- export interface LoadedModel {
29
- session: any // onnxruntime.InferenceSession
30
- manifest: ModelManifestEntry
31
- loadedAt: number
32
- }
33
-
34
- export class ModelManager {
35
- private modelsDir: string
36
- private manifest: ModelManifest | null = null
37
- private loadedModels = new Map<ModelTask, LoadedModel>()
38
- private onnxRuntime: any = null
39
- private onnxAvailable: boolean | null = null
40
- private logger: Logger
41
-
42
- constructor(logger: Logger, modelsDir?: string) {
43
- this.logger = logger.child({ component: 'model-manager' })
44
- this.modelsDir = modelsDir || getHomePaths().models
45
- }
46
-
47
- /**
48
- * Check if ONNX Runtime is available (installed as optional dep)
49
- * Tries onnxruntime-node first (faster native), falls back to onnxruntime-web (WASM)
50
- */
51
- private async checkOnnxRuntime(): Promise<boolean> {
52
- if (this.onnxAvailable !== null) return this.onnxAvailable
53
- // Try native node bindings first
54
- try {
55
- this.onnxRuntime = await import('onnxruntime-node')
56
- this.onnxAvailable = true
57
- this.logger.debug('ONNX Runtime (native) available')
58
- return true
59
- } catch {
60
- // Native not available, try WASM fallback
61
- }
62
- try {
63
- this.onnxRuntime = await import('onnxruntime-web')
64
- this.onnxAvailable = true
65
- this.logger.debug('ONNX Runtime (WASM) available')
66
- } catch {
67
- this.onnxAvailable = false
68
- this.logger.debug('ONNX Runtime not installed — models will not load')
69
- }
70
- return this.onnxAvailable
71
- }
72
-
73
- /**
74
- * Load the manifest.json from the models directory
75
- */
76
- private loadManifest(): ModelManifest | null {
77
- if (this.manifest) return this.manifest
78
- const manifestPath = join(this.modelsDir, 'manifest.json')
79
- if (!existsSync(manifestPath)) {
80
- this.logger.debug({ modelsDir: this.modelsDir }, 'No manifest.json found')
81
- return null
82
- }
83
- try {
84
- const raw = readFileSync(manifestPath, 'utf-8')
85
- this.manifest = JSON.parse(raw) as ModelManifest
86
- return this.manifest
87
- } catch (error) {
88
- this.logger.warn({ error }, 'Failed to parse manifest.json')
89
- return null
90
- }
91
- }
92
-
93
- /**
94
- * Check if a model file exists for a given task (without loading it)
95
- */
96
- hasModel(task: ModelTask): boolean {
97
- const manifest = this.loadManifest()
98
- if (!manifest?.models[task]) return false
99
- const entry = manifest.models[task]!
100
- return existsSync(join(this.modelsDir, entry.file))
101
- }
102
-
103
- /**
104
- * Get manifest entry for a task
105
- */
106
- getManifestEntry(task: ModelTask): ModelManifestEntry | null {
107
- const manifest = this.loadManifest()
108
- return manifest?.models[task] ?? null
109
- }
110
-
111
- /**
112
- * Lazy-load a model on first use. Returns null if unavailable.
113
- */
114
- async loadModel(task: ModelTask): Promise<LoadedModel | null> {
115
- // Return cached model
116
- if (this.loadedModels.has(task)) return this.loadedModels.get(task)!
117
-
118
- // Check prerequisites
119
- if (!(await this.checkOnnxRuntime())) return null
120
- const manifest = this.loadManifest()
121
- if (!manifest?.models[task]) return null
122
-
123
- const entry = manifest.models[task]!
124
- const modelPath = join(this.modelsDir, entry.file)
125
- if (!existsSync(modelPath)) {
126
- this.logger.debug({ task, file: entry.file }, 'Model file not found')
127
- return null
128
- }
129
-
130
- try {
131
- const startMs = Date.now()
132
- const session = await this.onnxRuntime.InferenceSession.create(modelPath)
133
- const loaded: LoadedModel = {
134
- session,
135
- manifest: entry,
136
- loadedAt: Date.now(),
137
- }
138
- this.loadedModels.set(task, loaded)
139
- this.logger.info({ task, file: entry.file, loadMs: Date.now() - startMs }, 'Model loaded')
140
- return loaded
141
- } catch (err) {
142
- const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
143
- this.logger.warn({ error, task, file: entry.file }, 'Failed to load model')
144
- return null
145
- }
146
- }
147
-
148
- /**
149
- * Run inference on a loaded model. Returns raw output tensor data.
150
- * Callers (InferenceRouter) handle task-specific pre/post processing.
151
- * Automatically detects which inputs the model accepts (input_ids, attention_mask).
152
- */
153
- async infer(task: ModelTask, inputIds: number[], attentionMask?: number[]): Promise<Float32Array | null> {
154
- const model = await this.loadModel(task)
155
- if (!model) return null
156
-
157
- try {
158
- const OrtTensor = this.onnxRuntime.Tensor
159
- const inputTensor = new OrtTensor('int64', BigInt64Array.from(inputIds.map(BigInt)), [1, inputIds.length])
160
- const feeds: Record<string, any> = { input_ids: inputTensor }
161
-
162
- // Only pass attention_mask if the model actually accepts it
163
- const modelInputNames = model.session.inputNames ?? []
164
- if (attentionMask && modelInputNames.includes('attention_mask')) {
165
- const maskTensor = new OrtTensor('int64', BigInt64Array.from(attentionMask.map(BigInt)), [1, attentionMask.length])
166
- feeds.attention_mask = maskTensor
167
- }
168
-
169
- const results = await model.session.run(feeds)
170
-
171
- // Most classification models output 'logits'
172
- const outputKey = Object.keys(results)[0]
173
- if (!outputKey) return null
174
- return results[outputKey].data as Float32Array
175
- } catch (err) {
176
- const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
177
- this.logger.warn({ error, task }, 'Inference failed')
178
- return null
179
- }
180
- }
181
-
182
- /**
183
- * Public accessor for the models directory path
184
- */
185
- getModelsDir(): string {
186
- return this.modelsDir
187
- }
188
-
189
- /**
190
- * Public check for ONNX Runtime availability
191
- */
192
- async isOnnxAvailable(): Promise<boolean> {
193
- return this.checkOnnxRuntime()
194
- }
195
-
196
- /**
197
- * Get status of all models (for CLI and health checks)
198
- */
199
- getStatus(): Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }> {
200
- const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
201
- const status = {} as Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }>
202
-
203
- for (const task of tasks) {
204
- const entry = this.getManifestEntry(task)
205
- status[task] = {
206
- available: this.hasModel(task),
207
- loaded: this.loadedModels.has(task),
208
- version: entry?.version,
209
- accuracy: entry?.accuracy,
210
- }
211
- }
212
- return status
213
- }
214
-
215
- /**
216
- * Unload a single model, releasing its ONNX session to free native memory.
217
- */
218
- async unloadModel(task: ModelTask): Promise<void> {
219
- const loaded = this.loadedModels.get(task)
220
- if (!loaded) return
221
-
222
- try {
223
- await loaded.session?.release?.()
224
- } catch (err) {
225
- this.logger.warn({ error: err, task }, 'Failed to release ONNX session')
226
- }
227
- this.loadedModels.delete(task)
228
- this.logger.debug({ task }, 'Model unloaded')
229
- }
230
-
231
- /**
232
- * Unload all models, releasing ONNX sessions to free native memory.
233
- */
234
- async unloadAll(): Promise<void> {
235
- for (const [task, loaded] of this.loadedModels) {
236
- try {
237
- await loaded.session?.release?.()
238
- } catch (err) {
239
- this.logger.warn({ error: err, task }, 'Failed to release ONNX session during unloadAll')
240
- }
241
- }
242
- this.loadedModels.clear()
243
- this.logger.debug('All models unloaded')
244
- }
245
- }
1
+ /**
2
+ * Model Manager — SLM Upgrade Phase 4A
3
+ * Discovers, loads, and caches ONNX models from ~/.claude-brain/models/
4
+ * Lazy loading: models load on first use, not at startup.
5
+ */
6
+
7
+ import { existsSync, readFileSync } from 'node:fs'
8
+ import { join } from 'node:path'
9
+ import type { Logger } from 'pino'
10
+ import { getHomePaths } from '@/config/home'
11
+
12
+ /** Minimal shape for dynamically imported onnxruntime-node/web */
13
+ interface OnnxRuntime {
14
+ InferenceSession: {
15
+ create(path: string): Promise<OnnxSession>
16
+ }
17
+ Tensor: new (type: string, data: BigInt64Array, dims: number[]) => unknown
18
+ }
19
+
20
+ interface OnnxSession {
21
+ inputNames?: string[]
22
+ run(feeds: Record<string, unknown>): Promise<Record<string, { data: Float32Array }>>
23
+ release?(): Promise<void>
24
+ }
25
+
26
+ export type ModelTask = 'intent' | 'entity' | 'query' | 'knowledge' | 'compress' | 'pattern'
27
+
28
+ export interface ModelManifestEntry {
29
+ version: string
30
+ file: string
31
+ sha256?: string
32
+ params?: string
33
+ accuracy?: number
34
+ labels?: string[]
35
+ maxSeqLen?: number
36
+ }
37
+
38
+ export interface ModelManifest {
39
+ models: Partial<Record<ModelTask, ModelManifestEntry>>
40
+ }
41
+
42
+ export interface LoadedModel {
43
+ session: OnnxSession
44
+ manifest: ModelManifestEntry
45
+ loadedAt: number
46
+ }
47
+
48
+ export class ModelManager {
49
+ private modelsDir: string
50
+ private manifest: ModelManifest | null = null
51
+ private loadedModels = new Map<ModelTask, LoadedModel>()
52
+ private onnxRuntime: unknown = null
53
+ private onnxAvailable: boolean | null = null
54
+ private logger: Logger
55
+
56
+ constructor(logger: Logger, modelsDir?: string) {
57
+ this.logger = logger.child({ component: 'model-manager' })
58
+ this.modelsDir = modelsDir || getHomePaths().models
59
+ }
60
+
61
+ /**
62
+ * Check if ONNX Runtime is available (installed as optional dep)
63
+ * Tries onnxruntime-node first (faster native), falls back to onnxruntime-web (WASM)
64
+ */
65
+ private async checkOnnxRuntime(): Promise<boolean> {
66
+ if (this.onnxAvailable !== null) return this.onnxAvailable
67
+ // Try native node bindings first
68
+ try {
69
+ this.onnxRuntime = await import('onnxruntime-node')
70
+ this.onnxAvailable = true
71
+ this.logger.debug('ONNX Runtime (native) available')
72
+ return true
73
+ } catch {
74
+ // Native not available, try WASM fallback
75
+ }
76
+ try {
77
+ this.onnxRuntime = await import('onnxruntime-web')
78
+ this.onnxAvailable = true
79
+ this.logger.debug('ONNX Runtime (WASM) available')
80
+ } catch {
81
+ this.onnxAvailable = false
82
+ this.logger.debug('ONNX Runtime not installed — models will not load')
83
+ }
84
+ return this.onnxAvailable
85
+ }
86
+
87
+ /**
88
+ * Load the manifest.json from the models directory
89
+ */
90
+ private loadManifest(): ModelManifest | null {
91
+ if (this.manifest) return this.manifest
92
+ const manifestPath = join(this.modelsDir, 'manifest.json')
93
+ if (!existsSync(manifestPath)) {
94
+ this.logger.debug({ modelsDir: this.modelsDir }, 'No manifest.json found')
95
+ return null
96
+ }
97
+ try {
98
+ const raw = readFileSync(manifestPath, 'utf-8')
99
+ this.manifest = JSON.parse(raw) as ModelManifest
100
+ return this.manifest
101
+ } catch (error) {
102
+ this.logger.warn({ error }, 'Failed to parse manifest.json')
103
+ return null
104
+ }
105
+ }
106
+
107
+ /**
108
+ * Check if a model file exists for a given task (without loading it)
109
+ */
110
+ hasModel(task: ModelTask): boolean {
111
+ const manifest = this.loadManifest()
112
+ if (!manifest?.models[task]) return false
113
+ const entry = manifest.models[task]!
114
+ return existsSync(join(this.modelsDir, entry.file))
115
+ }
116
+
117
+ /**
118
+ * Get manifest entry for a task
119
+ */
120
+ getManifestEntry(task: ModelTask): ModelManifestEntry | null {
121
+ const manifest = this.loadManifest()
122
+ return manifest?.models[task] ?? null
123
+ }
124
+
125
+ /**
126
+ * Lazy-load a model on first use. Returns null if unavailable.
127
+ */
128
+ async loadModel(task: ModelTask): Promise<LoadedModel | null> {
129
+ // Return cached model
130
+ if (this.loadedModels.has(task)) return this.loadedModels.get(task)!
131
+
132
+ // Check prerequisites
133
+ if (!(await this.checkOnnxRuntime())) return null
134
+ const manifest = this.loadManifest()
135
+ if (!manifest?.models[task]) return null
136
+
137
+ const entry = manifest.models[task]!
138
+ const modelPath = join(this.modelsDir, entry.file)
139
+ if (!existsSync(modelPath)) {
140
+ this.logger.debug({ task, file: entry.file }, 'Model file not found')
141
+ return null
142
+ }
143
+
144
+ try {
145
+ const startMs = Date.now()
146
+ const ort = this.onnxRuntime as OnnxRuntime
147
+ const session = await ort.InferenceSession.create(modelPath)
148
+ const loaded: LoadedModel = {
149
+ session,
150
+ manifest: entry,
151
+ loadedAt: Date.now(),
152
+ }
153
+ this.loadedModels.set(task, loaded)
154
+ this.logger.info({ task, file: entry.file, loadMs: Date.now() - startMs }, 'Model loaded')
155
+ return loaded
156
+ } catch (err) {
157
+ const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
158
+ this.logger.warn({ error, task, file: entry.file }, 'Failed to load model')
159
+ return null
160
+ }
161
+ }
162
+
163
+ /**
164
+ * Run inference on a loaded model. Returns raw output tensor data.
165
+ * Callers (InferenceRouter) handle task-specific pre/post processing.
166
+ * Automatically detects which inputs the model accepts (input_ids, attention_mask).
167
+ */
168
+ async infer(task: ModelTask, inputIds: number[], attentionMask?: number[]): Promise<Float32Array | null> {
169
+ const model = await this.loadModel(task)
170
+ if (!model) return null
171
+
172
+ try {
173
+ const ort = this.onnxRuntime as OnnxRuntime
174
+ const OrtTensor = ort.Tensor
175
+ const inputTensor = new OrtTensor('int64', BigInt64Array.from(inputIds.map(BigInt)), [1, inputIds.length])
176
+ const feeds: Record<string, unknown> = { input_ids: inputTensor }
177
+
178
+ // Only pass attention_mask if the model actually accepts it
179
+ const modelInputNames = model.session.inputNames ?? []
180
+ if (attentionMask && modelInputNames.includes('attention_mask')) {
181
+ const maskTensor = new OrtTensor('int64', BigInt64Array.from(attentionMask.map(BigInt)), [1, attentionMask.length])
182
+ feeds.attention_mask = maskTensor
183
+ }
184
+
185
+ const results = await model.session.run(feeds)
186
+
187
+ // Most classification models output 'logits'
188
+ const outputKey = Object.keys(results)[0]
189
+ if (!outputKey) return null
190
+ const output = results[outputKey]
191
+ if (!output) return null
192
+ return output.data as Float32Array
193
+ } catch (err) {
194
+ const error = err instanceof Error ? { message: err.message, stack: err.stack } : err
195
+ this.logger.warn({ error, task }, 'Inference failed')
196
+ return null
197
+ }
198
+ }
199
+
200
+ /**
201
+ * Public accessor for the models directory path
202
+ */
203
+ getModelsDir(): string {
204
+ return this.modelsDir
205
+ }
206
+
207
+ /**
208
+ * Public check for ONNX Runtime availability
209
+ */
210
+ async isOnnxAvailable(): Promise<boolean> {
211
+ return this.checkOnnxRuntime()
212
+ }
213
+
214
+ /**
215
+ * Get status of all models (for CLI and health checks)
216
+ */
217
+ getStatus(): Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }> {
218
+ const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
219
+ const status = {} as Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }>
220
+
221
+ for (const task of tasks) {
222
+ const entry = this.getManifestEntry(task)
223
+ status[task] = {
224
+ available: this.hasModel(task),
225
+ loaded: this.loadedModels.has(task),
226
+ version: entry?.version,
227
+ accuracy: entry?.accuracy,
228
+ }
229
+ }
230
+ return status
231
+ }
232
+
233
+ /**
234
+ * Unload a single model, releasing its ONNX session to free native memory.
235
+ */
236
+ async unloadModel(task: ModelTask): Promise<void> {
237
+ const loaded = this.loadedModels.get(task)
238
+ if (!loaded) return
239
+
240
+ try {
241
+ await loaded.session?.release?.()
242
+ } catch (err) {
243
+ this.logger.warn({ error: err, task }, 'Failed to release ONNX session')
244
+ }
245
+ this.loadedModels.delete(task)
246
+ this.logger.debug({ task }, 'Model unloaded')
247
+ }
248
+
249
+ /**
250
+ * Unload all models, releasing ONNX sessions to free native memory.
251
+ */
252
+ async unloadAll(): Promise<void> {
253
+ for (const [task, loaded] of this.loadedModels) {
254
+ try {
255
+ await loaded.session?.release?.()
256
+ } catch (err) {
257
+ this.logger.warn({ error: err, task }, 'Failed to release ONNX session during unloadAll')
258
+ }
259
+ }
260
+ this.loadedModels.clear()
261
+ this.logger.debug('All models unloaded')
262
+ }
263
+ }
@@ -1,10 +1,10 @@
1
- /**
2
- * Optimization Module
3
- * Phase 15.5 - Semantic caching and precomputation
4
- */
5
-
6
- export { SemanticCache } from './semantic-cache'
7
- export type { CachedResult, CacheStats } from './semantic-cache'
8
-
9
- export { PrecomputeEngine } from './precompute'
10
- export type { PrecomputedStats, ProjectStats } from './precompute'
1
+ /**
2
+ * Optimization Module
3
+ * Phase 15.5 - Semantic caching and precomputation
4
+ */
5
+
6
+ export { SemanticCache } from './semantic-cache'
7
+ export type { CachedResult, CacheStats } from './semantic-cache'
8
+
9
+ export { PrecomputeEngine } from './precompute'
10
+ export type { PrecomputedStats, ProjectStats } from './precompute'