claude-brain 0.30.2 → 0.30.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +241 -191
- package/VERSION +1 -1
- package/assets/CLAUDE-unified.md +11 -11
- package/assets/CLAUDE.md +29 -29
- package/package.json +7 -3
- package/packs/backend/node.json +173 -173
- package/packs/core/javascript.json +176 -176
- package/packs/core/typescript.json +222 -222
- package/packs/frontend/react.json +254 -254
- package/packs/meta/testing.json +172 -172
- package/scripts/postinstall.mjs +531 -531
- package/src/automation/decision-detector.ts +452 -452
- package/src/automation/phase12-manager.ts +456 -456
- package/src/automation/proactive-recall.ts +373 -373
- package/src/automation/project-detector.ts +310 -310
- package/src/automation/repo-scanner.ts +210 -205
- package/src/cli/auto-setup.ts +75 -75
- package/src/cli/auto-start.ts +266 -266
- package/src/cli/bin.ts +264 -264
- package/src/cli/commands/autostart.ts +90 -90
- package/src/cli/commands/chroma.ts +578 -577
- package/src/cli/commands/export-training.ts +70 -70
- package/src/cli/commands/export.ts +130 -130
- package/src/cli/commands/git-hook.ts +183 -183
- package/src/cli/commands/hooks.ts +217 -217
- package/src/cli/commands/init.ts +123 -123
- package/src/cli/commands/install-mcp.ts +122 -111
- package/src/cli/commands/models.ts +979 -979
- package/src/cli/commands/pack.ts +200 -200
- package/src/cli/commands/refresh.ts +344 -339
- package/src/cli/commands/reindex.ts +120 -120
- package/src/cli/commands/serve.ts +466 -463
- package/src/cli/commands/start.ts +44 -44
- package/src/cli/commands/status.ts +220 -203
- package/src/cli/commands/uninstall-mcp.ts +45 -41
- package/src/cli/commands/update.ts +130 -124
- package/src/cli/migrate-chroma.ts +106 -106
- package/src/cli/ui/animations.ts +80 -80
- package/src/cli/ui/components.ts +82 -82
- package/src/cli/ui/index.ts +4 -4
- package/src/cli/ui/logo.ts +36 -36
- package/src/cli/ui/theme.ts +55 -55
- package/src/code-intelligence/indexer.ts +352 -352
- package/src/code-intelligence/linker.ts +178 -178
- package/src/code-intelligence/parser.ts +484 -484
- package/src/code-intelligence/query.ts +291 -291
- package/src/code-intelligence/schema.ts +83 -83
- package/src/code-intelligence/types.ts +95 -95
- package/src/config/defaults.ts +52 -52
- package/src/config/home.ts +56 -56
- package/src/config/index.ts +5 -5
- package/src/config/loader.ts +192 -192
- package/src/config/schema.ts +446 -415
- package/src/config/validator.ts +182 -182
- package/src/context/assembler.ts +407 -400
- package/src/context/index.ts +79 -79
- package/src/context/progress-tracker.ts +174 -174
- package/src/context/standards-manager.ts +287 -287
- package/src/context/validator.ts +58 -58
- package/src/diagnostics/index.ts +122 -121
- package/src/health/index.ts +233 -232
- package/src/hooks/brain-hook.ts +134 -131
- package/src/hooks/capture.ts +168 -168
- package/src/hooks/claude-code-mastery.md +112 -112
- package/src/hooks/context-hook.ts +260 -245
- package/src/hooks/deduplicator.ts +72 -72
- package/src/hooks/git-capture.ts +109 -109
- package/src/hooks/git-hook-installer.ts +211 -207
- package/src/hooks/index.ts +20 -20
- package/src/hooks/installer.ts +306 -288
- package/src/hooks/interceptor-hook.ts +204 -201
- package/src/hooks/passive-classifier.ts +397 -397
- package/src/hooks/queue.ts +160 -129
- package/src/hooks/session-tracker.ts +312 -312
- package/src/hooks/types.ts +52 -52
- package/src/index.ts +7 -7
- package/src/intelligence/cross-project/generalizer.ts +283 -283
- package/src/intelligence/cross-project/index.ts +7 -7
- package/src/intelligence/hf-downloader.ts +222 -222
- package/src/intelligence/hf-manifest.json +78 -78
- package/src/intelligence/index.ts +24 -24
- package/src/intelligence/inference-router.ts +762 -762
- package/src/intelligence/model-manager.ts +263 -245
- package/src/intelligence/optimization/index.ts +10 -10
- package/src/intelligence/optimization/precompute.ts +202 -202
- package/src/intelligence/optimization/semantic-cache.ts +213 -207
- package/src/intelligence/prediction/index.ts +7 -7
- package/src/intelligence/prediction/recommender.ts +276 -268
- package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
- package/src/intelligence/reasoning/index.ts +7 -7
- package/src/intelligence/temporal/evolution.ts +193 -197
- package/src/intelligence/temporal/index.ts +16 -16
- package/src/intelligence/temporal/query-processor.ts +190 -190
- package/src/intelligence/temporal/timeline.ts +272 -259
- package/src/intelligence/temporal/trends.ts +263 -263
- package/src/intelligence/tokenizer.ts +118 -118
- package/src/knowledge/entity-extractor.ts +447 -443
- package/src/knowledge/graph/builder.ts +185 -185
- package/src/knowledge/graph/linker.ts +201 -201
- package/src/knowledge/graph/memory-graph.ts +359 -359
- package/src/knowledge/graph/schema.ts +99 -99
- package/src/knowledge/graph/search.ts +166 -166
- package/src/knowledge/relationship-extractor.ts +108 -108
- package/src/memory/chroma/client.ts +211 -192
- package/src/memory/chroma/collection-manager.ts +92 -92
- package/src/memory/chroma/config.ts +57 -57
- package/src/memory/chroma/embeddings.ts +177 -175
- package/src/memory/chroma/index.ts +82 -82
- package/src/memory/chroma/migration.ts +270 -270
- package/src/memory/chroma/schemas.ts +69 -69
- package/src/memory/chroma/search.ts +319 -315
- package/src/memory/chroma/store.ts +755 -747
- package/src/memory/compression.ts +121 -121
- package/src/memory/consolidation/archiver.ts +162 -165
- package/src/memory/consolidation/merger.ts +182 -186
- package/src/memory/consolidation/scorer.ts +136 -136
- package/src/memory/database.ts +9 -0
- package/src/memory/dual-write.ts +145 -0
- package/src/memory/embeddings.ts +226 -226
- package/src/memory/episodic/detector.ts +108 -108
- package/src/memory/episodic/manager.ts +347 -351
- package/src/memory/episodic/summarizer.ts +179 -179
- package/src/memory/episodic/types.ts +52 -52
- package/src/memory/fts5-search.ts +692 -633
- package/src/memory/index.ts +943 -1060
- package/src/memory/migrations/add-fts5.ts +118 -108
- package/src/memory/patterns.ts +438 -438
- package/src/memory/pruning.ts +60 -60
- package/src/memory/schema.ts +88 -88
- package/src/memory/store.ts +911 -787
- package/src/orchestrator/handlers/decision-handler.ts +204 -204
- package/src/packs/index.ts +9 -9
- package/src/packs/loader.ts +134 -134
- package/src/packs/manager.ts +204 -204
- package/src/packs/ranker.ts +78 -78
- package/src/packs/types.ts +81 -81
- package/src/phase12/index.ts +5 -5
- package/src/retrieval/bm25/index.ts +300 -297
- package/src/retrieval/bm25/tokenizer.ts +184 -184
- package/src/retrieval/feedback/adaptive.ts +221 -221
- package/src/retrieval/feedback/index.ts +16 -16
- package/src/retrieval/feedback/metrics.ts +221 -221
- package/src/retrieval/feedback/store.ts +283 -283
- package/src/retrieval/fusion/index.ts +194 -194
- package/src/retrieval/fusion/rrf.ts +165 -165
- package/src/retrieval/index.ts +12 -12
- package/src/retrieval/pipeline.ts +375 -375
- package/src/retrieval/query/expander.ts +203 -203
- package/src/retrieval/query/index.ts +27 -27
- package/src/retrieval/query/intent-classifier.ts +252 -252
- package/src/retrieval/query/temporal-parser.ts +295 -295
- package/src/retrieval/reranker/index.ts +189 -188
- package/src/retrieval/reranker/model.ts +99 -95
- package/src/retrieval/service.ts +125 -125
- package/src/retrieval/types.ts +162 -162
- package/src/routing/entity-extractor.ts +454 -454
- package/src/routing/handlers/exploration-handler.ts +369 -0
- package/src/routing/handlers/index.ts +19 -0
- package/src/routing/handlers/memory-handler.ts +273 -0
- package/src/routing/handlers/mutation-handler.ts +241 -0
- package/src/routing/handlers/recall-handler.ts +642 -0
- package/src/routing/handlers/shared.ts +515 -0
- package/src/routing/handlers/types.ts +48 -0
- package/src/routing/intent-classifier.ts +552 -552
- package/src/routing/response-filter.ts +399 -391
- package/src/routing/router.ts +245 -2193
- package/src/routing/search-engine.ts +521 -514
- package/src/routing/types.ts +104 -94
- package/src/scripts/health-check.ts +118 -118
- package/src/scripts/setup.ts +122 -122
- package/src/server/auto-updater.ts +283 -276
- package/src/server/handlers/call-tool.ts +159 -159
- package/src/server/handlers/list-tools.ts +35 -35
- package/src/server/handlers/tools/auto-remember.ts +165 -165
- package/src/server/handlers/tools/brain.ts +86 -86
- package/src/server/handlers/tools/create-project.ts +135 -135
- package/src/server/handlers/tools/get-code-standards.ts +123 -123
- package/src/server/handlers/tools/get-corrections.ts +152 -152
- package/src/server/handlers/tools/get-patterns.ts +156 -156
- package/src/server/handlers/tools/get-project-context.ts +75 -75
- package/src/server/handlers/tools/index.ts +30 -30
- package/src/server/handlers/tools/init-project.ts +756 -756
- package/src/server/handlers/tools/list-projects.ts +126 -126
- package/src/server/handlers/tools/recall-similar.ts +87 -87
- package/src/server/handlers/tools/recognize-pattern.ts +132 -132
- package/src/server/handlers/tools/record-correction.ts +131 -131
- package/src/server/handlers/tools/remember-decision.ts +168 -168
- package/src/server/handlers/tools/schemas.ts +179 -179
- package/src/server/handlers/tools/search-code.ts +122 -122
- package/src/server/handlers/tools/smart-context.ts +146 -146
- package/src/server/handlers/tools/update-progress.ts +131 -131
- package/src/server/http-api.ts +215 -1229
- package/src/server/mcp-proxy.ts +85 -84
- package/src/server/mcp-server.ts +285 -284
- package/src/server/middleware/auth.ts +39 -0
- package/src/server/middleware/error-handler.ts +37 -0
- package/src/server/middleware/rate-limit.ts +53 -0
- package/src/server/middleware/validate.ts +42 -0
- package/src/server/pid-manager.ts +137 -136
- package/src/server/providers/resources.ts +581 -581
- package/src/server/routes/code.ts +228 -0
- package/src/server/routes/context.ts +26 -0
- package/src/server/routes/health.ts +19 -0
- package/src/server/routes/helpers.ts +100 -0
- package/src/server/routes/hooks.ts +197 -0
- package/src/server/routes/mcp.ts +47 -0
- package/src/server/routes/memory.ts +397 -0
- package/src/server/routes/models.ts +96 -0
- package/src/server/routes/projects.ts +89 -0
- package/src/server/routes/types.ts +21 -0
- package/src/server/schemas/api-schemas.ts +202 -0
- package/src/server/services.ts +720 -720
- package/src/server/utils/memory-indicator.ts +84 -84
- package/src/server/utils/response-formatter.ts +129 -129
- package/src/server/web-viewer.ts +1145 -1115
- package/src/setup/index.ts +38 -38
- package/src/tools/registry.ts +115 -115
- package/src/tools/schemas.ts +666 -666
- package/src/tools/types.ts +412 -412
- package/src/training/data-store.ts +320 -298
- package/src/training/retrain-pipeline.ts +399 -394
- package/src/utils/error-handler.ts +136 -136
- package/src/utils/index.ts +58 -58
- package/src/utils/kill-port.ts +55 -53
- package/src/utils/phase12-helper.ts +56 -56
- package/src/utils/safe-path.ts +43 -0
- package/src/utils/timing.ts +47 -47
- package/src/utils/transaction.ts +63 -63
- package/src/vault/index.ts +4 -3
- package/src/vault/paths.ts +106 -106
- package/src/vault/query.ts +4 -1
- package/src/vault/reader.ts +44 -1
- package/src/vault/watcher.ts +24 -1
- package/src/vault/writer.ts +487 -413
- package/skills/persistent-memory/SKILL.md +0 -148
- package/skills/persistent-memory/references/tool-reference.md +0 -90
|
@@ -1,245 +1,263 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Model Manager — SLM Upgrade Phase 4A
|
|
3
|
-
* Discovers, loads, and caches ONNX models from ~/.claude-brain/models/
|
|
4
|
-
* Lazy loading: models load on first use, not at startup.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import { existsSync, readFileSync } from 'node:fs'
|
|
8
|
-
import { join } from 'node:path'
|
|
9
|
-
import type { Logger } from 'pino'
|
|
10
|
-
import { getHomePaths } from '@/config/home'
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
export interface
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
private
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
const
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
const
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Model Manager — SLM Upgrade Phase 4A
|
|
3
|
+
* Discovers, loads, and caches ONNX models from ~/.claude-brain/models/
|
|
4
|
+
* Lazy loading: models load on first use, not at startup.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { existsSync, readFileSync } from 'node:fs'
|
|
8
|
+
import { join } from 'node:path'
|
|
9
|
+
import type { Logger } from 'pino'
|
|
10
|
+
import { getHomePaths } from '@/config/home'
|
|
11
|
+
|
|
12
|
+
/**
 * Structural subset of the dynamically imported `onnxruntime-node` /
 * `onnxruntime-web` module that this file actually touches.
 */
interface OnnxRuntime {
  /** Session factory; `create` is called with the path of a model file. */
  InferenceSession: {
    create: (path: string) => Promise<OnnxSession>
  }
  /** Tensor constructor: element type name, 64-bit integer data, dimensions. */
  Tensor: {
    new (type: string, data: BigInt64Array, dims: number[]): unknown
  }
}
|
|
19
|
+
|
|
20
|
+
/** Minimal view of an inference session as consumed by ModelManager. */
interface OnnxSession {
  /** Input names declared by the model, when the runtime exposes them. */
  inputNames?: string[]
  /** Runs the model on named feeds and resolves with named outputs. */
  run: (feeds: Record<string, unknown>) => Promise<Record<string, { data: Float32Array }>>
  /** Optional teardown hook; ModelManager calls it when unloading a model. */
  release?: () => Promise<void>
}
|
|
25
|
+
|
|
26
|
+
/** Identifier of a model slot; the manifest holds at most one entry per task. */
export type ModelTask =
  | 'intent'
  | 'entity'
  | 'query'
  | 'knowledge'
  | 'compress'
  | 'pattern'
|
|
27
|
+
|
|
28
|
+
/** One model's record inside `manifest.json`. */
export interface ModelManifestEntry {
  /** Version string; surfaced by `ModelManager.getStatus()`. */
  version: string

  /** Model file name, joined onto the models directory to locate the file. */
  file: string

  /** NOTE(review): presumably a hex SHA-256 of `file`; not read in this module — confirm against the downloader. */
  sha256?: string

  /** NOTE(review): looks like a human-readable parameter count; not read in this module. */
  params?: string

  /** Accuracy figure; surfaced unchanged by `ModelManager.getStatus()`. */
  accuracy?: number

  /** NOTE(review): presumably the class labels for the model output; not read in this module. */
  labels?: string[]

  /** NOTE(review): presumably the maximum token sequence length; not read in this module. */
  maxSeqLen?: number
}
|
|
37
|
+
|
|
38
|
+
/** Parsed shape of `manifest.json` in the models directory. */
export interface ModelManifest {
  /** Entries keyed by task; any task may be absent. */
  models: { [Task in ModelTask]?: ModelManifestEntry }
}
|
|
41
|
+
|
|
42
|
+
/** A model that has been loaded into an ONNX session and cached. */
export interface LoadedModel {
  /** Live inference session created from the model file. */
  session: OnnxSession

  /** Manifest entry the session was created from. */
  manifest: ModelManifestEntry

  /** `Date.now()` timestamp taken when the session finished loading. */
  loadedAt: number
}
|
|
47
|
+
|
|
48
|
+
/**
 * Discovers, lazily loads and caches ONNX models described by the
 * `manifest.json` inside the models directory.
 *
 * Nothing is read from disk or imported until a method needs it. Failures
 * (runtime missing, manifest missing or malformed, model file missing,
 * session creation or inference errors) are logged and reported as
 * `null` / `false` rather than thrown.
 */
export class ModelManager {
  private modelsDir: string
  private manifest: ModelManifest | null = null
  private loadedModels = new Map<ModelTask, LoadedModel>()
  /** In-flight loads, so concurrent callers share one session per task. */
  private pendingLoads = new Map<ModelTask, Promise<LoadedModel | null>>()
  private onnxRuntime: unknown = null
  private onnxAvailable: boolean | null = null
  private logger: Logger

  /**
   * @param logger - Parent logger; a child tagged `component: 'model-manager'` is used.
   * @param modelsDir - Directory holding `manifest.json` and the model files.
   *   Falls back to `getHomePaths().models` when omitted or empty.
   */
  constructor(logger: Logger, modelsDir?: string) {
    this.logger = logger.child({ component: 'model-manager' })
    this.modelsDir = modelsDir || getHomePaths().models
  }

  /**
   * Convert a caught value into something that survives JSON logging.
   * An `Error` logged under a key other than `err` would otherwise be
   * stringified as `{}` because its fields are not enumerable.
   */
  private static describeError(err: unknown): unknown {
    return err instanceof Error ? { message: err.message, stack: err.stack } : err
  }

  /** Runtime check that parsed JSON has the `{ models: { ... } }` shape. */
  private static isManifest(value: unknown): value is ModelManifest {
    if (typeof value !== 'object' || value === null) return false
    const models = (value as { models?: unknown }).models
    return typeof models === 'object' && models !== null && !Array.isArray(models)
  }

  /**
   * Check if ONNX Runtime is available (installed as optional dep).
   * Tries onnxruntime-node first (native), then onnxruntime-web (WASM).
   * The outcome is memoized after the first call.
   */
  private async checkOnnxRuntime(): Promise<boolean> {
    if (this.onnxAvailable !== null) return this.onnxAvailable

    // Try native node bindings first
    try {
      this.onnxRuntime = await import('onnxruntime-node')
      this.onnxAvailable = true
      this.logger.debug('ONNX Runtime (native) available')
      return true
    } catch {
      // Native not available, try WASM fallback
    }

    try {
      this.onnxRuntime = await import('onnxruntime-web')
      this.onnxAvailable = true
      this.logger.debug('ONNX Runtime (WASM) available')
    } catch {
      this.onnxAvailable = false
      this.logger.debug('ONNX Runtime not installed — models will not load')
    }
    return this.onnxAvailable
  }

  /**
   * Load `manifest.json` from the models directory.
   *
   * Only a successfully parsed and validated manifest is cached; a missing
   * or malformed file is re-checked on the next call.
   *
   * @returns The manifest, or `null` when it is missing, unparsable, or
   *   lacks a `models` object.
   */
  private loadManifest(): ModelManifest | null {
    if (this.manifest) return this.manifest

    const manifestPath = join(this.modelsDir, 'manifest.json')
    if (!existsSync(manifestPath)) {
      this.logger.debug({ modelsDir: this.modelsDir }, 'No manifest.json found')
      return null
    }

    try {
      const parsed: unknown = JSON.parse(readFileSync(manifestPath, 'utf-8'))
      if (!ModelManager.isManifest(parsed)) {
        // Without this guard every `manifest.models[task]` lookup would throw.
        this.logger.warn({ manifestPath }, 'manifest.json has no "models" object')
        return null
      }
      this.manifest = parsed
      return this.manifest
    } catch (err) {
      this.logger.warn({ error: ModelManager.describeError(err) }, 'Failed to parse manifest.json')
      return null
    }
  }

  /**
   * Resolve the on-disk path for a manifest entry.
   *
   * @returns The joined path, or `null` when the entry has no usable file
   *   name. An empty name would otherwise resolve to the models directory.
   */
  private resolveModelPath(entry: ModelManifestEntry): string | null {
    if (typeof entry.file !== 'string' || entry.file.length === 0) return null
    return join(this.modelsDir, entry.file)
  }

  /**
   * Check if a model file exists for a given task (without loading it).
   */
  hasModel(task: ModelTask): boolean {
    const entry = this.getManifestEntry(task)
    if (!entry) return false
    const modelPath = this.resolveModelPath(entry)
    return modelPath !== null && existsSync(modelPath)
  }

  /**
   * Get the manifest entry for a task, or `null` when there is none.
   */
  getManifestEntry(task: ModelTask): ModelManifestEntry | null {
    return this.loadManifest()?.models[task] ?? null
  }

  /**
   * Lazy-load a model on first use. Returns null if unavailable.
   *
   * Concurrent calls for the same task share a single load, so only one
   * session is ever created per task and none is orphaned.
   */
  async loadModel(task: ModelTask): Promise<LoadedModel | null> {
    const cached = this.loadedModels.get(task)
    if (cached) return cached

    const inFlight = this.pendingLoads.get(task)
    if (inFlight) return inFlight

    const pending = this.loadModelUncached(task).finally(() => {
      this.pendingLoads.delete(task)
    })
    this.pendingLoads.set(task, pending)
    return pending
  }

  /** Create the session for `task` and store it in the cache. Never throws. */
  private async loadModelUncached(task: ModelTask): Promise<LoadedModel | null> {
    // Check prerequisites
    if (!(await this.checkOnnxRuntime())) return null

    const entry = this.getManifestEntry(task)
    if (!entry) return null

    const modelPath = this.resolveModelPath(entry)
    if (modelPath === null || !existsSync(modelPath)) {
      this.logger.debug({ task, file: entry.file }, 'Model file not found')
      return null
    }

    try {
      const startMs = Date.now()
      const ort = this.onnxRuntime as OnnxRuntime
      const session = await ort.InferenceSession.create(modelPath)
      const loaded: LoadedModel = {
        session,
        manifest: entry,
        loadedAt: Date.now(),
      }
      this.loadedModels.set(task, loaded)
      this.logger.info({ task, file: entry.file, loadMs: Date.now() - startMs }, 'Model loaded')
      return loaded
    } catch (err) {
      this.logger.warn(
        { error: ModelManager.describeError(err), task, file: entry.file },
        'Failed to load model'
      )
      return null
    }
  }

  /**
   * Run inference on a model, loading it first if needed.
   * Callers (InferenceRouter) handle task-specific pre/post processing.
   *
   * @param task - Which model to run.
   * @param inputIds - Token ids, fed as an int64 tensor of shape `[1, inputIds.length]`.
   * @param attentionMask - Optional mask, fed as `attention_mask` only when
   *   the session lists that name among its inputs.
   * @returns The data of the first output, or `null` when the model is
   *   unavailable, produced no output, or inference failed.
   */
  async infer(task: ModelTask, inputIds: number[], attentionMask?: number[]): Promise<Float32Array | null> {
    const model = await this.loadModel(task)
    if (!model) return null

    try {
      const ort = this.onnxRuntime as OnnxRuntime
      const OrtTensor = ort.Tensor
      const toInt64Tensor = (values: number[]): unknown =>
        new OrtTensor('int64', BigInt64Array.from(values.map(BigInt)), [1, values.length])

      const feeds: Record<string, unknown> = { input_ids: toInt64Tensor(inputIds) }

      // Only pass attention_mask if the model actually accepts it
      const modelInputNames = model.session.inputNames ?? []
      if (attentionMask && modelInputNames.includes('attention_mask')) {
        feeds.attention_mask = toInt64Tensor(attentionMask)
      }

      const results = await model.session.run(feeds)

      // Most classification models output 'logits'; take the first output.
      const outputKey = Object.keys(results)[0]
      if (!outputKey) return null
      const output = results[outputKey]
      if (!output) return null
      return output.data
    } catch (err) {
      this.logger.warn({ error: ModelManager.describeError(err), task }, 'Inference failed')
      return null
    }
  }

  /**
   * Public accessor for the models directory path.
   */
  getModelsDir(): string {
    return this.modelsDir
  }

  /**
   * Public check for ONNX Runtime availability.
   */
  async isOnnxAvailable(): Promise<boolean> {
    return this.checkOnnxRuntime()
  }

  /**
   * Get status of all models (for CLI and health checks).
   *
   * @returns One record per task: whether its file exists, whether a
   *   session is cached, and the manifest's version/accuracy if present.
   */
  getStatus(): Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }> {
    const tasks: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
    const status = {} as Record<ModelTask, { available: boolean; loaded: boolean; version?: string; accuracy?: number }>

    for (const task of tasks) {
      const entry = this.getManifestEntry(task)
      status[task] = {
        available: this.hasModel(task),
        loaded: this.loadedModels.has(task),
        version: entry?.version,
        accuracy: entry?.accuracy,
      }
    }
    return status
  }

  /** Release one session, logging (not throwing) if the runtime rejects. */
  private async releaseSession(task: ModelTask, loaded: LoadedModel, failureMessage: string): Promise<void> {
    try {
      await loaded.session.release?.()
    } catch (err) {
      this.logger.warn({ error: ModelManager.describeError(err), task }, failureMessage)
    }
  }

  /**
   * Unload a single model, releasing its ONNX session to free native memory.
   */
  async unloadModel(task: ModelTask): Promise<void> {
    const loaded = this.loadedModels.get(task)
    if (!loaded) return

    // Drop it from the cache first so nobody is handed a session mid-release.
    this.loadedModels.delete(task)
    await this.releaseSession(task, loaded, 'Failed to release ONNX session')
    this.logger.debug({ task }, 'Model unloaded')
  }

  /**
   * Unload all models, releasing ONNX sessions to free native memory.
   */
  async unloadAll(): Promise<void> {
    // Snapshot and clear up front: a model loaded while we await below stays
    // cached instead of being cleared without ever being released.
    const snapshot = [...this.loadedModels]
    this.loadedModels.clear()

    for (const [task, loaded] of snapshot) {
      await this.releaseSession(task, loaded, 'Failed to release ONNX session during unloadAll')
    }
    this.logger.debug('All models unloaded')
  }
}
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Optimization Module
|
|
3
|
-
* Phase 15.5 - Semantic caching and precomputation
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
export { SemanticCache } from './semantic-cache'
|
|
7
|
-
export type { CachedResult, CacheStats } from './semantic-cache'
|
|
8
|
-
|
|
9
|
-
export { PrecomputeEngine } from './precompute'
|
|
10
|
-
export type { PrecomputedStats, ProjectStats } from './precompute'
|
|
1
|
+
/**
|
|
2
|
+
* Optimization Module
|
|
3
|
+
* Phase 15.5 - Semantic caching and precomputation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export { SemanticCache } from './semantic-cache'
|
|
7
|
+
export type { CachedResult, CacheStats } from './semantic-cache'
|
|
8
|
+
|
|
9
|
+
export { PrecomputeEngine } from './precompute'
|
|
10
|
+
export type { PrecomputedStats, ProjectStats } from './precompute'
|