claude-brain 0.29.1 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 0.29.1
1
+ 0.30.0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-brain",
3
- "version": "0.29.1",
3
+ "version": "0.30.0",
4
4
  "description": "Local development assistant bridging Obsidian vaults with Claude Code via MCP",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
@@ -9,6 +9,7 @@
9
9
  },
10
10
  "files": [
11
11
  "src/**/*.ts",
12
+ "src/intelligence/hf-manifest.json",
12
13
  "src/hooks/claude-code-mastery.md",
13
14
  "scripts/postinstall.mjs",
14
15
  "packs/",
@@ -16,10 +16,13 @@ import { join } from 'node:path'
16
16
  import { homedir } from 'node:os'
17
17
  import { parseArgs } from 'citty'
18
18
  import { renderLogo, theme, heading, dimText, successText, warningText, errorText, box, summaryPanel } from '@/cli/ui/index.js'
19
+ import { progressBar } from '@/cli/ui/components.js'
19
20
  import { getHomePaths, getClaudeBrainHome } from '@/config/home'
20
21
  import { getTrainingStats, type TrainingTask } from '@/training/data-store'
21
22
  import type { ModelManifest, ModelManifestEntry, ModelTask } from '@/intelligence/model-manager'
22
23
  import { shouldRetrain, retrainTask, retrainAll, type RetrainConfig } from '@/training/retrain-pipeline'
24
+ import { downloadFromHuggingFace, type HfManifest } from '@/intelligence/hf-downloader'
25
+ import hfManifestData from '@/intelligence/hf-manifest.json'
23
26
 
24
27
  const ALL_TASKS: ModelTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
25
28
 
@@ -38,7 +41,7 @@ export async function runModels() {
38
41
  subcommand: { type: 'positional', required: false, description: 'Subcommand: list, status, download, enable, disable, benchmark, stats, retrain' },
39
42
  taskArg: { type: 'positional', required: false, description: 'Task name or "all" (for enable/disable/benchmark/retrain)' },
40
43
  task: { type: 'string', description: 'Target task (for download --task)' },
41
- source: { type: 'string', description: 'Source: local (default) or release' },
44
+ source: { type: 'string', description: 'Source: local (default) or hf (Hugging Face Hub)' },
42
45
  force: { type: 'boolean', description: 'Force retrain even if checks say not needed' },
43
46
  })
44
47
 
@@ -256,7 +259,7 @@ function loadManifest(): ModelManifest | null {
256
259
 
257
260
  // ─── download ─────────────────────────────────────────────────────
258
261
 
259
- function downloadModels(taskFilter: string, source: string) {
262
+ async function downloadModels(taskFilter: string, source: string) {
260
263
  console.log()
261
264
  console.log(renderLogo())
262
265
  console.log()
@@ -280,17 +283,9 @@ function downloadModels(taskFilter: string, source: string) {
280
283
  console.log(successText(`Created models directory: ${paths.models}`))
281
284
  }
282
285
 
283
- // Release source not yet implemented
284
- if (source === 'release') {
285
- console.log(
286
- box(
287
- 'Downloading from release artifacts is not yet available.\n' +
288
- 'Use --source local to install from ~/slm-training/models/ instead.',
289
- 'Coming Soon'
290
- )
291
- )
292
- console.log()
293
- return
286
+ // Hugging Face Hub source
287
+ if (source === 'hf' || source === 'release') {
288
+ return downloadFromHF(tasks, paths.models)
294
289
  }
295
290
 
296
291
  // Local source — copy from ~/slm-training/models/
@@ -409,6 +404,109 @@ function downloadModels(taskFilter: string, source: string) {
409
404
  console.log()
410
405
  }
411
406
 
407
// ─── download from HF ─────────────────────────────────────────────

const isTTY = process.stdout.isTTY === true

/**
 * Install models from the Hugging Face Hub into `modelsDir`.
 *
 * Prints a short summary, downloads every requested task through
 * `downloadFromHuggingFace` (an in-place progress line is drawn only when
 * stdout is a TTY), merges the successful downloads into `manifest.json`
 * inside `modelsDir`, and finally switches those tasks to `'model'` mode in
 * the config.
 *
 * @param tasks Tasks to download.
 * @param modelsDir Directory that receives the model files and the manifest.
 */
async function downloadFromHF(tasks: ModelTask[], modelsDir: string) {
  const hub = hfManifestData as HfManifest

  // Advertised size of every requested model the bundled manifest knows about.
  const expectedBytes = tasks.reduce((sum, task) => {
    const entry = hub.models[task]
    return entry ? sum + entry.size : sum
  }, 0)

  const banner = [
    ` ${dimText('Source:')} Hugging Face Hub (${hub.hfRepo})`,
    ` ${dimText('Target:')} ${modelsDir}`,
    ` ${dimText('Models:')} ${tasks.join(', ')}`,
    ` ${dimText('Total:')} ~${formatBytes(expectedBytes)}`,
  ]
  for (const line of banner) console.log(line)
  console.log()

  // Wipes the progress line so the final status starts on a clean row.
  const clearProgressLine = () => {
    if (isTTY) process.stdout.write('\r' + ' '.repeat(80) + '\r')
  }

  const outcomes = await downloadFromHuggingFace(hub, {
    destDir: modelsDir,
    tasks,
    onProgress: (task, downloaded, total) => {
      if (!isTTY || !(total > 0)) return
      const bar = progressBar((downloaded / total) * 100, 25)
      process.stdout.write(`\r ${task.padEnd(12)} ${bar} ${formatBytes(downloaded)} / ${formatBytes(total)}`)
    },
    onComplete: (task, bytes) => {
      clearProgressLine()
      console.log(` ${successText(task.padEnd(12))} ${formatBytes(bytes)} ${dimText('SHA256 verified')}`)
    },
    onError: (task, error) => {
      clearProgressLine()
      console.log(` ${errorText(task.padEnd(12))} ${error}`)
    },
  })

  console.log()

  const completed = outcomes.filter(outcome => outcome.success)
  if (completed.length === 0) {
    console.log(warningText('No models were downloaded.'))
    console.log()
    return
  }

  // Start from whatever the on-disk manifest already lists so that models
  // installed earlier are not dropped.
  const manifestPath = join(modelsDir, 'manifest.json')
  const mergedModels: Partial<Record<ModelTask, ModelManifestEntry>> = {}
  if (existsSync(manifestPath)) {
    try {
      const previous: ModelManifest = JSON.parse(readFileSync(manifestPath, 'utf-8'))
      if (previous.models) Object.assign(mergedModels, previous.models)
    } catch {
      // An unreadable manifest is simply replaced.
    }
  }

  const installedTasks: ModelTask[] = []
  let totalBytes = 0

  for (const outcome of completed) {
    const task = outcome.task as ModelTask
    const entry = hub.models[task]
    if (!entry) continue

    const { version, file, sha256, params, labels, maxSeqLen } = entry
    mergedModels[task] = {
      version,
      file,
      sha256,
      params,
      accuracy: entry.accuracy ?? undefined,
      labels,
      maxSeqLen,
    }
    installedTasks.push(task)
    totalBytes += outcome.bytes
  }

  const localManifest: ModelManifest = { models: mergedModels }
  writeFileSync(manifestPath, JSON.stringify(localManifest, null, 2))

  // Turn on model inference for everything that was just installed.
  const config = loadConfigFile()
  if (!config.slm) config.slm = {}
  config.slm.enabled = true
  if (!config.slm.tasks) config.slm.tasks = {}
  for (const task of installedTasks) {
    config.slm.tasks[task] = 'model'
  }
  saveConfigFile(config)
  updateConfigYml(installedTasks, 'model')

  const plural = completed.length === 1 ? '' : 's'
  console.log(successText(`Downloaded ${completed.length} model${plural} (${formatBytes(totalBytes)})`))
  console.log(successText(`Auto-enabled ${installedTasks.join(', ')} in config`))
  console.log(dimText(`Manifest written to ${manifestPath}`))
  console.log()
}
509
+
412
510
  // ─── enable ───────────────────────────────────────────────────────
413
511
 
414
512
  function enableTask(taskArg: string) {
@@ -783,7 +881,7 @@ function printModelsHelp() {
783
881
  const subcommands = [
784
882
  ['list', 'Show installed models and their status'],
785
883
  ['status', 'Show inference routing and ONNX runtime status'],
786
- ['download', 'Download pre-trained models (--task <task>|all)'],
884
+ ['download', 'Download models (--source local|hf, --task <task>|all)'],
787
885
  ['enable <task|all>', 'Enable model inference for task(s)'],
788
886
  ['disable <task|all>', 'Disable model inference for task(s)'],
789
887
  ['benchmark <task>', 'Run accuracy benchmark on test data'],
@@ -805,6 +903,9 @@ function printModelsHelp() {
805
903
  console.log(theme.bold('Examples:'))
806
904
  console.log(` ${dimText('claude-brain models list')}`)
807
905
  console.log(` ${dimText('claude-brain models status')}`)
906
+ console.log(` ${dimText('claude-brain models download --source hf')}`)
907
+ console.log(` ${dimText('claude-brain models download --source hf --task intent')}`)
908
+ console.log(` ${dimText('claude-brain models download --source local')}`)
808
909
  console.log(` ${dimText('claude-brain models enable all')}`)
809
910
  console.log(` ${dimText('claude-brain models enable intent')}`)
810
911
  console.log(` ${dimText('claude-brain models disable pattern')}`)
@@ -148,7 +148,7 @@ export async function loadConfig(basePath: string = process.cwd()): Promise<Conf
148
148
  const fileConfig = loadFromFile(basePath)
149
149
  const envConfig = loadFromEnv()
150
150
 
151
- const merged = mergeConfigs(defaultConfig as Partial<Config>, envConfig, fileConfig)
151
+ const merged = mergeConfigs(defaultConfig as Partial<Config>, fileConfig, envConfig)
152
152
 
153
153
  const result = ConfigSchema.safeParse(merged)
154
154
 
@@ -362,6 +362,8 @@ export const ConfigSchema = z.object({
362
362
  enabled: z.boolean().default(false),
363
363
  /** Directory containing ONNX model files */
364
364
  modelsDir: z.string().default('~/.claude-brain/models'),
365
+ /** Hugging Face repo for downloading pre-trained models */
366
+ hfRepo: z.string().default('demgun101/claude-brain-models'),
365
367
  /** Minimum model confidence to use model prediction (below → regex fallback) */
366
368
  confidenceThreshold: z.number().min(0).max(1).default(0.7),
367
369
  /** Per-task mode: 'model' uses model only, 'regex' uses regex only, 'both' runs both and logs comparison */
@@ -211,7 +211,7 @@ export class PassiveClassifier {
211
211
  const packages = match[1]?.trim()
212
212
  if (packages) {
213
213
  return {
214
- type: 'progress',
214
+ type: 'decision',
215
215
  confidence: 0.85,
216
216
  content: `Installed package(s): ${packages}`,
217
217
  project: this.extractProjectFromCwd(input.cwd),
@@ -0,0 +1,222 @@
1
+ /**
2
+ * Hugging Face Hub Downloader — streams ONNX models from HF with SHA256 verification.
3
+ * Atomic writes via temp file + rename. Inline retry with exponential backoff.
4
+ */
5
+
6
+ import { createHash } from 'node:crypto'
7
+ import { existsSync, mkdirSync, renameSync, unlinkSync } from 'node:fs'
8
+ import { join } from 'node:path'
9
+ import type { ModelTask } from '@/intelligence/model-manager'
10
+
11
/** One downloadable model as described by the bundled Hugging Face manifest. */
export interface HfManifestEntry {
  /** Model file name; used both as the path inside the repo and as the name written under the destination directory. */
  file: string
  /** Metadata file name; resolved against the repo and the destination directory the same way as `file`. */
  metaFile: string
  /** Expected hex SHA-256 digest of `file`. */
  sha256: string
  /** Expected hex SHA-256 digest of `metaFile`. */
  metaSha256: string
  /** Size of `file` in bytes; used as the progress total when the response carries no usable Content-Length. */
  size: number
  version: string
  params: string
  /** May be `null` in the manifest. */
  accuracy: number | null
  labels: string[]
  maxSeqLen: number
}
23
+
24
/** Shape of the manifest that tells the downloader where the models live. */
export interface HfManifest {
  /** Repo used when `DownloadOptions.hfRepo` is not given. */
  hfRepo: string
  /** Branch used when `DownloadOptions.hfBranch` is not given. */
  hfBranch: string
  /** Entries looked up by task name. */
  models: Record<string, HfManifestEntry>
}
29
+
30
/** Options accepted by `downloadFromHuggingFace`. */
export interface DownloadOptions {
  /** Target directory to write models into (created if it does not exist) */
  destDir: string
  /** Tasks to download; a task without a manifest entry is reported as a failure */
  tasks: ModelTask[]
  /** HF repo override (default from manifest) */
  hfRepo?: string
  /** HF branch override (default from manifest) */
  hfBranch?: string
  /** Progress callback for the model file: task name, bytes downloaded so far, total bytes */
  onProgress?: (task: string, downloaded: number, total: number) => void
  /** Called once both the model and its metadata file are in place; `bytes` is the model file's size */
  onComplete?: (task: string, bytes: number) => void
  /** Called with the error message when a task fails */
  onError?: (task: string, error: string) => void
}
46
+
47
/** Outcome of one task, in the order the tasks were requested. */
export interface DownloadResult {
  task: string
  success: boolean
  /** Bytes written for the model file; 0 when the task failed. */
  bytes: number
  /** Error message; only set when `success` is false. */
  error?: string
}
53
+
54
// Retries after the first attempt: the retry loop runs attempts 0..MAX_RETRIES,
// i.e. up to MAX_RETRIES + 1 attempts per file.
const MAX_RETRIES = 3
// Delay in ms before the next attempt, indexed by the attempt that just failed.
const RETRY_DELAYS = [2000, 4000, 8000]
// Handed to AbortSignal.timeout() for each fetch.
const DOWNLOAD_TIMEOUT_MS = 300_000 // 5 minutes per file
57
+
58
/**
 * Build the Hugging Face "resolve" URL for a file.
 * Pattern: https://huggingface.co/{repo}/resolve/{branch}/{filename}
 *
 * The branch/revision is percent-encoded as a single path segment, so a ref
 * such as `refs/pr/1` becomes `refs%2Fpr%2F1`. Each segment of the file path
 * is encoded on its own: sub-directories keep their `/` separators, while
 * characters such as spaces, `#` or `?` can no longer corrupt the URL.
 * Plain names like `main` and `intent.onnx` are unaffected.
 *
 * @param repo Repository id in `owner/name` form (inserted as-is).
 * @param branch Branch or revision to resolve against.
 * @param filename Path of the file inside the repository.
 * @returns The absolute download URL.
 */
function hfUrl(repo: string, branch: string, filename: string): string {
  const encodedBranch = encodeURIComponent(branch)
  const encodedPath = filename
    .split('/')
    .map(segment => encodeURIComponent(segment))
    .join('/')
  return `https://huggingface.co/${repo}/resolve/${encodedBranch}/${encodedPath}`
}
65
+
66
/**
 * Stream-download a single file with SHA256 verification and atomic write.
 *
 * The body is streamed into `${destPath}.download` while being hashed. Only
 * when the digest matches `expectedSha256` is the temp file renamed over
 * `destPath`, so a failed or corrupt download never replaces an existing file.
 *
 * @param url Absolute URL to fetch.
 * @param destPath Final location of the file.
 * @param expectedSha256 Hex SHA-256 digest the content must match (compared case-insensitively).
 * @param expectedSize Progress total to report when the response has no usable Content-Length.
 * @param onProgress Called after every chunk with (bytes so far, total bytes).
 * @returns The number of bytes written.
 * @throws Error on a non-OK status, a missing body, a stream or timeout failure, or a digest mismatch.
 */
async function downloadFile(
  url: string,
  destPath: string,
  expectedSha256: string,
  expectedSize: number,
  onProgress?: (downloaded: number, total: number) => void,
): Promise<number> {
  const tempPath = `${destPath}.download`

  // Clean up any leftover temp file from an interrupted attempt
  if (existsSync(tempPath)) {
    unlinkSync(tempPath)
  }

  const response = await fetch(url, {
    signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
  })

  if (!response.ok) {
    // Discard the unread error body so the connection is not left open.
    void response.body?.cancel().catch(() => {})
    throw new Error(`HTTP ${response.status}: ${response.statusText}`)
  }

  if (!response.body) {
    throw new Error('Response body is null')
  }

  // A missing, zero or unparsable Content-Length falls back to the manifest size.
  const contentLength = parseInt(response.headers.get('content-length') || '0', 10) || expectedSize
  const hash = createHash('sha256')
  const writer = Bun.file(tempPath).writer()
  const reader = response.body.getReader()
  let downloaded = 0
  let streamFinished = false

  try {
    while (true) {
      const { done, value } = await reader.read()
      if (done) break

      writer.write(value)
      hash.update(value)
      downloaded += value.byteLength

      onProgress?.(downloaded, contentLength)
    }
    streamFinished = true
  } finally {
    if (!streamFinished) {
      // Something failed mid-stream: abandon the rest of the body so the
      // underlying connection is released instead of lingering.
      void reader.cancel().catch(() => {})
    }
    await writer.end()
  }

  // Verify SHA256 (hex digests are case-insensitive; ours is lowercase)
  const actualSha256 = hash.digest('hex')
  if (actualSha256 !== expectedSha256.toLowerCase()) {
    // Clean up bad file
    if (existsSync(tempPath)) unlinkSync(tempPath)
    throw new Error(
      `SHA256 mismatch: expected ${expectedSha256.slice(0, 12)}..., got ${actualSha256.slice(0, 12)}...`
    )
  }

  // Atomic rename
  renameSync(tempPath, destPath)
  return downloaded
}
134
+
135
/**
 * Tell whether an error from `downloadFile` is a permanent HTTP client error.
 * `downloadFile` reports bad statuses as `HTTP <status>: <text>`; any 4xx other
 * than 408 (timeout) and 429 (rate limit) will not succeed on a retry.
 */
function isPermanentHttpError(error: Error): boolean {
  const match = /^HTTP (\d{3}):/.exec(error.message)
  if (!match) return false
  const status = Number(match[1])
  return status >= 400 && status < 500 && status !== 408 && status !== 429
}

/**
 * Download a single file with retry logic.
 *
 * Makes up to `MAX_RETRIES + 1` attempts, sleeping `RETRY_DELAYS[attempt]` ms
 * between them. Permanent HTTP client errors (e.g. 404) are rethrown at once
 * instead of being retried, since waiting cannot fix them.
 *
 * @param url Absolute URL to fetch.
 * @param destPath Final location of the file.
 * @param expectedSha256 Hex SHA-256 digest the content must match.
 * @param expectedSize Progress total to report when the server sends none.
 * @param onProgress Forwarded to `downloadFile`.
 * @returns The number of bytes written.
 * @throws The error of the last attempt.
 */
async function downloadWithRetry(
  url: string,
  destPath: string,
  expectedSha256: string,
  expectedSize: number,
  onProgress?: (downloaded: number, total: number) => void,
): Promise<number> {
  const tempPath = `${destPath}.download`

  for (let attempt = 0; ; attempt++) {
    try {
      return await downloadFile(url, destPath, expectedSha256, expectedSize, onProgress)
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err))

      // Clean up temp file on failure
      if (existsSync(tempPath)) {
        try { unlinkSync(tempPath) } catch { /* ignore */ }
      }

      if (attempt >= MAX_RETRIES || isPermanentHttpError(error)) {
        throw error
      }

      // Reuse the last delay if MAX_RETRIES ever outgrows the table, rather
      // than retrying with no delay at all.
      const delay = RETRY_DELAYS[attempt] ?? RETRY_DELAYS[RETRY_DELAYS.length - 1]
      await new Promise(resolve => setTimeout(resolve, delay))
    }
  }
}
167
+
168
/**
 * Main entry point: download models from Hugging Face Hub.
 *
 * Tasks are processed one after another. For each task the model file is
 * fetched first, then its metadata file; the task only counts as successful
 * when both are in place. Failures never abort the run; they are reported
 * through `onError` and recorded in the returned list.
 *
 * @param manifest Describes the available models and the default repo/branch.
 * @param options Destination, task selection, overrides and callbacks.
 * @returns One result per requested task, in request order.
 */
export async function downloadFromHuggingFace(
  manifest: HfManifest,
  options: DownloadOptions,
): Promise<DownloadResult[]> {
  const repo = options.hfRepo || manifest.hfRepo
  const branch = options.hfBranch || manifest.hfBranch
  const outcomes: DownloadResult[] = []

  // Report a failed task to the caller and record it.
  const recordFailure = (task: string, message: string): void => {
    options.onError?.(task, message)
    outcomes.push({ task, success: false, bytes: 0, error: message })
  }

  // Ensure dest directory exists
  if (!existsSync(options.destDir)) {
    mkdirSync(options.destDir, { recursive: true })
  }

  for (const task of options.tasks) {
    const entry = manifest.models[task]
    if (!entry) {
      recordFailure(task, `No manifest entry for task "${task}"`)
      continue
    }

    try {
      // ONNX model: the only transfer that reports progress
      const bytes = await downloadWithRetry(
        hfUrl(repo, branch, entry.file),
        join(options.destDir, entry.file),
        entry.sha256,
        entry.size,
        (downloaded, total) => options.onProgress?.(task, downloaded, total),
      )

      // Metadata JSON: no size hint, no progress
      await downloadWithRetry(
        hfUrl(repo, branch, entry.metaFile),
        join(options.destDir, entry.metaFile),
        entry.metaSha256,
        0,
      )

      options.onComplete?.(task, bytes)
      outcomes.push({ task, success: true, bytes })
    } catch (err) {
      recordFailure(task, err instanceof Error ? err.message : String(err))
    }
  }

  return outcomes
}
@@ -0,0 +1,78 @@
1
+ {
2
+ "hfRepo": "demgun101/claude-brain-models",
3
+ "hfBranch": "main",
4
+ "models": {
5
+ "intent": {
6
+ "file": "intent.onnx",
7
+ "metaFile": "intent.json",
8
+ "sha256": "f276ec091712f53ceeccfdca70d468d0b5aa6da7ee5c4bd7867a7464a9732dd2",
9
+ "metaSha256": "d5702fd45e5685187e74810da75e41be9d12d9fd82b7ccd7244a9f0d33076c65",
10
+ "size": 120073283,
11
+ "version": "0.1.0",
12
+ "params": "nanoGPT-intent",
13
+ "accuracy": 0.9904,
14
+ "labels": ["session_start", "context_needed", "decision_made", "store_this", "pattern_found", "mistake_learned", "progress_update", "question", "comparison", "exploration", "list_all", "update_memory", "delete_memory", "detail_request", "timeline", "no_action"],
15
+ "maxSeqLen": 128
16
+ },
17
+ "entity": {
18
+ "file": "entity.onnx",
19
+ "metaFile": "entity.json",
20
+ "sha256": "d15706b78112e8dda3caa7e054237106b67200f90a1e6e0e4336526468dea8b5",
21
+ "metaSha256": "1de0636bc3bd9de23c0ce767a148d6becad7b6add6a0a717449a7ac185296d55",
22
+ "size": 120058476,
23
+ "version": "0.1.0",
24
+ "params": "nanoGPT-entity",
25
+ "accuracy": 0.9795,
26
+ "labels": ["O", "B-TECH", "I-TECH", "B-PROJECT", "I-PROJECT", "B-CONCEPT", "I-CONCEPT"],
27
+ "maxSeqLen": 128
28
+ },
29
+ "query": {
30
+ "file": "query.onnx",
31
+ "metaFile": "query.json",
32
+ "sha256": "802446105ad873b4a172dc875c07e245882f48691f72ae0abb0bda5934fa084a",
33
+ "metaSha256": "32393875ff58a87da83480673868e4ac14c3fd1a59e16836d7c1f2fa02a7f25e",
34
+ "size": 120057880,
35
+ "version": "0.1.0",
36
+ "params": "nanoGPT-query",
37
+ "accuracy": 0.985,
38
+ "labels": ["factual", "procedural", "comparative", "temporal", "exploratory", "decision"],
39
+ "maxSeqLen": 128
40
+ },
41
+ "knowledge": {
42
+ "file": "knowledge.onnx",
43
+ "metaFile": "knowledge.json",
44
+ "sha256": "a786eefc5ef5c6f2fc132c6de7f0972891057f5a993c9c2d9264207d36165035",
45
+ "metaSha256": "6aa77194cbd8c10a2451958ec5a3e43328df97ee8aea46abf53e3e432f3f3c4d",
46
+ "size": 120056340,
47
+ "version": "0.1.0",
48
+ "params": "nanoGPT-knowledge",
49
+ "accuracy": 0.998,
50
+ "labels": ["fact", "preference", "constraint", "goal", "definition"],
51
+ "maxSeqLen": 128
52
+ },
53
+ "compress": {
54
+ "file": "compress.onnx",
55
+ "metaFile": "compress.json",
56
+ "sha256": "2d950a0e0a2cdc5dc90b7c44803c5fb81c76d824bd208c64a7104c5845e1c237",
57
+ "metaSha256": "e91500c665ec47083bb6b3ff8c83d529f263792d83f7f169401267ceb1e8d031",
58
+ "size": 357902441,
59
+ "version": "0.1.0",
60
+ "params": "nanoGPT-compress",
61
+ "accuracy": null,
62
+ "labels": [],
63
+ "maxSeqLen": 256
64
+ },
65
+ "pattern": {
66
+ "file": "pattern.onnx",
67
+ "metaFile": "pattern.json",
68
+ "sha256": "3b44371eaef11fb8ccc4c1636d6cfbdcdf62ae8bc6ffc808ff0ff45b24824fde",
69
+ "metaSha256": "1126f3fa9a115b4d26063e006e7d33b9e82b7740194ac86b5dae9f25d5dfd1a2",
70
+ "size": 254526620,
71
+ "version": "0.1.0",
72
+ "params": "nanoGPT-pattern",
73
+ "accuracy": 0.8667,
74
+ "labels": ["solution", "anti-pattern", "best-practice", "common-issue"],
75
+ "maxSeqLen": 128
76
+ }
77
+ }
78
+ }
@@ -213,9 +213,32 @@ export class ModelManager {
213
213
  }
214
214
 
215
215
  /**
216
- * Unload all models (for cleanup/testing)
216
+ * Unload a single model, releasing its ONNX session to free native memory.
217
217
  */
218
- unloadAll(): void {
218
+ async unloadModel(task: ModelTask): Promise<void> {
219
+ const loaded = this.loadedModels.get(task)
220
+ if (!loaded) return
221
+
222
+ try {
223
+ await loaded.session?.release?.()
224
+ } catch (err) {
225
+ this.logger.warn({ error: err, task }, 'Failed to release ONNX session')
226
+ }
227
+ this.loadedModels.delete(task)
228
+ this.logger.debug({ task }, 'Model unloaded')
229
+ }
230
+
231
+ /**
232
+ * Unload all models, releasing ONNX sessions to free native memory.
233
+ */
234
+ async unloadAll(): Promise<void> {
235
+ for (const [task, loaded] of this.loadedModels) {
236
+ try {
237
+ await loaded.session?.release?.()
238
+ } catch (err) {
239
+ this.logger.warn({ error: err, task }, 'Failed to release ONNX session during unloadAll')
240
+ }
241
+ }
219
242
  this.loadedModels.clear()
220
243
  this.logger.debug('All models unloaded')
221
244
  }
@@ -697,9 +697,9 @@ export async function shutdownServices(): Promise<void> {
697
697
  }
698
698
  }
699
699
 
700
- // Unload SLM models
700
+ // Unload SLM models (releases ONNX sessions)
701
701
  if (services.modelManager) {
702
- services.modelManager.unloadAll()
702
+ await services.modelManager.unloadAll()
703
703
  serviceLogger.info('SLM models unloaded')
704
704
  }
705
705