@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,1213 @@
1
+ /**
2
+ * Coordinates which model is currently loaded into the plugin-local-ai
3
+ * runtime. Eliza runs one inference model at a time; switching models
4
+ * unloads the previous one first so we don't double-allocate VRAM.
5
+ *
6
+ * This module *does not* talk to `capacitor-llama` directly. The plugin
7
+ * owns the native binding; we ask it to swap via a small runtime service
8
+ * registered under the name "localInferenceLoader". When the plugin is not
9
+ * enabled, we still track the user's preferred active model so the
10
+ * preference survives enabling the plugin later.
11
+ */
12
+
13
+ import { existsSync, readFileSync } from "node:fs";
14
+ import {
15
+ dirname as pathDirname,
16
+ join as pathJoin,
17
+ resolve as pathResolve,
18
+ } from "node:path";
19
+ import type { AgentRuntime } from "@elizaos/core";
20
+ import {
21
+ ELIZA_1_PLACEHOLDER_IDS,
22
+ FIRST_RUN_DEFAULT_MODEL_ID,
23
+ findCatalogModel,
24
+ } from "./catalog";
25
+ import { localInferenceEngine } from "./engine";
26
+ import { probeHardware } from "./hardware";
27
+ import type { Eliza1Manifest } from "./manifest";
28
+ import {
29
+ assessRamFit,
30
+ defaultManifestLoader,
31
+ type ManifestLoader,
32
+ pickFittingContextVariant,
33
+ type RamFitOptions,
34
+ } from "./ram-budget";
35
+ import { recommendForFirstRun } from "./recommendation";
36
+ import { touchElizaModel } from "./registry";
37
+ import type {
38
+ ActiveModelState,
39
+ CatalogModel,
40
+ HardwareProbe,
41
+ InstalledModel,
42
+ } from "./types";
43
+ import {
44
+ assessVoiceBundleFits,
45
+ VOICE_ENSEMBLE_BUDGETS,
46
+ type VoiceTierSlot,
47
+ } from "./voice/voice-budget";
48
+
49
+ export type { KvOffloadMode, LocalInferenceLoadArgs } from "./load-args.js";
50
+ export {
51
+ ELIZA_1_PLACEHOLDER_IDS,
52
+ FIRST_RUN_DEFAULT_MODEL_ID,
53
+ recommendForFirstRun,
54
+ };
55
+
56
+ import type { KvOffloadMode, LocalInferenceLoadArgs } from "./load-args.js";
57
+
58
+ /**
59
+ * Allow-list for KV cache type strings. The eliza fork of node-llama-cpp
60
+ * (v3.18.1-eliza.3+) extends `GgmlType` with TBQ3_0 (43), TBQ4_0 (44),
61
+ * QJL1_256 (46), Q4_POLAR (47) so the binding accepts the lowercase
62
+ * aliases below. Whether the C++ kernel actually runs depends on the
63
+ * loaded native binary (the legacy node-llama-cpp NAPI prebuild is no longer used) — the elizaOS/llama.cpp
64
+ * prebuild ships the kernels; upstream's prebuild does not.
65
+ *
66
+ * `validateLocalInferenceLoadArgs({ allowFork: false })` (the route-layer
67
+ * default) still throws on these strings so a UI/API caller can't land
68
+ * the desktop on a kernel that won't run; `allowFork: true` (the AOSP +
69
+ * resolved-args path) lets them through.
70
+ */
71
/**
 * KV cache type names treated as elizaOS-fork-only. Insertion order matters:
 * the set is joined verbatim into validation error messages.
 */
const FORK_ONLY_KV_CACHE_TYPES = new Set([
  "tbq1_0",
  "tbq2_0",
  "tbq3_0",
  "tbq4_0",
  "tbq3_0_tcq",
  "turbo2",
  "turbo3",
  "turbo4",
  "turbo2_0",
  "turbo3_0",
  "turbo4_0",
  "turbo2_tcq",
  "turbo3_tcq",
  "qjl1_256",
  "qjl1_512",
  "q4_polar",
]);

/**
 * KV cache type names treated as available on stock builds. Insertion order
 * matters for the same reason as the fork-only list.
 */
const STOCK_KV_CACHE_TYPES = new Set([
  "f16",
  "f32",
  "bf16",
  "q4_0",
  "q4_1",
  "q5_0",
  "q5_1",
  "q8_0",
  "q4_k",
  "q5_k",
  "q6_k",
  "q8_k",
  "iq4_nl",
]);

/**
 * Shared membership test: trims and lower-cases `name` before the lookup.
 * Missing or empty names are never members.
 */
function kvCacheTypeListed(
  allowList: ReadonlySet<string>,
  name: string | undefined,
): boolean {
  return name ? allowList.has(name.trim().toLowerCase()) : false;
}

/** True when `name` (case/whitespace-insensitive) is a fork-only KV cache type. */
export function isForkOnlyKvCacheType(name: string | undefined): boolean {
  return kvCacheTypeListed(FORK_ONLY_KV_CACHE_TYPES, name);
}

/** True when `name` (case/whitespace-insensitive) is a stock KV cache type. */
export function isStockKvCacheType(name: string | undefined): boolean {
  return kvCacheTypeListed(STOCK_KV_CACHE_TYPES, name);
}
115
+
116
/**
 * Validate per-load overrides before they reach a loader.
 *
 * Checks performed, in order:
 *  - `cacheTypeK` / `cacheTypeV`: non-empty string naming a known KV cache
 *    type. Fork-only types are rejected unless `options.allowFork` is true.
 *  - `contextSize`: integer >= 256.
 *  - `gpuLayers`: non-negative integer.
 *  - `kvOffload`: `"cpu"`, `"gpu"`, `"split"`, or `{ gpuLayers }` where
 *    `gpuLayers` is a non-negative integer (same rule as top-level
 *    `gpuLayers`; NaN, negative and fractional counts are rejected).
 *  - `flashAttention` / `mmap` / `mlock`: booleans.
 *
 * Fields that are `undefined` are skipped. Other fields are not inspected.
 *
 * @param args Candidate load arguments; any subset may be present.
 * @param options `allowFork: true` additionally accepts the fork-only KV
 *   cache types.
 * @throws Error on the first illegal value, with a message naming the field,
 *   so the caller can surface it instead of letting the load proceed.
 */
export function validateLocalInferenceLoadArgs(
  args: Partial<LocalInferenceLoadArgs>,
  options: { allowFork?: boolean } = {},
): void {
  const allowFork = options.allowFork === true;

  for (const field of ["cacheTypeK", "cacheTypeV"] as const) {
    const value = args[field];
    if (value === undefined) continue;
    if (typeof value !== "string" || value.length === 0) {
      throw new Error(`${field} must be a non-empty string`);
    }
    const forkOnly = isForkOnlyKvCacheType(value);
    if (!allowFork && forkOnly) {
      throw new Error(
        `${field}="${value}" requires the elizaOS/llama.cpp kernel from the elizaOS fork. The elizaOS/capacitor-llama binding accepts the string at the TS layer, but the upstream @node-llama-cpp/<platform> prebuild does not implement the underlying ggml type. Pass through the AOSP path or load the elizaOS/llama.cpp prebuilt binary. Stock-only types accepted here: ${[...STOCK_KV_CACHE_TYPES].join(", ")}.`,
      );
    }
    if (!forkOnly && !isStockKvCacheType(value)) {
      // Unknown name: the message lists whichever sets are acceptable in
      // the current mode.
      throw new Error(
        allowFork
          ? `${field}="${value}" is not a recognised KV cache type. Accepted stock types: ${[...STOCK_KV_CACHE_TYPES].join(", ")}. Accepted elizaOS fork types: ${[...FORK_ONLY_KV_CACHE_TYPES].join(", ")}.`
          : `${field}="${value}" is not a recognised KV cache type. Stock builds accept ${[...STOCK_KV_CACHE_TYPES].join(", ")}.`,
      );
    }
  }

  if (args.contextSize !== undefined) {
    if (
      typeof args.contextSize !== "number" ||
      !Number.isInteger(args.contextSize) ||
      args.contextSize < 256
    ) {
      throw new Error(
        `contextSize must be a positive integer >= 256 (got ${String(args.contextSize)})`,
      );
    }
  }

  if (args.gpuLayers !== undefined) {
    if (
      typeof args.gpuLayers !== "number" ||
      !Number.isInteger(args.gpuLayers) ||
      args.gpuLayers < 0
    ) {
      throw new Error(
        `gpuLayers must be a non-negative integer (got ${String(args.gpuLayers)})`,
      );
    }
  }

  if (args.kvOffload !== undefined) {
    const v = args.kvOffload;
    if (typeof v === "string") {
      if (v !== "cpu" && v !== "gpu" && v !== "split") {
        throw new Error(
          `kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number } (got "${v}")`,
        );
      }
    } else {
      const layers =
        v && typeof v === "object"
          ? (v as { gpuLayers?: unknown }).gpuLayers
          : undefined;
      if (typeof layers !== "number") {
        throw new Error(
          `kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number }`,
        );
      }
      // `typeof NaN === "number"`, so the type check alone is not enough;
      // hold the nested count to the same rule as top-level `gpuLayers`.
      if (!Number.isInteger(layers) || layers < 0) {
        throw new Error(
          `kvOffload.gpuLayers must be a non-negative integer (got ${String(layers)})`,
        );
      }
    }
  }

  for (const field of ["flashAttention", "mmap", "mlock"] as const) {
    const value = args[field];
    if (value === undefined) continue;
    if (typeof value !== "boolean") {
      throw new Error(`${field} must be a boolean`);
    }
  }
}
204
+
205
+ export interface LocalInferenceLoader { // Contract a loader exposes for loading/unloading a model and, optionally, generating and embedding.
206
+ loadModel(args: LocalInferenceLoadArgs): Promise<void>; // Load the model described by `args` (`args.modelPath` names the model file).
207
+ unloadModel(): Promise<void>; // Unload the model that is currently loaded.
208
+ currentModelPath(): string | null; // Path of the loaded model. NOTE(review): presumably null when nothing is loaded — confirm against implementations.
209
+ /**
210
+ * Optional generation surface. When a loader implements this, the runtime
211
+ * handler (`ensure-local-inference-handler.ts`) routes TEXT_SMALL /
212
+ * TEXT_LARGE requests through it instead of the standalone engine. Mobile
213
+ * builds populate this via the Capacitor adapter; desktop omits it and falls
214
+ * back to the `LocalInferenceEngine`.
215
+ */
216
+ generate?(args: {
217
+ prompt: string;
218
+ stopSequences?: string[];
219
+ maxTokens?: number;
220
+ temperature?: number;
221
+ /**
222
+ * Optional `promptCacheKey` from the runtime cache plan. Loaders
223
+ * that implement prefix caching (the in-process llama.cpp FFI slot
224
+ * pool or node-llama-cpp session pool) use this to pin
225
+ * subsequent calls with the same key to the same KV cache slot.
226
+ * Loaders without prefix caching can ignore the field.
227
+ */
228
+ cacheKey?: string;
229
+ }): Promise<string>;
230
+ /**
231
+ * Optional embedding surface. When a loader implements this, the runtime
232
+ * handler routes `TEXT_EMBEDDING` requests through it. The AOSP bun:ffi
233
+ * loader populates this directly via `llama_get_embeddings_seq`; the
234
+ * device-bridge loader populates it by dispatching an `embed` frame to
235
+ * the connected device. Loaders that cannot embed leave this undefined,
236
+ * and the runtime falls back to its non-local embedding provider chain.
237
+ */
238
+ embed?(args: { input: string }): Promise<{
239
+ embedding: number[];
240
+ tokens: number;
241
+ }>;
242
+ }
243
+
244
+ /**
245
+ * Per-load override fields the caller can set. Subset of `LocalInferenceLoadArgs`
246
+ * minus `modelPath` (which the coordinator owns) and minus speculative
247
+ * fields (which the catalog `runtime.mtp` block owns end-to-end). The
248
+ * route layer accepts this shape on `POST /api/local-inference/active`.
249
+ */
250
+ export interface LocalInferenceLoadOverrides {
251
+ contextSize?: number; // validateLocalInferenceLoadArgs requires an integer >= 256.
252
+ cacheTypeK?: string; // KV cache type name for K; must be in the stock (or, when allowed, fork-only) allow-list.
253
+ cacheTypeV?: string; // KV cache type name for V; same rules as cacheTypeK.
254
+ gpuLayers?: number; // validateLocalInferenceLoadArgs requires a non-negative integer.
255
+ kvOffload?: KvOffloadMode; // Validated as "cpu" | "gpu" | "split" | { gpuLayers: number }.
256
+ flashAttention?: boolean;
257
+ mmap?: boolean;
258
+ mlock?: boolean;
259
+ useGpu?: boolean; // Not checked by validateLocalInferenceLoadArgs.
260
+ maxThreads?: number; // Not checked by validateLocalInferenceLoadArgs.
261
+ }
262
+
263
+ interface ResolveLocalInferenceLoadArgsOptions { // Optional knobs for resolveLocalInferenceLoadArgs.
264
+ manifestLoader?: ManifestLoader; // Manifest lookup to use; resolveLocalInferenceLoadArgs falls back to defaultManifestLoader when omitted.
265
+ }
266
+
267
/**
 * Bundle root directory for an installed model: the recorded `bundleRoot`
 * when present, otherwise the directory two levels above the model file.
 */
function bundleRootForInstalledModel(installed: InstalledModel): string {
  if (installed.bundleRoot != null) return installed.bundleRoot;
  const modelDir = pathDirname(installed.path);
  return pathDirname(modelDir);
}
270
+
271
/**
 * Find the context size the manifest declares for the installed model file.
 *
 * Scans `manifest.files.text` in order and returns the `ctx` of the first
 * entry whose `ctx` is an integer >= 256 and whose path, resolved against
 * the bundle root, equals the resolved installed path.
 *
 * @returns The matching `ctx`, or `undefined` when no entry qualifies.
 */
function manifestTextContextForInstalledPath(
  installed: InstalledModel,
  manifest: Eliza1Manifest,
): number | undefined {
  const target = pathResolve(installed.path);
  // Same rule as bundleRootForInstalledModel: explicit root, else two
  // directories above the model file.
  const root =
    installed.bundleRoot ?? pathDirname(pathDirname(installed.path));
  const match = manifest.files.text.find(
    (file) =>
      typeof file.ctx === "number" &&
      Number.isInteger(file.ctx) &&
      file.ctx >= 256 &&
      pathResolve(root, file.path) === target,
  );
  return match?.ctx;
}
291
+
292
/**
 * Ordered, de-duplicated list of places an `eliza-1.manifest.json` may live
 * for an installed model: the recorded `manifestPath`, the recorded bundle
 * root, two directories above the model file, then the model file's own
 * directory. Missing or empty candidates are dropped.
 */
function candidateManifestPaths(installed: InstalledModel): string[] {
  const manifestName = "eliza-1.manifest.json";
  const modelDir = pathDirname(installed.path);
  const ordered = new Set<string>();
  const offer = (candidate: string | undefined): void => {
    if (candidate) ordered.add(candidate);
  };

  offer(installed.manifestPath);
  if (installed.bundleRoot) {
    offer(pathJoin(installed.bundleRoot, manifestName));
  }
  offer(pathJoin(pathDirname(modelDir), manifestName));
  offer(pathJoin(modelDir, manifestName));

  return [...ordered];
}
303
+
304
/**
 * Read a context size straight from a manifest file on disk, for models
 * whose `source` is `"eliza-download"`.
 *
 * Each candidate manifest path is tried in order. A manifest is considered
 * only when it parses as a JSON object, its `id` (if a string) matches the
 * installed model, and it is marked staged: `defaultEligible === false` or a
 * `version` containing candidate/staged/dev/local (case-insensitive).
 * Unreadable or unparsable files are skipped on purpose.
 *
 * @returns The `ctx` (integer >= 256) of the first `files.text` entry whose
 *   path resolves to the installed model file, else `undefined`.
 */
function readLegacyStagedManifestTextContext(
  installed: InstalledModel,
): number | undefined {
  if (installed.source !== "eliza-download") return undefined;

  const targetPath = pathResolve(installed.path);
  const root = bundleRootForInstalledModel(installed);
  const stagedVersionPattern = /(?:candidate|staged|dev|local)/i;

  for (const manifestPath of candidateManifestPaths(installed)) {
    let decoded: unknown;
    try {
      decoded = JSON.parse(readFileSync(manifestPath, "utf8"));
    } catch {
      // Best-effort probe: a missing or malformed file just means
      // "try the next candidate".
      continue;
    }
    if (typeof decoded !== "object" || decoded === null) continue;

    const manifest = decoded as {
      id?: unknown;
      version?: unknown;
      defaultEligible?: unknown;
      files?: { text?: unknown };
    };

    const belongsToOtherModel =
      typeof manifest.id === "string" && manifest.id !== installed.id;
    if (belongsToOtherModel) continue;

    const markedStaged =
      manifest.defaultEligible === false ||
      (typeof manifest.version === "string" &&
        stagedVersionPattern.test(manifest.version));
    if (!markedStaged) continue;

    const textFiles = manifest.files?.text;
    if (!Array.isArray(textFiles)) continue;

    for (const item of textFiles) {
      if (typeof item !== "object" || item === null) continue;
      const { path: relPath, ctx } = item as {
        path?: unknown;
        ctx?: unknown;
      };
      if (typeof relPath !== "string") continue;
      if (typeof ctx !== "number" || !Number.isInteger(ctx) || ctx < 256) {
        continue;
      }
      if (pathResolve(root, relPath) === targetPath) return ctx;
    }
  }
  return undefined;
}
350
+
351
/**
 * Context size recorded for an installed bundle.
 *
 * Asks `manifestLoader` for the manifest first; when that yields a context
 * size for the installed file it wins. Otherwise falls back to reading a
 * staged manifest directly from disk.
 */
function installedBundleContextSize(
  installed: InstalledModel,
  manifestLoader: ManifestLoader,
): number | undefined {
  const manifest = manifestLoader(installed.id, installed);
  const fromManifest = manifest
    ? manifestTextContextForInstalledPath(installed, manifest)
    : undefined;
  return fromManifest !== undefined
    ? fromManifest
    : readLegacyStagedManifestTextContext(installed);
}
365
+
366
/**
 * Fill `args` in place with defaults taken from the catalog entry and the
 * installed bundle.
 *
 * KV cache types are written whenever the catalog declares them. Every other
 * field is written only while it is still `undefined` on `args`.
 */
function applyCatalogDefaults(
  args: LocalInferenceLoadArgs,
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
  manifestLoader: ManifestLoader,
): void {
  const kvCache = catalog?.runtime?.kvCache;
  const optimizations = catalog?.runtime?.optimizations;

  // KV cache types declared by the catalog runtime block.
  if (kvCache?.typeK) args.cacheTypeK = kvCache.typeK;
  if (kvCache?.typeV) args.cacheTypeV = kvCache.typeV;

  // Context window: prefer what the installed bundle's manifest records for
  // this file, then the catalog's `contextLength`, so the loader is not left
  // to pick its own default.
  if (args.contextSize === undefined) {
    const bundleContext = installedBundleContextSize(installed, manifestLoader);
    args.contextSize = bundleContext ?? catalog?.contextLength;
  }

  // Only a numeric catalog `gpuLayers` is forwarded; any other value is left
  // for the loader to decide.
  const catalogGpuLayers = catalog?.gpuLayers;
  if (typeof catalogGpuLayers === "number" && args.gpuLayers === undefined) {
    args.gpuLayers = catalogGpuLayers;
  }

  const flashAttention = optimizations?.flashAttention;
  if (flashAttention !== undefined && args.flashAttention === undefined) {
    args.flashAttention = flashAttention;
  }

  // The catalog expresses mmap as the negative flag `noMmap`.
  const noMmap = optimizations?.noMmap;
  if (noMmap !== undefined && args.mmap === undefined) {
    args.mmap = !noMmap;
  }

  const mlock = optimizations?.mlock;
  if (mlock !== undefined && args.mlock === undefined) {
    args.mlock = mlock;
  }
}
420
+
421
/**
 * Copy every override that is not `undefined` onto `args`, in place.
 * Only the ten known override fields are considered; a missing `overrides`
 * object is a no-op.
 */
function mergeOverrides(
  args: LocalInferenceLoadArgs,
  overrides: LocalInferenceLoadOverrides | undefined,
): void {
  if (!overrides) return;

  const {
    contextSize,
    cacheTypeK,
    cacheTypeV,
    gpuLayers,
    kvOffload,
    flashAttention,
    mmap,
    mlock,
    useGpu,
    maxThreads,
  } = overrides;

  if (contextSize !== undefined) {
    args.contextSize = contextSize;
  }
  if (cacheTypeK !== undefined) {
    args.cacheTypeK = cacheTypeK;
  }
  if (cacheTypeV !== undefined) {
    args.cacheTypeV = cacheTypeV;
  }
  if (gpuLayers !== undefined) {
    args.gpuLayers = gpuLayers;
  }
  if (kvOffload !== undefined) {
    args.kvOffload = kvOffload;
  }
  if (flashAttention !== undefined) {
    args.flashAttention = flashAttention;
  }
  if (mmap !== undefined) {
    args.mmap = mmap;
  }
  if (mlock !== undefined) {
    args.mlock = mlock;
  }
  if (useGpu !== undefined) {
    args.useGpu = useGpu;
  }
  if (maxThreads !== undefined) {
    args.maxThreads = maxThreads;
  }
}
443
+
444
/**
 * Locate the vision projector (mmproj) GGUF for an installed model.
 *
 * The catalog's `sourceModel.components.vision.file` is a repository-relative
 * path such as `bundles/2b/vision/mmproj-2b.gguf`. Because the local
 * `bundleRoot` already stands for the per-tier `bundles/<tier>` subtree, that
 * leading segment is stripped before joining. Paths without the prefix are
 * joined unchanged.
 *
 * @returns The path under `bundleRoot` when the catalog declares a vision
 *   file and it exists on disk. `undefined` when there is no catalog entry,
 *   no vision file, no `bundleRoot`, or the file is absent. This function
 *   does not log or throw in those cases.
 */
export function resolveMmprojPath(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
): string | undefined {
  const visionFile = catalog?.sourceModel?.components?.vision?.file;
  const root = installed.bundleRoot;
  if (!visionFile || !root) return undefined;

  const onDisk = pathJoin(root, stripBundlePrefix(visionFile, installed.id));
  return existsSync(onDisk) ? onDisk : undefined;
}
481
+
482
/**
 * Locate the MTP drafter GGUF for an installed bundle.
 *
 * Manifest `files.mtp` entries are tried first, in order; the first one that
 * exists under `bundleRoot` wins. Otherwise the catalog's
 * `runtime.mtp.drafterFile` (or, failing that,
 * `sourceModel.components.mtp.file`) is tried after stripping its
 * `bundles/<tier>/` prefix.
 *
 * @returns The existing drafter path, or `undefined` when the model has no
 *   `bundleRoot` or no candidate file is on disk.
 */
function resolveMtpDrafterPath(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
  manifestLoader: ManifestLoader,
): string | undefined {
  const root = installed.bundleRoot;
  if (!root) return undefined;

  const manifest = manifestLoader(installed.id, installed);
  const manifestEntries = manifest?.files.mtp ?? [];
  for (const { path: relPath } of manifestEntries) {
    const fromManifest = pathJoin(root, relPath);
    if (existsSync(fromManifest)) return fromManifest;
  }

  const declaredFile =
    catalog?.runtime?.mtp?.drafterFile ??
    catalog?.sourceModel?.components?.mtp?.file;
  if (!declaredFile) return undefined;

  const fromCatalog = pathJoin(
    root,
    stripBundlePrefix(declaredFile, installed.id),
  );
  return existsSync(fromCatalog) ? fromCatalog : undefined;
}
505
+
506
/**
 * Make a catalog file path bundle-root-relative by dropping its leading
 * `bundles/<tier-slug>/` segment. The slug is the model id without its
 * `eliza-1-` prefix (or the whole id when that prefix is absent). Paths that
 * do not start with the expected segment are returned untouched.
 */
function stripBundlePrefix(catalogFile: string, modelId: string): string {
  const idPrefix = "eliza-1-";
  const tierSlug = modelId.startsWith(idPrefix)
    ? modelId.substring(idPrefix.length)
    : modelId;
  const bundleSegment = `bundles/${tierSlug}/`;
  return catalogFile.startsWith(bundleSegment)
    ? catalogFile.substring(bundleSegment.length)
    : catalogFile;
}
521
+
522
+ const DEFAULT_MOBILE_CONTEXT_CEILING = 8192; // Fallback returned by mobileContextCeiling() when ELIZA_MOBILE_CONTEXT_CEILING is unset or not an integer >= 256.
523
+
524
/**
 * Reports whether the process environment marks this runtime as iOS or
 * Android.
 *
 * Reads `ELIZA_MOBILE_PLATFORM`, falling back to `ELIZA_PLATFORM` when the
 * former is unset or empty. The value is trimmed and lower-cased before the
 * comparison. Returns false when `process` or `process.env` is unavailable.
 */
function isMobileLocalInferenceRuntime(): boolean {
  if (typeof process === "undefined" || !process.env) return false;
  const { ELIZA_MOBILE_PLATFORM, ELIZA_PLATFORM } = process.env;
  const marker = (ELIZA_MOBILE_PLATFORM || ELIZA_PLATFORM || "")
    .trim()
    .toLowerCase();
  return ["ios", "android"].includes(marker);
}
541
+
542
/**
 * Resolve the context-window ceiling applied on mobile hosts.
 *
 * Reads `ELIZA_MOBILE_CONTEXT_CEILING` and accepts it when it parses (base 10,
 * `parseInt` semantics, so leading digits win) to an integer of at least 256.
 * Anything else falls back to `DEFAULT_MOBILE_CONTEXT_CEILING`.
 *
 * Safe to call in a runtime without `process`/`process.env`: the default is
 * returned instead of raising a `ReferenceError`.
 *
 * @returns The ceiling, in tokens.
 */
function mobileContextCeiling(): number {
  // Guard the global itself; `process.env?.…` alone still throws when
  // `process` is not declared (e.g. a browser-like embedding).
  if (typeof process === "undefined" || !process.env) {
    return DEFAULT_MOBILE_CONTEXT_CEILING;
  }
  const raw = process.env.ELIZA_MOBILE_CONTEXT_CEILING?.trim();
  const parsed = raw ? Number.parseInt(raw, 10) : Number.NaN;
  return Number.isInteger(parsed) && parsed >= 256
    ? parsed
    : DEFAULT_MOBILE_CONTEXT_CEILING;
}
549
+
550
/**
 * Build the final load arguments for an installed model.
 *
 * Order of precedence, lowest to highest: catalog defaults
 * (`applyCatalogDefaults`), the resolved mmproj path, the MTP launch defaults
 * declared by the catalog runtime block, then caller `overrides`. After the
 * merge the mobile context ceiling is applied, KV cache type names are
 * normalised (trimmed, lower-cased) and the result is validated with
 * `allowFork: true`.
 *
 * @param installed - The installed model to load.
 * @param overrides - Optional caller overrides merged over the defaults.
 * @param options - Optional hooks; `manifestLoader` replaces the default
 *   manifest loader.
 * @returns The merged, clamped and validated load arguments.
 * @throws Error when the catalog declares a separate-drafter MTP, the model
 *   has a `bundleRoot`, and no drafter GGUF can be resolved under it.
 *   Validation failures from `validateLocalInferenceLoadArgs` also propagate.
 */
export async function resolveLocalInferenceLoadArgs(
  installed: InstalledModel,
  overrides?: LocalInferenceLoadOverrides,
  options: ResolveLocalInferenceLoadArgsOptions = {},
): Promise<LocalInferenceLoadArgs> {
  const args: LocalInferenceLoadArgs = { modelPath: installed.path };
  const catalog = findCatalogModel(installed.id);
  const runtime = catalog?.runtime;
  const manifestLoader = options.manifestLoader ?? defaultManifestLoader;

  applyCatalogDefaults(args, installed, catalog, manifestLoader);

  // WS2: when the tier declares vision and the per-tier mmproj GGUF is
  // already on disk, plumb the path. The text load is never gated on
  // mmproj — when the file is missing on a vision-capable tier the
  // coordinator emits a one-shot warning and continues.
  const mmprojPath = resolveMmprojPath(installed, catalog);
  if (mmprojPath) {
    args.mmprojPath = mmprojPath;
  }

  const mtp = runtime?.mtp;
  if (mtp) {
    // Native MTP launch defaults. Do NOT replace catalog `contextLength`
    // here; `applyCatalogDefaults` owns the chat-side context. The MTP
    // block only owns the speculative draft window.
    //
    // Two MTP shapes: same-file MTP embeds the NextN head in the text
    // GGUF (no `drafterFile` in the catalog) and runs with no separate
    // draft model; separate-drafter MTP declares a `drafterFile` and
    // requires the bundled drafter GGUF to be present on disk.
    const sameFileMtp = !mtp.drafterFile;
    const drafterPath = sameFileMtp
      ? undefined
      : resolveMtpDrafterPath(installed, catalog, manifestLoader);
    if (!sameFileMtp && installed.bundleRoot && !drafterPath) {
      throw new Error(
        `[local-inference] ${installed.id} declares a separate-drafter MTP but no bundled drafter GGUF was found under ${installed.bundleRoot}`,
      );
    }
    args.useGpu = true;
    args.draftModelPath = drafterPath;
    args.draftMin = mtp.draftMin;
    args.draftMax = mtp.draftMax;
    args.speculativeSamples = mtp.draftMax;
    args.mobileSpeculative = true;
  }

  mergeOverrides(args, overrides);

  // Mobile context ceiling. A 128k-trained model's catalog `contextLength`
  // (e.g. 131072) implies a multi-GB KV cache; loading it at full width on a
  // phone is impractically slow and OOMs, so the on-device agent's first reply
  // never lands. On iOS/Android clamp the context window and any speculative
  // draft window to a mobile-sane ceiling so local inference is usable;
  // desktop/server keep the full catalog ceiling. Override with
  // ELIZA_MOBILE_CONTEXT_CEILING for capable devices.
  //
  // The two windows are clamped independently: an oversized draft window is
  // still capped when no chat-side context size was resolved.
  if (isMobileLocalInferenceRuntime()) {
    const ceiling = mobileContextCeiling();
    if (args.contextSize !== undefined && args.contextSize > ceiling) {
      args.contextSize = ceiling;
    }
    if (
      args.draftContextSize !== undefined &&
      args.draftContextSize > ceiling
    ) {
      args.draftContextSize = ceiling;
    }
  }

  if (args.cacheTypeK) args.cacheTypeK = args.cacheTypeK.trim().toLowerCase();
  if (args.cacheTypeV) args.cacheTypeV = args.cacheTypeV.trim().toLowerCase();

  // Validate the final merged args. The route layer is the one
  // that calls `validateLocalInferenceLoadArgs` with `allowFork: false`
  // against just the overrides — see `local-inference-compat-routes.ts`.
  validateLocalInferenceLoadArgs(args, { allowFork: true });
  return args;
}
627
+
628
/** Megabytes per gigabyte (1024); converts a probe's RAM in GB into MB. */
const MB_PER_GB = 2 ** 10;
629
+
630
/**
 * Raised when a model's boot floor exceeds the RAM usable on this host.
 *
 * Besides the human-readable message, the error carries the numbers that
 * produced the refusal and, when one exists, the id of a context variant of
 * the same tier that would fit (`null` when none does).
 */
export class ModelDoesNotFitError extends Error {
  readonly modelId: string;
  readonly requiredMb: number;
  readonly usableMb: number;
  readonly hostRamMb: number;
  readonly fittingVariantId: string | null;

  constructor(args: {
    modelId: string;
    requiredMb: number;
    usableMb: number;
    hostRamMb: number;
    fittingVariantId: string | null;
  }) {
    // Three cases: no fitting variant at all, the fitting variant is the
    // requested model itself (nothing useful to suggest), or a different one.
    let variantHint: string;
    if (!args.fittingVariantId) {
      variantHint = " No context variant of this tier fits this host.";
    } else if (args.fittingVariantId === args.modelId) {
      variantHint = "";
    } else {
      variantHint = ` The largest context variant of this tier that would fit is "${args.fittingVariantId}".`;
    }
    super(
      `[local-inference] Model "${args.modelId}" needs ~${args.requiredMb} MB RAM to boot, but only ~${args.usableMb} MB are usable on this host (${args.hostRamMb} MB total, after the OS/runtime headroom reserve). Refusing to load it.${variantHint} Pick a smaller tier in Settings → Model Hub, or set ELIZA_LOCAL_RAM_HEADROOM_MB lower if you accept running closer to the limit.`,
    );
    const { modelId, requiredMb, usableMb, hostRamMb, fittingVariantId } = args;
    this.name = "ModelDoesNotFitError";
    this.modelId = modelId;
    this.requiredMb = requiredMb;
    this.usableMb = usableMb;
    this.hostRamMb = hostRamMb;
    this.fittingVariantId = fittingVariantId;
  }
}
660
+
661
/**
 * RAM admission gate for a single model load.
 *
 * Looks the model up in the catalog and asks `assessRamFit` whether it fits
 * `hostRamMb` (the host's total RAM in megabytes). `installed` is forwarded in
 * the fit options so a manifest-declared `ramBudgetMb` can win over the
 * catalog scalar.
 *
 * Models without a catalog entry (external HF blobs) are not gated: there is
 * no RAM budget to check against, so the operator's explicit pick is trusted
 * and a `fits` decision with zeroed budgets is returned.
 *
 * @param installed - The model about to be loaded.
 * @param hostRamMb - Total host RAM in megabytes.
 * @param options - Extra fit options passed through to the assessors.
 * @returns The advisory decision (`fits` or `tight`) plus the budget numbers,
 *   so callers can log a `tight` warning.
 * @throws ModelDoesNotFitError when the model does not fit; the error names
 *   the fitting context variant of the tier when one exists.
 */
export function assertModelFitsHost(
  installed: InstalledModel,
  hostRamMb: number,
  options: RamFitOptions = {},
): { level: "fits" | "tight"; minMb: number; recommendedMb: number } {
  const catalog = findCatalogModel(installed.id);
  if (!catalog) {
    return { level: "fits", minMb: 0, recommendedMb: 0 };
  }

  const fit = assessRamFit(catalog, hostRamMb, { ...options, installed });
  if (!fit.fits) {
    const fitting = pickFittingContextVariant(catalog, hostRamMb, {
      ...options,
      installed,
    });
    throw new ModelDoesNotFitError({
      modelId: installed.id,
      requiredMb: fit.budget.minMb,
      usableMb: fit.usableMb,
      hostRamMb,
      fittingVariantId: fitting?.id ?? null,
    });
  }

  // A fitting decision never reports `wontfit` to callers; map it to `tight`.
  const level = fit.level === "wontfit" ? "tight" : fit.level;
  return {
    level,
    minMb: fit.budget.minMb,
    recommendedMb: fit.budget.recommendedMb,
  };
}
699
+
700
/**
 * Typed refusal for a local-voice session, the bundle-level counterpart of
 * `ModelDoesNotFitError`. `assertVoiceBundleFitsHost` raises it when the whole
 * co-resident voice + text stack cannot fit the host's RAM (per R9 §2.3 /
 * §3.2).
 *
 * Catch it at the runtime's voice-session-start boundary and surface the
 * tier-warning copy (`TIER_WARNING_COPY[<tier>]`) — DO NOT load weights and
 * watch `MemoryMonitor` evict mid-session.
 */
export class VoiceBundleDoesNotFitError extends Error {
  readonly tierSlot: string;
  readonly deviceTier: string;
  readonly requiredPeakMb: number;
  readonly requiredSteadyStateMb: number;
  readonly usableMb: number;
  readonly hostRamMb: number;

  constructor(args: {
    tierSlot: string;
    deviceTier: string;
    requiredPeakMb: number;
    requiredSteadyStateMb: number;
    usableMb: number;
    hostRamMb: number;
  }) {
    // The message reports the transient TTS peak as the delta over steady state.
    const transientPeakMb = args.requiredPeakMb - args.requiredSteadyStateMb;
    super(
      `[local-inference] The voice bundle for tier "${args.tierSlot}" needs ~${args.requiredSteadyStateMb} MB steady-state (+~${transientPeakMb} MB transient TTS peak) but only ~${args.usableMb} MB are usable on this host (${args.hostRamMb} MB total, after the OS/runtime headroom reserve). Refusing to start local voice; the runtime should fall back to cloud TTS+ASR or refuse the user-facing action.`,
    );
    const {
      tierSlot,
      deviceTier,
      requiredPeakMb,
      requiredSteadyStateMb,
      usableMb,
      hostRamMb,
    } = args;
    this.name = "VoiceBundleDoesNotFitError";
    this.tierSlot = tierSlot;
    this.deviceTier = deviceTier;
    this.requiredPeakMb = requiredPeakMb;
    this.requiredSteadyStateMb = requiredSteadyStateMb;
    this.usableMb = usableMb;
    this.hostRamMb = hostRamMb;
  }
}
738
+
739
/**
 * Cross-model admission gate for the local-voice session. Sums the whole
 * co-resident bundle (LM + ASR + TTS + embedding + VAD +
 * wake-word + turn-detector + emotion + speaker-encoder + transient TTS
 * peak) via `assessVoiceBundleFits` and refuses entry when the host can't
 * fit it.
 *
 * A `tierSlot` that is not an own key of `VOICE_ENSEMBLE_BUDGETS` is treated
 * permissively: a `fits` decision with zeroed budgets is returned and the
 * per-tier check (`assertModelFitsHost`) remains the effective gate. The
 * lookup deliberately ignores inherited keys, so names such as `"toString"`
 * or `"constructor"` are not mistaken for known slots.
 *
 * Pair with `TIER_WARNING_COPY[deviceTier]` for user-facing UX.
 * R9 §1.4 + §2.3 + §3.2 spec.
 *
 * @param args.tierSlot - Voice tier slot to look up.
 * @param args.deviceTier - Device tier forwarded to the assessor.
 * @param args.hostRamMb - Total host RAM in megabytes.
 * @param args.reserveMb - Optional headroom reserve forwarded to the assessor.
 * @param args.strict - When not `false` (the default), a `wontfit` decision
 *   throws; when `false` the `wontfit` decision is returned instead.
 * @returns The fit decision.
 * @throws VoiceBundleDoesNotFitError on `wontfit` unless `strict` is `false`.
 */
export function assertVoiceBundleFitsHost(args: {
  tierSlot: string;
  deviceTier: string;
  hostRamMb: number;
  reserveMb?: number;
  strict?: boolean;
}): {
  level: "fits" | "tight" | "wontfit";
  steadyStateMb: number;
  peakMb: number;
  usableMb: number;
  fits: boolean;
} {
  // Own-property check: the `in` operator would also match keys inherited
  // from `Object.prototype` and route a bogus slot into the assessor.
  const isKnownSlot = Object.prototype.hasOwnProperty.call(
    VOICE_ENSEMBLE_BUDGETS,
    args.tierSlot,
  );
  if (!isKnownSlot) {
    // Unknown tier slot — be permissive: the runtime hasn't built a
    // canonical slot for this combination yet, and falling through to
    // `assertModelFitsHost` (the per-tier check) is the right default.
    // NOTE(review): 1536 presumably mirrors the assessor's default reserve —
    // confirm against `assessVoiceBundleFits`.
    return {
      level: "fits",
      steadyStateMb: 0,
      peakMb: 0,
      usableMb: Math.max(0, args.hostRamMb - (args.reserveMb ?? 1536)),
      fits: true,
    };
  }
  const decision = assessVoiceBundleFits({
    tierSlot: args.tierSlot as VoiceTierSlot,
    deviceTier: args.deviceTier as "MAX" | "GOOD" | "OKAY" | "POOR",
    hostRamMb: args.hostRamMb,
    reserveMb: args.reserveMb,
  });
  if (decision.level === "wontfit" && args.strict !== false) {
    throw new VoiceBundleDoesNotFitError({
      tierSlot: args.tierSlot,
      deviceTier: args.deviceTier,
      requiredPeakMb: Math.round(decision.peakMb),
      requiredSteadyStateMb: Math.round(decision.steadyStateMb),
      usableMb: Math.round(decision.usableMb),
      hostRamMb: args.hostRamMb,
    });
  }
  return {
    level: decision.level,
    steadyStateMb: decision.steadyStateMb,
    peakMb: decision.peakMb,
    usableMb: decision.usableMb,
    fits: decision.fits,
  };
}
802
+
803
/**
 * Convert the probe's total RAM from gigabytes to whole megabytes.
 *
 * @param probe - Hardware probe exposing `totalRamGb`.
 * @returns `totalRamGb * MB_PER_GB`, rounded to the nearest integer.
 */
function hostRamMbFromProbe(probe: HardwareProbe): number {
  const { totalRamGb } = probe;
  const totalMb = totalRamGb * MB_PER_GB;
  return Math.round(totalMb);
}
806
+
807
/**
 * Refusal raised when activation is requested for a model whose own
 * `eliza-1.manifest.json` says its text eval has not passed (`candidate.*` /
 * `weights-staged.*` tiers). The structured payload is what the route layer
 * surfaces verbatim to the API consumer: `manifestVersion` lets the UI say
 * "this tier isn't ready" with the real version string, and `failedEvals`
 * shows which checks are still red.
 *
 * Why the gate lives at activation and not only at download: the bundle may
 * already be on disk (hand-staged, manually copied, or downloaded before a
 * fail-state was recorded), so a download-only gate leaves a window where a
 * candidate-only bundle can be flipped into the active model slot and
 * silently emit `[unused]` tokens.
 *
 * See issue #7679 for the original symptom: the runtime activated a
 * candidate `1.0.0-candidate.1` bundle whose every `evals.*.passed` was
 * `false`, then served BERT/WordPiece reserved tokens (`[unused0..99]` /
 * `[PAD]`) as chat output with no actionable error.
 */
export class CandidateModelActivationError extends Error {
  readonly modelId: string;
  readonly manifestVersion: string;
  readonly failedEvals: ReadonlyArray<string>;

  constructor(args: {
    modelId: string;
    manifestVersion: string;
    failedEvals: ReadonlyArray<string>;
  }) {
    // Only mention failed evals when there is at least one to list.
    let evalSuffix = "";
    if (args.failedEvals.length > 0) {
      evalSuffix = ` Failed evals: ${args.failedEvals.join(", ")}.`;
    }
    super(
      `Model "${args.modelId}" is candidate-only — its manifest (version ${args.manifestVersion}) reports evals.textEval.passed=false. Refusing to activate.${evalSuffix} Wait for the publisher to flip the manifest off candidate/weights-staged and re-fetch the bundle.`,
    );
    const { modelId, manifestVersion, failedEvals } = args;
    this.name = "CandidateModelActivationError";
    this.modelId = modelId;
    this.manifestVersion = manifestVersion;
    this.failedEvals = failedEvals;
  }
}
849
+
850
/**
 * Activation eval gate.
 *
 * Loads the installed bundle's manifest through `manifestLoader` and refuses
 * activation unless `evals.textEval.passed` is exactly `true`. When the loader
 * yields no manifest (third-party HF GGUFs, external scans, pre-bundle
 * installs) the model is *not* gated — the gate only applies to bundles that
 * ship a published manifest, which is the source of truth for publish state.
 *
 * @param installed - The model about to be activated.
 * @param manifestLoader - Manifest lookup; defaults to `defaultManifestLoader`.
 * @throws CandidateModelActivationError when a manifest exists and its text
 *   eval has not passed.
 */
export function assertManifestEvalsPassed(
  installed: InstalledModel,
  manifestLoader: ManifestLoader = defaultManifestLoader,
): void {
  const manifest = manifestLoader(installed.id, installed);
  if (manifest && manifest.evals.textEval.passed !== true) {
    throw new CandidateModelActivationError({
      modelId: installed.id,
      manifestVersion: manifest.version,
      failedEvals: collectFailedEvalNames(manifest),
    });
  }
}
874
+
875
/**
 * List the names of the manifest evals that are not marked as passed.
 *
 * `textEval`, `voiceRtf`, `e2eLoopOk` and `thirtyTurnOk` are always checked.
 * `asrWer`, `embedMteb`, `vadLatencyMs`, `expressive` and `turnDetector` are
 * only reported when the manifest declares them and they have not passed.
 * Names are returned in that fixed order.
 *
 * @param manifest - The bundle manifest to inspect.
 * @returns Names of evals whose result is anything other than `true`.
 */
function collectFailedEvalNames(manifest: Eliza1Manifest): string[] {
  const { evals } = manifest;
  // An optional eval only counts as failed when it is present and not passed.
  const declaredButNotPassed = (
    gate: { passed: boolean } | undefined,
  ): boolean => Boolean(gate) && gate?.passed !== true;

  const outcomes: Array<[name: string, failed: boolean]> = [
    ["textEval", evals.textEval.passed !== true],
    ["voiceRtf", evals.voiceRtf.passed !== true],
    ["e2eLoopOk", evals.e2eLoopOk !== true],
    ["thirtyTurnOk", evals.thirtyTurnOk !== true],
    ["asrWer", declaredButNotPassed(evals.asrWer)],
    ["embedMteb", declaredButNotPassed(evals.embedMteb)],
    ["vadLatencyMs", declaredButNotPassed(evals.vadLatencyMs)],
    ["expressive", declaredButNotPassed(evals.expressive)],
    ["turnDetector", declaredButNotPassed(evals.turnDetector)],
  ];
  return outcomes.filter(([, failed]) => failed).map(([name]) => name);
}
897
+
898
/**
 * Structural type guard for a runtime-registered loader service.
 *
 * @param value - Candidate service object.
 * @returns `true` when `value` is a non-null object exposing `loadModel`,
 *   `unloadModel` and `currentModelPath` as functions.
 */
function isLoader(value: unknown): value is LocalInferenceLoader {
  if (typeof value !== "object" || value === null) return false;
  const shape = value as Record<string, unknown>;
  const requiredMethods = ["loadModel", "unloadModel", "currentModelPath"];
  return requiredMethods.every((method) => typeof shape[method] === "function");
}
907
+
908
/**
 * Owns the "which local model is active" state for the host.
 *
 * The coordinator keeps an `ActiveModelState`, pushes a copy of it to
 * subscribers on every transition, and drives model switches and unloads
 * through either a runtime-registered loader service or the standalone
 * `localInferenceEngine`.
 */
export class ActiveModelCoordinator {
  private state: ActiveModelState = {
    modelId: null,
    loadedAt: null,
    status: "idle",
  };

  /**
   * The last model that successfully reached `status: "ready"`, plus the
   * inputs needed to re-load it. switchTo() tears the active model down
   * before loading the new one (unload-then-load); if the new load fails we
   * restore this so a failed switch never leaves the host with zero models
   * loaded while a working one existed moments earlier. `null` until the
   * first successful load (or after an unload).
   */
  private lastReady: {
    installed: InstalledModel;
    overrides?: LocalInferenceLoadOverrides;
    state: ActiveModelState;
  } | null = null;

  private readonly listeners = new Set<(state: ActiveModelState) => void>();

  /** Return a shallow copy of the current state. */
  snapshot(): ActiveModelState {
    return { ...this.state };
  }

  /**
   * Register a state listener.
   *
   * @param listener - Called with a copy of the state on every emit.
   * @returns A function that removes the listener.
   */
  subscribe(listener: (state: ActiveModelState) => void): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }

  /**
   * Notify every listener with one shared copy of the current state. A
   * listener that throws is removed, so it is not called again on later
   * emits and does not stop the remaining listeners from being notified.
   */
  private emit(): void {
    const current = { ...this.state };
    for (const listener of this.listeners) {
      try {
        listener(current);
      } catch {
        this.listeners.delete(listener);
      }
    }
  }

  /**
   * WS2: one-shot warning latch per (modelId) — when the tier declares
   * vision but no mmproj GGUF was found on disk, log once so the
   * operator sees that vision is degraded for this session. The
   * arbiter's vision-describe capability stays unregistered for this
   * session; plugin-vision falls back to its non-eliza-1 path.
   */
  private readonly warnedDegradedVisionFor = new Set<string>();

  /**
   * Warn (at most once per model id) when the catalog declares a vision
   * component for the tier but no mmproj path was resolved.
   */
  private warnIfVisionDegraded(
    installed: InstalledModel,
    resolvedMmprojPath: string | undefined,
  ): void {
    const catalog = findCatalogModel(installed.id);
    const tierClaimsVision = Boolean(
      catalog?.sourceModel?.components?.vision?.file,
    );
    if (!tierClaimsVision) return;
    if (resolvedMmprojPath) return;
    if (this.warnedDegradedVisionFor.has(installed.id)) return;
    this.warnedDegradedVisionFor.add(installed.id);
    console.warn(
      `[local-inference] vision capability unavailable for tier "${installed.id}" — the bundle declares vision/mmproj but the projector GGUF is not on disk under "${installed.bundleRoot ?? "<no-bundleRoot>"}". Text and voice will continue to load; plugin-vision will fall back to its Florence-2 path. Download the per-tier mmproj-<tier>.gguf to enable native vision-describe.`,
    );
  }

  /** Return the loader service from the current runtime, if registered. */
  private getLoader(runtime: AgentRuntime | null): LocalInferenceLoader | null {
    if (!runtime) return null;
    const candidate = (
      runtime as {
        getService?: (name: string) => unknown;
      }
    ).getService?.("localInferenceLoader");
    return isLoader(candidate) ? candidate : null;
  }

  /**
   * Make `installed` the active model.
   *
   * Flow: run the manifest eval gate, emit `loading`, then perform the
   * admission-checked load. On failure the previously ready model is either
   * re-loaded (when the failed attempt displaced it) or simply reinstated in
   * coordinator state (when the failure happened before any unload). If
   * neither is possible the state becomes `error` with `modelId: null`.
   *
   * Last-used metadata is refreshed only when the requested model actually
   * ended up `ready`; a failed switch never touches it.
   *
   * @param runtime - Agent runtime used to look up a registered loader.
   * @param installed - The model to activate.
   * @param overrides - Optional load-argument overrides.
   * @param opts - Optional hardware probe and manifest loader.
   * @returns A snapshot of the resulting state.
   * @throws CandidateModelActivationError from the eval gate, before any
   *   state change. Load failures are reported via the returned state, not
   *   thrown.
   */
  async switchTo(
    runtime: AgentRuntime | null,
    installed: InstalledModel,
    overrides?: LocalInferenceLoadOverrides,
    opts: { hardware?: HardwareProbe; manifestLoader?: ManifestLoader } = {},
  ): Promise<ActiveModelState> {
    // Activation eval gate (#7679). Refuse to flip a candidate-only /
    // weights-staged bundle into the active model slot — the manifest
    // already says its text eval hasn't passed, so the only thing
    // activation buys is `[unused]`/`[PAD]` tokens in chat output and
    // a confused user. Runs BEFORE the loading state is emitted so
    // the UI never shows "loading → error" for a known-bad bundle;
    // it sees the 422 from the route layer directly.
    assertManifestEvalsPassed(installed, opts.manifestLoader);

    this.state = {
      modelId: installed.id,
      loadedAt: null,
      status: "loading",
    };
    this.emit();

    // Prefer a runtime-registered loader (plugin-local-ai or equivalent)
    // when present — it will already have warmed up the right configuration.
    // Otherwise, fall back to the standalone engine, which is the default
    // path for users who haven't separately enabled plugin-local-ai.
    const loader = this.getLoader(runtime);

    // Snapshot the previously-active model BEFORE the unload-then-load tears
    // it down, so a failed switch can restore it instead of leaving zero
    // models loaded under the requested id.
    const previous = this.lastReady;
    let previousDisplaced = false;

    try {
      const ready = await this.performLoad(
        loader,
        installed,
        overrides,
        opts,
        () => {
          previousDisplaced = true;
        },
      );
      this.state = ready;
      this.lastReady = { installed, overrides, state: ready };
    } catch (err) {
      const failure = err instanceof Error ? err.message : String(err);
      if (previous) {
        // The backend is the source of truth for whether the previous model
        // is still resident after the failed attempt.
        previousDisplaced =
          (loader?.currentModelPath() ??
            localInferenceEngine.currentModelPath()) !==
          previous.installed.path;
      }
      // Attempt to restore the previously-active model. The unload-then-load
      // already tore it down, so without this the host has no model loaded.
      if (previous && previousDisplaced) {
        try {
          const restored = await this.performLoad(
            loader,
            previous.installed,
            previous.overrides,
            opts,
            () => {},
          );
          this.state = restored;
          this.lastReady = {
            installed: previous.installed,
            overrides: previous.overrides,
            state: restored,
          };
          console.warn(
            `[local-inference] Failed to switch to "${installed.id}" (${failure}); restored previously-active model "${previous.installed.id}".`,
          );
          this.emit();
          return this.snapshot();
        } catch (restoreErr) {
          const restoreFailure =
            restoreErr instanceof Error
              ? restoreErr.message
              : String(restoreErr);
          console.error(
            `[local-inference] Failed to switch to "${installed.id}" (${failure}) AND failed to restore "${previous.installed.id}" (${restoreFailure}). No model is loaded.`,
          );
        }
      } else if (previous) {
        // Admission/load-arg errors happen before unload, so the previous
        // model is still live. Restore the coordinator state without touching
        // the loader and surface the failed request only as a warning.
        this.state = previous.state;
        this.lastReady = previous;
        console.warn(
          `[local-inference] Refused to switch to "${installed.id}" before unloading the active model "${previous.installed.id}" (${failure}).`,
        );
        this.emit();
        return this.snapshot();
      }
      // No prior model to restore (or restore also failed): report honestly
      // that nothing is loaded rather than attributing a phantom id.
      this.lastReady = null;
      this.state = {
        modelId: null,
        loadedAt: null,
        status: "error",
        error: failure,
      };
    }

    this.emit();
    // Only a model that actually became ready counts as "used". The error
    // fall-through above also reaches this point and must not refresh the
    // last-used metadata of a model that never loaded.
    if (this.state.status === "ready" && installed.source === "eliza-download") {
      try {
        await touchElizaModel(installed.id);
      } catch (err) {
        console.warn(
          `[local-inference] Model "${installed.id}" loaded, but failed to update last-used metadata: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }
    return this.snapshot();
  }

  /**
   * Run the unload-then-load against the loader (or standalone engine) and
   * build the `status: "ready"` state. Throws on any load failure; never
   * mutates `this.state`/`this.lastReady` so callers control rollback.
   *
   * @param markPreviousDisplaced - Invoked right before the loader path
   *   unloads the current model.
   */
  private async performLoad(
    loader: LocalInferenceLoader | null,
    installed: InstalledModel,
    overrides: LocalInferenceLoadOverrides | undefined,
    opts: { hardware?: HardwareProbe; manifestLoader?: ManifestLoader },
    markPreviousDisplaced: () => void,
  ): Promise<ActiveModelState> {
    // RAM-budget admission control (W10 / J1): refuse a model that won't
    // fit this host *before* touching the loader, so we never half-load
    // and OOM. `assertModelFitsHost` throws `ModelDoesNotFitError` with
    // the specific numbers + the largest fitting variant of the tier.
    const probe = opts.hardware ?? (await probeHardware());
    const admission = assertModelFitsHost(installed, hostRamMbFromProbe(probe));
    if (admission.level === "tight") {
      console.warn(
        `[local-inference] Loading "${installed.id}" with tight RAM headroom (~${admission.minMb} MB floor, ${admission.recommendedMb} MB recommended; ${hostRamMbFromProbe(probe)} MB host). Expect swapping under sustained load.`,
      );
    }
    const resolved = await resolveLocalInferenceLoadArgs(installed, overrides);
    // WS2: warn one-shot when the tier declares vision but the
    // per-tier mmproj GGUF isn't on disk yet. The text load still
    // proceeds; vision capability is degraded for this session
    // (plugin-vision falls back to its Florence-2 path).
    this.warnIfVisionDegraded(installed, resolved.mmprojPath);
    if (loader) {
      markPreviousDisplaced();
      await loader.unloadModel();
      await loader.loadModel(resolved);
    } else {
      await localInferenceEngine.load(installed.path, resolved);
    }
    // Only the standalone engine reports the config it actually applied.
    const runtimeLoad = loader
      ? null
      : localInferenceEngine.currentRuntimeLoadConfig();
    // Surface the effective load config so consumers (the benchmark
    // harness, the Settings UI, the active-model SSE) can verify the
    // requested overrides actually took hold instead of silently
    // falling back to a smaller context or fp16 KV.
    return {
      modelId: installed.id,
      loadedAt: new Date().toISOString(),
      status: "ready",
      loadedContextSize:
        runtimeLoad?.contextSize ?? resolved.contextSize ?? null,
      loadedCacheTypeK: runtimeLoad
        ? runtimeLoad.cacheTypeK
        : (resolved.cacheTypeK ?? null),
      loadedCacheTypeV: runtimeLoad
        ? runtimeLoad.cacheTypeV
        : (resolved.cacheTypeV ?? null),
      loadedGpuLayers:
        runtimeLoad !== null
          ? runtimeLoad.gpuLayers
          : typeof resolved.gpuLayers === "number"
            ? resolved.gpuLayers
            : null,
    };
  }

  /**
   * Unload the active model via the registered loader or the standalone
   * engine.
   *
   * @param runtime - Agent runtime used to look up a registered loader.
   * @returns A snapshot of the resulting state: `idle` on success, `error`
   *   (carrying the failure message) when the unload call throws.
   */
  async unload(runtime: AgentRuntime | null): Promise<ActiveModelState> {
    const loader = this.getLoader(runtime);
    try {
      if (loader) {
        await loader.unloadModel();
      } else {
        await localInferenceEngine.unload();
      }
    } catch (err) {
      this.state = {
        modelId: null,
        loadedAt: null,
        status: "error",
        error: err instanceof Error ? err.message : String(err),
        loadedContextSize: null,
        loadedCacheTypeK: null,
        loadedCacheTypeV: null,
        loadedGpuLayers: null,
      };
      this.emit();
      return this.snapshot();
    }
    // The model was deliberately unloaded — drop the restore snapshot so a
    // later failed switch doesn't silently re-load a model the operator
    // asked to unload.
    this.lastReady = null;
    this.state = {
      modelId: null,
      loadedAt: null,
      status: "idle",
      loadedContextSize: null,
      loadedCacheTypeK: null,
      loadedCacheTypeV: null,
      loadedGpuLayers: null,
    };
    this.emit();
    return this.snapshot();
  }
}