@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893)
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,1416 @@
1
+ /**
2
+ * Coordinates which model is currently loaded into the plugin-local-ai
3
+ * runtime. Eliza runs one inference model at a time; switching models
4
+ * unloads the previous one first so we don't double-allocate VRAM.
5
+ *
6
+ * This module *does not* talk to `capacitor-llama` directly. The plugin
7
+ * owns the native binding; we ask it to swap via a small runtime service
8
+ * registered under the name "localInferenceLoader". When the plugin is not
9
+ * enabled, we still track the user's preferred active model so the
10
+ * preference survives enabling the plugin later.
11
+ */
12
+
13
+ import { existsSync, readFileSync } from "node:fs";
14
+ import {
15
+ dirname as pathDirname,
16
+ join as pathJoin,
17
+ resolve as pathResolve,
18
+ } from "node:path";
19
+ import type { AgentRuntime } from "@elizaos/core";
20
+ import {
21
+ ELIZA_1_PLACEHOLDER_IDS,
22
+ FIRST_RUN_DEFAULT_MODEL_ID,
23
+ findCatalogModel,
24
+ } from "./catalog";
25
+ import {
26
+ computeRuntimeContextFit,
27
+ type RuntimeContextFit,
28
+ } from "./context-fit";
29
+ import { localInferenceEngine } from "./engine";
30
+ import { probeHardware } from "./hardware";
31
+ import {
32
+ type Eliza1Kernel,
33
+ type Eliza1Manifest,
34
+ type Eliza1Tier,
35
+ missingRequiredKernels,
36
+ OPTIONAL_KERNELS_BY_TIER,
37
+ REQUIRED_KERNELS_BY_TIER,
38
+ RUNTIME_TO_ELIZA1_KERNEL,
39
+ } from "./manifest";
40
+ import {
41
+ assessRamFit,
42
+ defaultManifestLoader,
43
+ type ManifestLoader,
44
+ pickFittingContextVariant,
45
+ type RamFitOptions,
46
+ ramHeadroomReserveMb,
47
+ } from "./ram-budget";
48
+ import { recommendForFirstRun } from "./recommendation";
49
+ import { touchElizaModel } from "./registry";
50
+ import type {
51
+ ActiveModelState,
52
+ CatalogModel,
53
+ HardwareProbe,
54
+ InstalledModel,
55
+ } from "./types";
56
+ import {
57
+ assessVoiceBundleFits,
58
+ VOICE_ENSEMBLE_BUDGETS,
59
+ type VoiceTierSlot,
60
+ } from "./voice/voice-budget";
61
+
62
+ export type { KvOffloadMode, LocalInferenceLoadArgs } from "./load-args.js";
63
+ export {
64
+ ELIZA_1_PLACEHOLDER_IDS,
65
+ FIRST_RUN_DEFAULT_MODEL_ID,
66
+ recommendForFirstRun,
67
+ };
68
+
69
+ import type { KvOffloadMode, LocalInferenceLoadArgs } from "./load-args.js";
70
+
71
+ /**
72
+ * Allow-list for KV cache type strings. The eliza fork of node-llama-cpp
73
+ * (v3.18.1-eliza.3+) extends `GgmlType` with TBQ3_0 (43), TBQ4_0 (44),
74
+ * QJL1_256 (46), Q4_POLAR (47) so the binding accepts the lowercase
75
+ * aliases below. Whether the C++ kernel actually runs depends on the
76
+ * loaded native binary (formerly the node-llama-cpp NAPI prebuild, no longer used) — the elizaOS/llama.cpp
77
+ * prebuild ships the kernels; upstream's prebuild does not.
78
+ *
79
+ * `validateLocalInferenceLoadArgs({ allowFork: false })` (the route-layer
80
+ * default) still throws on these strings so a UI/API caller can't land
81
+ * the desktop on a kernel that won't run; `allowFork: true` (the AOSP +
82
+ * resolved-args path) lets them through.
83
+ */
84
+ const FORK_ONLY_KV_CACHE_TYPES = new Set([ // lowercase names; looked up by isForkOnlyKvCacheType() after trim + lowercase
85
+ "tbq1_0",
86
+ "tbq2_0",
87
+ "tbq3_0",
88
+ "tbq4_0",
89
+ "tbq3_0_tcq",
90
+ "turbo2",
91
+ "turbo3",
92
+ "turbo4",
93
+ "turbo2_0",
94
+ "turbo3_0",
95
+ "turbo4_0",
96
+ "turbo2_tcq",
97
+ "turbo3_tcq",
98
+ "qjl1_256",
99
+ "qjl1_512",
100
+ "q4_polar",
101
+ ]);
102
+
103
+ const STOCK_KV_CACHE_TYPES = new Set([ // KV cache type names accepted without allowFork; also listed verbatim in the validation error messages
104
+ "f16",
105
+ "f32",
106
+ "bf16",
107
+ "q4_0",
108
+ "q4_1",
109
+ "q5_0",
110
+ "q5_1",
111
+ "q8_0",
112
+ "q4_k",
113
+ "q5_k",
114
+ "q6_k",
115
+ "q8_k",
116
+ "iq4_nl",
117
+ ]);
118
+
119
+ export function isForkOnlyKvCacheType(name: string | undefined): boolean { // true when `name` is one of FORK_ONLY_KV_CACHE_TYPES
120
+ if (!name) return false; // undefined and "" are never a match
121
+ return FORK_ONLY_KV_CACHE_TYPES.has(name.trim().toLowerCase()); // comparison ignores surrounding whitespace and letter case
122
+ }
123
+
124
+ export function isStockKvCacheType(name: string | undefined): boolean { // true when `name` is one of STOCK_KV_CACHE_TYPES
125
+ if (!name) return false; // undefined and "" are never a match
126
+ return STOCK_KV_CACHE_TYPES.has(name.trim().toLowerCase()); // comparison ignores surrounding whitespace and letter case
127
+ }
128
+
129
+ /**
130
+ * Validate per-load overrides against what the in-process backend can
131
+ * honour. The AOSP loader has its own (broader) acceptance set — pass
132
+ * `{ allowFork: true }` to skip the desktop-only restriction.
133
+ *
134
+ * Throws on the first illegal value so the caller (the API route) can
135
+ * surface a 400 with a useful message instead of letting the load slip
136
+ * through and silently degrade to fp16.
137
+ */
138
+ export function validateLocalInferenceLoadArgs(
139
+ args: Partial<LocalInferenceLoadArgs>,
140
+ options: { allowFork?: boolean } = {},
141
+ ): void {
142
+ const allowFork = options.allowFork === true; // only an explicit `true` enables fork-only cache types
143
+ for (const field of ["cacheTypeK", "cacheTypeV"] as const) {
144
+ const value = args[field];
145
+ if (value === undefined) continue; // field not overridden: nothing to validate
146
+ if (typeof value !== "string" || value.length === 0) {
147
+ throw new Error(`${field} must be a non-empty string`);
148
+ }
149
+ if (!allowFork && isForkOnlyKvCacheType(value)) { // known fork-only type requested while fork types are disallowed
150
+ throw new Error(
151
+ `${field}="${value}" requires the elizaOS/llama.cpp kernel from the elizaOS fork. The elizaOS/capacitor-llama binding accepts the string at the TS layer, but the upstream @node-llama-cpp/<platform> prebuild does not implement the underlying ggml type. Pass through the AOSP path or load the elizaOS/llama.cpp prebuilt binary. Stock-only types accepted here: ${[...STOCK_KV_CACHE_TYPES].join(", ")}.`,
152
+ );
153
+ }
154
+ if (!allowFork && !isStockKvCacheType(value)) { // neither fork-only (handled above) nor stock: unknown name
155
+ throw new Error(
156
+ `${field}="${value}" is not a recognised KV cache type. Stock builds accept ${[...STOCK_KV_CACHE_TYPES].join(", ")}.`,
157
+ );
158
+ }
159
+ if (
160
+ allowFork &&
161
+ !isStockKvCacheType(value) &&
162
+ !isForkOnlyKvCacheType(value)
163
+ ) { // with allowFork the name must still appear in one of the two sets
164
+ throw new Error(
165
+ `${field}="${value}" is not a recognised KV cache type. Accepted stock types: ${[...STOCK_KV_CACHE_TYPES].join(", ")}. Accepted elizaOS fork types: ${[...FORK_ONLY_KV_CACHE_TYPES].join(", ")}.`,
166
+ );
167
+ }
168
+ }
169
+ if (args.contextSize !== undefined) {
170
+ if (
171
+ typeof args.contextSize !== "number" ||
172
+ !Number.isInteger(args.contextSize) ||
173
+ args.contextSize < 256 // 256 is the smallest context size accepted here
174
+ ) {
175
+ throw new Error(
176
+ `contextSize must be a positive integer >= 256 (got ${String(args.contextSize)})`,
177
+ );
178
+ }
179
+ }
180
+ if (args.gpuLayers !== undefined) {
181
+ if (
182
+ typeof args.gpuLayers !== "number" ||
183
+ !Number.isInteger(args.gpuLayers) ||
184
+ args.gpuLayers < 0
185
+ ) {
186
+ throw new Error(
187
+ `gpuLayers must be a non-negative integer (got ${String(args.gpuLayers)})`,
188
+ );
189
+ }
190
+ }
191
+ if (args.kvOffload !== undefined) {
192
+ const v = args.kvOffload;
193
+ if (typeof v === "string") { // string form: must be one of the three named modes
194
+ if (v !== "cpu" && v !== "gpu" && v !== "split") {
195
+ throw new Error(
196
+ `kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number } (got "${v}")`,
197
+ );
198
+ }
199
+ } else if ( // object form: only typeof gpuLayers is checked; NOTE(review): unlike args.gpuLayers above, no integer or >= 0 check
200
+ !v ||
201
+ typeof v !== "object" ||
202
+ typeof (v as { gpuLayers?: unknown }).gpuLayers !== "number"
203
+ ) {
204
+ throw new Error(
205
+ `kvOffload must be "cpu", "gpu", "split", or { gpuLayers: number }`,
206
+ );
207
+ }
208
+ }
209
+ for (const field of ["flashAttention", "mmap", "mlock"] as const) { // optional flags: when present they must be real booleans
210
+ const value = args[field];
211
+ if (value === undefined) continue;
212
+ if (typeof value !== "boolean") {
213
+ throw new Error(`${field} must be a boolean`);
214
+ }
215
+ }
216
+ }
217
+
218
/**
 * Contract a backend must satisfy to host the single active local model.
 *
 * `loadModel`, `unloadModel` and `currentModelPath` are mandatory; the
 * generation and embedding surfaces are optional capabilities.
 */
export interface LocalInferenceLoader {
  /** Load the model described by `args`. */
  loadModel(args: LocalInferenceLoadArgs): Promise<void>;
  /** Unload the currently loaded model. */
  unloadModel(): Promise<void>;
  /** Path of the currently loaded model, or `null`. */
  currentModelPath(): string | null;
  /**
   * Optional text-generation surface.
   *
   * If present, the runtime handler (`ensure-local-inference-handler.ts`)
   * sends TEXT_SMALL / TEXT_LARGE requests here rather than to the standalone
   * engine. The Capacitor adapter supplies it on mobile builds; desktop leaves
   * it out and uses `LocalInferenceEngine` instead.
   */
  generate?(args: {
    prompt: string;
    stopSequences?: string[];
    maxTokens?: number;
    temperature?: number;
    /**
     * Optional `promptCacheKey` taken from the runtime cache plan.
     *
     * Loaders with prefix caching (the in-process llama.cpp FFI slot pool or
     * the node-llama-cpp session pool) pin later calls carrying the same key
     * to the same KV cache slot. Loaders without prefix caching may ignore it.
     */
    cacheKey?: string;
  }): Promise<string>;
  /**
   * Optional embedding surface.
   *
   * If present, the runtime handler sends `TEXT_EMBEDDING` requests here. The
   * AOSP bun:ffi loader fills it in directly via `llama_get_embeddings_seq`;
   * the device-bridge loader fills it in by dispatching an `embed` frame to
   * the connected device. Loaders that cannot embed leave it undefined and
   * the runtime falls back to its non-local embedding provider chain.
   */
  embed?(args: { input: string }): Promise<{
    embedding: number[];
    tokens: number;
  }>;
}
256
+
257
/**
 * Caller-settable, per-load overrides.
 *
 * This is `LocalInferenceLoadArgs` without `modelPath` (owned by the
 * coordinator) and without the speculative fields (owned end-to-end by the
 * catalog `runtime.mtp` block). It is the shape the route layer accepts on
 * `POST /api/local-inference/active`.
 *
 * Every field is optional; an omitted field leaves the resolved default
 * untouched.
 */
export interface LocalInferenceLoadOverrides {
  /** Context window size. */
  contextSize?: number;
  /** KV cache type for keys. */
  cacheTypeK?: string;
  /** KV cache type for values. */
  cacheTypeV?: string;
  /** Number of layers to offload to the GPU. */
  gpuLayers?: number;
  /** KV cache offload mode. */
  kvOffload?: KvOffloadMode;
  flashAttention?: boolean;
  mmap?: boolean;
  mlock?: boolean;
  useGpu?: boolean;
  maxThreads?: number;
}
275
+
276
/**
 * Optional collaborators for `resolveLocalInferenceLoadArgs`.
 */
interface ResolveLocalInferenceLoadArgsOptions {
  /** Manifest lookup to use; the resolver falls back to `defaultManifestLoader`. */
  manifestLoader?: ManifestLoader;
  /** Hardware probe used for context fitting; without it no fit is computed. */
  hardware?: HardwareProbe;
}
280
+
281
/**
 * Directory that manifest-relative file paths are resolved against.
 *
 * Uses the recorded `bundleRoot` when there is one; otherwise falls back to
 * the grandparent directory of the model file.
 */
function bundleRootForInstalledModel(installed: InstalledModel): string {
  const recordedRoot = installed.bundleRoot;
  if (recordedRoot !== undefined && recordedRoot !== null) {
    return recordedRoot;
  }
  const modelDir = pathDirname(installed.path);
  return pathDirname(modelDir);
}
284
+
285
/**
 * Look up the context size the manifest declares for the installed model file.
 *
 * Scans `manifest.files.text` in order and returns the `ctx` of the first
 * entry whose `ctx` is an integer >= 256 and whose path, resolved against the
 * bundle root, equals the installed model path. Returns `undefined` when no
 * entry qualifies.
 */
function manifestTextContextForInstalledPath(
  installed: InstalledModel,
  manifest: Eliza1Manifest,
): number | undefined {
  const installedFile = pathResolve(installed.path);
  const root = bundleRootForInstalledModel(installed);
  const hasUsableCtx = (ctx: unknown): boolean =>
    typeof ctx === "number" && Number.isInteger(ctx) && ctx >= 256;

  const match = manifest.files.text.find(
    (candidate) =>
      hasUsableCtx(candidate.ctx) &&
      pathResolve(root, candidate.path) === installedFile,
  );
  return match ? match.ctx : undefined;
}
305
+
306
/**
 * Ordered, de-duplicated list of places an `eliza-1.manifest.json` may live
 * for this install: the recorded `manifestPath`, the recorded bundle root,
 * the model file's grandparent directory, then its parent directory.
 * Missing (falsy) candidates are dropped.
 */
function candidateManifestPaths(installed: InstalledModel): string[] {
  const manifestFileName = "eliza-1.manifest.json";
  const modelDir = pathDirname(installed.path);

  const ordered: Array<string | undefined> = [
    installed.manifestPath,
    installed.bundleRoot
      ? pathJoin(installed.bundleRoot, manifestFileName)
      : undefined,
    pathJoin(pathDirname(modelDir), manifestFileName),
    pathJoin(modelDir, manifestFileName),
  ];

  const unique = new Set<string>();
  for (const location of ordered) {
    if (location) unique.add(location);
  }
  return Array.from(unique);
}
317
+
318
/**
 * Fallback context-size lookup that reads manifest JSON straight from disk.
 *
 * Applies only to installs whose `source` is `"eliza-download"`. Each
 * candidate manifest path is tried in order; unreadable or unparseable files
 * are skipped. A manifest is considered only when it is staged/candidate
 * (`defaultEligible === false`, or a version string containing `candidate`,
 * `staged`, `dev` or `local`) and, if it carries a string `id`, that id equals
 * the installed id. The first text entry with an integer `ctx >= 256` whose
 * resolved path equals the installed model path wins.
 *
 * @returns the matching `ctx`, or `undefined` when nothing matches.
 */
function readLegacyStagedManifestTextContext(
  installed: InstalledModel,
): number | undefined {
  if (installed.source !== "eliza-download") return undefined;

  const installedFile = pathResolve(installed.path);
  const root = bundleRootForInstalledModel(installed);
  const stagedVersionPattern = /(?:candidate|staged|dev|local)/i;

  // Parse one manifest file; any read/parse failure yields `undefined`.
  const loadJson = (file: string): unknown => {
    try {
      return JSON.parse(readFileSync(file, "utf8"));
    } catch {
      return undefined;
    }
  };

  // Pull the matching ctx out of one parsed manifest, if it qualifies.
  const ctxFromManifest = (parsed: unknown): number | undefined => {
    if (!parsed || typeof parsed !== "object") return undefined;
    const raw = parsed as {
      id?: unknown;
      version?: unknown;
      defaultEligible?: unknown;
      files?: { text?: unknown };
    };
    if (typeof raw.id === "string" && raw.id !== installed.id) return undefined;

    const versionText = typeof raw.version === "string" ? raw.version : "";
    const isStaged =
      raw.defaultEligible === false || stagedVersionPattern.test(versionText);
    if (!isStaged) return undefined;

    const textEntries = raw.files?.text;
    if (!Array.isArray(textEntries)) return undefined;

    for (const item of textEntries) {
      if (!item || typeof item !== "object") continue;
      const { path: relPath, ctx } = item as { path?: unknown; ctx?: unknown };
      if (typeof relPath !== "string") continue;
      const ctxUsable =
        typeof ctx === "number" && Number.isInteger(ctx) && ctx >= 256;
      if (ctxUsable && pathResolve(root, relPath) === installedFile) {
        return ctx as number;
      }
    }
    return undefined;
  };

  for (const manifestFile of candidateManifestPaths(installed)) {
    const found = ctxFromManifest(loadJson(manifestFile));
    if (found !== undefined) return found;
  }
  return undefined;
}
364
+
365
/**
 * Context size declared for the installed model file by its bundle.
 *
 * Prefers the manifest returned by `manifestLoader`; when that manifest is
 * absent or has no matching text entry, falls back to the on-disk
 * staged-manifest reader.
 */
function installedBundleContextSize(
  installed: InstalledModel,
  manifestLoader: ManifestLoader,
): number | undefined {
  const loaded = manifestLoader(installed.id, installed);
  const declared = loaded
    ? manifestTextContextForInstalledPath(installed, loaded)
    : undefined;
  if (declared !== undefined) {
    return declared;
  }
  return readLegacyStagedManifestTextContext(installed);
}
379
+
380
/**
 * Seed `args` (mutated in place) with defaults taken from the catalog entry
 * and the installed bundle.
 *
 * - KV cache types come from `catalog.runtime.kvCache` and are written
 *   unconditionally when declared.
 * - `contextSize`, when not already set, becomes the runtime-fitted window if
 *   a fit is available, otherwise the native context (bundle-declared context,
 *   else catalog `contextLength`).
 * - `gpuLayers`, `flashAttention`, `mmap` and `mlock` are filled only when
 *   `args` leaves them undefined.
 */
function applyCatalogDefaults(
  args: LocalInferenceLoadArgs,
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
  manifestLoader: ManifestLoader,
  hardware: HardwareProbe | undefined,
): void {
  const kvCache = catalog?.runtime?.kvCache;
  const optimizations = catalog?.runtime?.optimizations;

  // Catalog KV cache types. Per-call overrides are merged in later and win.
  if (kvCache?.typeK) args.cacheTypeK = kvCache.typeK;
  if (kvCache?.typeV) args.cacheTypeV = kvCache.typeV;

  // With no explicit window, hand the loader the model's real ceiling rather
  // than letting the binding pick its own default ("auto" → smallest fitting,
  // which historically meant 4k or 8k even for 128k-trained models).
  if (args.contextSize === undefined) {
    const nativeContext =
      installedBundleContextSize(installed, manifestLoader) ??
      catalog?.contextLength;
    const fit = resolveRuntimeContextFit(
      installed,
      catalog,
      nativeContext,
      hardware,
    );
    args.contextSize = fit?.contextSize ?? nativeContext;

    // Headroom KV-precision upgrade: if the selector picked f16 (opt-in via
    // ELIZA_PREFER_ACCURATE_KV_WHEN_HEADROOM) and both cache types are still
    // unset or at the default q8_0, raise both to f16. This only ever
    // upgrades (#8809 AC#4).
    const isUnsetOrDefault = (cacheType: string | undefined): boolean =>
      cacheType === undefined || cacheType === "q8_0";
    if (
      fit?.kvQuant === "f16" &&
      isUnsetOrDefault(args.cacheTypeK) &&
      isUnsetOrDefault(args.cacheTypeV)
    ) {
      args.cacheTypeK = "f16";
      args.cacheTypeV = "f16";
    }
  }

  // Catalog GPU offload default. Only a numeric value is applied; `"auto"` is
  // already the loader's default, so nothing needs setting for it.
  const catalogGpuLayers = catalog?.gpuLayers;
  if (typeof catalogGpuLayers === "number" && args.gpuLayers === undefined) {
    args.gpuLayers = catalogGpuLayers;
  }

  // flashAttention default from the catalog optimizations block.
  if (
    args.flashAttention === undefined &&
    optimizations?.flashAttention !== undefined
  ) {
    args.flashAttention = optimizations.flashAttention;
  }

  // `noMmap === true` disables mmap explicitly; when the catalog says nothing
  // the loader default is left alone.
  if (args.mmap === undefined && optimizations?.noMmap !== undefined) {
    args.mmap = !optimizations.noMmap;
  }
  if (args.mlock === undefined && optimizations?.mlock !== undefined) {
    args.mlock = optimizations.mlock;
  }
}
454
+
455
/**
 * Weight size in MB for the installed model.
 *
 * Uses the installed `sizeBytes` when it is a positive finite number, else
 * the catalog's `sizeGb` (converted at 1024 MB per GB) when that is a
 * positive finite number, else `0`.
 */
function installedWeightMb(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
): number {
  const isPositiveFinite = (candidate: unknown): candidate is number =>
    typeof candidate === "number" &&
    Number.isFinite(candidate) &&
    candidate > 0;

  const bytesOnDisk: unknown = installed.sizeBytes;
  if (isPositiveFinite(bytesOnDisk)) {
    return bytesOnDisk / (1024 * 1024);
  }

  if (catalog) {
    const catalogGb: unknown = catalog.sizeGb;
    if (isPositiveFinite(catalogGb)) {
      return catalogGb * 1024;
    }
  }
  return 0;
}
476
+
477
/**
 * Whether the f16-KV-on-headroom path is opted into.
 *
 * True when `ELIZA_PREFER_ACCURATE_KV_WHEN_HEADROOM` is `1`, `true` or `yes`
 * (case-insensitive, surrounding whitespace ignored).
 */
function preferAccurateKvWhenHeadroom(): boolean {
  const raw = process.env.ELIZA_PREFER_ACCURATE_KV_WHEN_HEADROOM;
  if (raw === undefined || raw === null) return false;
  const flag = raw.trim().toLowerCase();
  return ["1", "true", "yes"].includes(flag);
}
483
+
484
/**
 * Ask `computeRuntimeContextFit` for a context window that fits this host.
 *
 * Returns `null` without computing anything when there is no catalog entry,
 * no native context, or no hardware probe. Usable memory is the probed host
 * RAM minus the headroom reserve, floored at zero.
 */
function resolveRuntimeContextFit(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
  nativeContext: number | undefined,
  hardware: HardwareProbe | undefined,
): RuntimeContextFit | null {
  const missingInput = !catalog || nativeContext === undefined || !hardware;
  if (missingInput) return null;

  const weightMb = installedWeightMb(installed, catalog);
  const afterReserveMb = hostRamMbFromProbe(hardware) - ramHeadroomReserveMb();
  const usableMb = Math.max(0, afterReserveMb);
  const preferAccurate = preferAccurateKvWhenHeadroom();

  return computeRuntimeContextFit({
    params: catalog.params,
    weightMb,
    usableMb,
    nativeContext,
    preferAccurateKvWhenHeadroom: preferAccurate,
  });
}
504
+
505
/**
 * Copy every defined override field onto `args` (mutated in place).
 *
 * Only the known override keys are copied, and a field explicitly set to
 * `undefined` is treated the same as an absent field.
 */
function mergeOverrides(
  args: LocalInferenceLoadArgs,
  overrides: LocalInferenceLoadOverrides | undefined,
): void {
  if (!overrides) return;

  const overridableKeys = [
    "contextSize",
    "cacheTypeK",
    "cacheTypeV",
    "gpuLayers",
    "kvOffload",
    "flashAttention",
    "mmap",
    "mlock",
    "useGpu",
    "maxThreads",
  ] as const;

  for (const key of overridableKeys) {
    const value = overrides[key];
    if (value !== undefined) {
      Object.assign(args, { [key]: value });
    }
  }
}
527
+
528
/**
 * Locate the per-tier mmproj (vision projector) GGUF for an installed model.
 *
 * A path is returned only when the catalog declares a vision component for
 * the tier, the install has a `bundleRoot`, and the file exists on disk.
 * In every other case the result is `undefined`: a text-only tier, a model
 * with no catalog entry, an install without a bundle root (external-scan
 * models such as LM Studio or Jan never land a vision component), or a
 * projector that has not been downloaded yet. In that last case the
 * coordinator emits a one-shot warning and the text load still succeeds,
 * with vision unavailable for the session.
 *
 * Path layout: the catalog's `sourceModel.components.vision.file` is
 * Hugging Face-relative (e.g. `bundles/2b/vision/mmproj-2b.gguf`), while the
 * local `bundleRoot` already stands for the `bundles/<tier>` subtree. The
 * leading `bundles/<tier>/` segment is therefore stripped before joining;
 * a path without that prefix (custom bundle layout) is joined unchanged.
 */
export function resolveMmprojPath(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
): string | undefined {
  const visionFile = catalog?.sourceModel?.components?.vision?.file;
  const root = installed.bundleRoot;
  if (!visionFile || !root) {
    return undefined;
  }
  const relative = stripBundlePrefix(visionFile, installed.id);
  const onDisk = pathJoin(root, relative);
  return existsSync(onDisk) ? onDisk : undefined;
}
565
+
566
/**
 * Locate the bundled MTP drafter GGUF for an installed model.
 *
 * Requires a `bundleRoot`. Manifest `files.mtp` entries are tried first, in
 * order; the first one that exists on disk wins. Otherwise the catalog's
 * `runtime.mtp.drafterFile` (or, failing that, `sourceModel.components.mtp.file`)
 * is resolved bundle-root-relative and returned if it exists.
 *
 * @returns the drafter path, or `undefined` when none is found on disk.
 */
function resolveMtpDrafterPath(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
  manifestLoader: ManifestLoader,
): string | undefined {
  const root = installed.bundleRoot;
  if (!root) return undefined;

  const manifestEntries =
    manifestLoader(installed.id, installed)?.files.mtp ?? [];
  for (const mtpEntry of manifestEntries) {
    const fromManifest = pathJoin(root, mtpEntry.path);
    if (existsSync(fromManifest)) {
      return fromManifest;
    }
  }

  const declaredFile =
    catalog?.runtime?.mtp?.drafterFile ??
    catalog?.sourceModel?.components?.mtp?.file;
  if (!declaredFile) return undefined;

  const fromCatalog = pathJoin(
    root,
    stripBundlePrefix(declaredFile, installed.id),
  );
  return existsSync(fromCatalog) ? fromCatalog : undefined;
}
589
+
590
/**
 * Turn a catalog (Hugging Face-relative) file path into a bundle-root-relative
 * one by removing its leading `bundles/<tier-slug>/` segment.
 *
 * The tier slug is the model id with any `eliza-1-` prefix removed. A path
 * that does not start with the expected prefix is returned as-is.
 */
function stripBundlePrefix(catalogFile: string, modelId: string): string {
  const idPrefix = "eliza-1-";
  const tierSlug = modelId.startsWith(idPrefix)
    ? modelId.substring(idPrefix.length)
    : modelId;
  const hfPrefix = "bundles/" + tierSlug + "/";
  if (!catalogFile.startsWith(hfPrefix)) {
    return catalogFile;
  }
  return catalogFile.substring(hfPrefix.length);
}
605
+
606
/** Context-window ceiling (8192) applied on mobile when no valid override is configured. */
const DEFAULT_MOBILE_CONTEXT_CEILING = 8 * 1024;
607
+
608
/**
 * Reports whether this on-device inference runtime is a memory-constrained
 * mobile platform (iOS/Android).
 *
 * The agent runs inside the embedded engine and the host injects a platform
 * marker into the process env at start. `ELIZA_MOBILE_PLATFORM` is consulted
 * first and `ELIZA_PLATFORM` only when the former is empty or unset; the
 * value is compared case-insensitively after trimming. Desktop and server
 * carry no marker, so they keep the full catalog context ceiling.
 */
function isMobileLocalInferenceRuntime(): boolean {
  if (typeof process === "undefined" || !process.env) {
    return false;
  }
  const { ELIZA_MOBILE_PLATFORM: mobileMarker, ELIZA_PLATFORM: genericMarker } =
    process.env;
  const marker = (mobileMarker || genericMarker || "").trim().toLowerCase();
  return ["ios", "android"].includes(marker);
}
625
+
626
/**
 * Context-window ceiling to apply on mobile.
 *
 * Reads `ELIZA_MOBILE_CONTEXT_CEILING` (base-10 `parseInt` of the trimmed
 * value) and uses it when it is an integer >= 256; otherwise returns
 * `DEFAULT_MOBILE_CONTEXT_CEILING`.
 */
function mobileContextCeiling(): number {
  const configured = process.env?.ELIZA_MOBILE_CONTEXT_CEILING?.trim();
  if (!configured) {
    return DEFAULT_MOBILE_CONTEXT_CEILING;
  }
  const ceiling = Number.parseInt(configured, 10);
  const usable = Number.isInteger(ceiling) && ceiling >= 256;
  return usable ? ceiling : DEFAULT_MOBILE_CONTEXT_CEILING;
}
633
+
634
/**
 * Build the final, validated load arguments for an installed model.
 *
 * Steps, in order: catalog/bundle defaults, mmproj path, MTP (speculative)
 * launch defaults, caller overrides, the mobile context ceiling, cache-type
 * normalisation (trim + lower-case), and validation with fork-only cache
 * types allowed.
 *
 * @param installed the installed model to load.
 * @param overrides optional per-load overrides; they win over catalog and MTP
 *   defaults.
 * @param options optional manifest loader and hardware probe.
 * @returns the merged load arguments.
 * @throws Error when the catalog declares a separate-drafter MTP, the install
 *   has a bundle root, and no drafter GGUF is found; also whatever
 *   `validateLocalInferenceLoadArgs` throws for the merged arguments.
 */
export async function resolveLocalInferenceLoadArgs(
  installed: InstalledModel,
  overrides?: LocalInferenceLoadOverrides,
  options: ResolveLocalInferenceLoadArgsOptions = {},
): Promise<LocalInferenceLoadArgs> {
  const loadArgs: LocalInferenceLoadArgs = { modelPath: installed.path };
  const catalog = findCatalogModel(installed.id);
  const manifestLoader = options.manifestLoader ?? defaultManifestLoader;

  applyCatalogDefaults(
    loadArgs,
    installed,
    catalog,
    manifestLoader,
    options.hardware,
  );

  // WS2: plumb the per-tier mmproj GGUF when the tier declares vision and the
  // file is already on disk. The text load is never gated on it — if the file
  // is missing on a vision-capable tier the coordinator emits a one-shot
  // warning and carries on.
  const visionProjector = resolveMmprojPath(installed, catalog);
  if (visionProjector) {
    loadArgs.mmprojPath = visionProjector;
  }

  const mtpConfig = catalog?.runtime?.mtp;
  if (mtpConfig) {
    // Native MTP launch defaults. The chat-side context stays owned by
    // `applyCatalogDefaults`; this block only owns the speculative draft
    // settings.
    //
    // Two MTP shapes exist. Embedded-draft-head MTP keeps the draft head
    // inside the text GGUF (no `drafterFile` in the catalog) and runs without
    // a separate draft model. Separate-drafter MTP declares a `drafterFile`
    // and needs the bundled drafter GGUF on disk.
    let drafterPath: string | undefined;
    if (mtpConfig.drafterFile) {
      drafterPath = resolveMtpDrafterPath(installed, catalog, manifestLoader);
      if (installed.bundleRoot && !drafterPath) {
        throw new Error(
          `[local-inference] ${installed.id} declares a separate-drafter MTP but no bundled drafter GGUF was found under ${installed.bundleRoot}`,
        );
      }
    }
    loadArgs.useGpu = true;
    loadArgs.draftModelPath = drafterPath;
    loadArgs.draftMin = mtpConfig.draftMin;
    loadArgs.draftMax = mtpConfig.draftMax;
    loadArgs.speculativeSamples = mtpConfig.draftMax;
    loadArgs.mobileSpeculative = true;
  }

  mergeOverrides(loadArgs, overrides);

  // Mobile context ceiling. A 128k-trained model's catalog `contextLength`
  // (e.g. 131072) implies a multi-GB KV cache; at full width on a phone the
  // load is impractically slow and OOMs, so the on-device agent's first reply
  // never lands. On iOS/Android the context window (and any speculative draft
  // window) is clamped to a mobile-sane ceiling; desktop/server keep the full
  // catalog ceiling. ELIZA_MOBILE_CONTEXT_CEILING overrides the ceiling for
  // capable devices.
  if (loadArgs.contextSize !== undefined && isMobileLocalInferenceRuntime()) {
    const ceiling = mobileContextCeiling();
    if (loadArgs.contextSize > ceiling) {
      loadArgs.contextSize = ceiling;
    }
    const draftWindow = loadArgs.draftContextSize;
    if (draftWindow !== undefined && draftWindow > ceiling) {
      loadArgs.draftContextSize = ceiling;
    }
  }

  // Normalise cache type spelling before validation.
  if (loadArgs.cacheTypeK) {
    loadArgs.cacheTypeK = loadArgs.cacheTypeK.trim().toLowerCase();
  }
  if (loadArgs.cacheTypeV) {
    loadArgs.cacheTypeV = loadArgs.cacheTypeV.trim().toLowerCase();
  }

  // Validate the final merged args. The route layer is the one that calls
  // `validateLocalInferenceLoadArgs` with `allowFork: false` against just the
  // overrides — see `local-inference-compat-routes.ts`.
  validateLocalInferenceLoadArgs(loadArgs, { allowFork: true });
  return loadArgs;
}
717
+
718
/** Megabytes per gigabyte (binary: 1024). */
const MB_PER_GB = 2 ** 10;
719
+
720
/**
 * Raised when a model's RAM boot floor exceeds what the host can spare.
 *
 * Carries the numbers behind the refusal plus `fittingVariantId`: the context
 * variant of the same tier that would fit, or `null` when none does. The
 * message adds a hint naming that variant unless it is the refused model
 * itself.
 */
export class ModelDoesNotFitError extends Error {
  readonly modelId: string;
  readonly requiredMb: number;
  readonly usableMb: number;
  readonly hostRamMb: number;
  readonly fittingVariantId: string | null;

  constructor(args: {
    modelId: string;
    requiredMb: number;
    usableMb: number;
    hostRamMb: number;
    fittingVariantId: string | null;
  }) {
    const { modelId, requiredMb, usableMb, hostRamMb, fittingVariantId } = args;

    let variantHint: string;
    if (!fittingVariantId) {
      variantHint = " No context variant of this tier fits this host.";
    } else if (fittingVariantId === modelId) {
      variantHint = "";
    } else {
      variantHint = ` The largest context variant of this tier that would fit is "${fittingVariantId}".`;
    }

    super(
      `[local-inference] Model "${modelId}" needs ~${requiredMb} MB RAM to boot, but only ~${usableMb} MB are usable on this host (${hostRamMb} MB total, after the OS/runtime headroom reserve). Refusing to load it.${variantHint} Pick a smaller tier in Settings → Model Hub, or set ELIZA_LOCAL_RAM_HEADROOM_MB lower if you accept running closer to the limit.`,
    );
    this.name = "ModelDoesNotFitError";
    this.modelId = modelId;
    this.requiredMb = requiredMb;
    this.usableMb = usableMb;
    this.hostRamMb = hostRamMb;
    this.fittingVariantId = fittingVariantId;
  }
}
750
+
751
/**
 * Admission gate for a single model load.
 *
 * Refuses the load when the host cannot fit the bundle's boot floor.
 * `installed` is forwarded to `assessRamFit` so that a manifest-declared
 * `ramBudgetMb` takes precedence over the catalog scalar.
 *
 * Models without a catalog entry (external HF blobs) are never gated: the
 * catalog holds no RAM budget for them, so the operator's explicit pick is
 * trusted and a genuine OOM surfaces as the dispatcher's load-time error.
 *
 * @param installed the model about to be loaded.
 * @param hostRamMb total host RAM in megabytes.
 * @param options extra fit options passed through to the RAM assessor.
 * @returns the advisory fit decision, so callers can log a `tight` warning.
 * @throws ModelDoesNotFitError when the model does not fit.
 */
export function assertModelFitsHost(
  installed: InstalledModel,
  hostRamMb: number,
  options: RamFitOptions = {},
): { level: "fits" | "tight"; minMb: number; recommendedMb: number } {
  const catalog = findCatalogModel(installed.id);
  if (!catalog) {
    return { level: "fits", minMb: 0, recommendedMb: 0 };
  }

  const fit = assessRamFit(catalog, hostRamMb, { ...options, installed });
  if (!fit.fits) {
    const fittingVariant = pickFittingContextVariant(catalog, hostRamMb, {
      ...options,
      installed,
    });
    throw new ModelDoesNotFitError({
      modelId: installed.id,
      requiredMb: fit.budget.minMb,
      usableMb: fit.usableMb,
      hostRamMb,
      fittingVariantId: fittingVariant?.id ?? null,
    });
  }

  // A fitting result never reports "wontfit" to callers; it degrades to "tight".
  const level = fit.level === "wontfit" ? "tight" : fit.level;
  const { minMb, recommendedMb } = fit.budget;
  return { level, minMb, recommendedMb };
}
789
+
790
/**
 * Typed refusal for a local-voice session.
 *
 * The bundle-level counterpart of `ModelDoesNotFitError`: raised by
 * `assertVoiceBundleFitsHost` when the whole co-resident voice + text stack
 * cannot fit the host's RAM (per R9 §2.3 / §3.2).
 *
 * Catch it at the runtime's voice-session-start boundary and show the
 * tier-warning copy (`TIER_WARNING_COPY[<tier>]`). DO NOT load weights and
 * then watch `MemoryMonitor` evict mid-session.
 */
export class VoiceBundleDoesNotFitError extends Error {
  readonly tierSlot: string;
  readonly deviceTier: string;
  readonly requiredPeakMb: number;
  readonly requiredSteadyStateMb: number;
  readonly usableMb: number;
  readonly hostRamMb: number;

  constructor(args: {
    tierSlot: string;
    deviceTier: string;
    requiredPeakMb: number;
    requiredSteadyStateMb: number;
    usableMb: number;
    hostRamMb: number;
  }) {
    const {
      tierSlot,
      deviceTier,
      requiredPeakMb,
      requiredSteadyStateMb,
      usableMb,
      hostRamMb,
    } = args;
    // Extra memory needed on top of steady state during the TTS peak.
    const transientPeakMb = requiredPeakMb - requiredSteadyStateMb;

    super(
      `[local-inference] The voice bundle for tier "${tierSlot}" needs ~${requiredSteadyStateMb} MB steady-state (+~${transientPeakMb} MB transient TTS peak) but only ~${usableMb} MB are usable on this host (${hostRamMb} MB total, after the OS/runtime headroom reserve). Refusing to start local voice; the runtime should fall back to cloud TTS+ASR or refuse the user-facing action.`,
    );
    this.name = "VoiceBundleDoesNotFitError";
    this.tierSlot = tierSlot;
    this.deviceTier = deviceTier;
    this.requiredPeakMb = requiredPeakMb;
    this.requiredSteadyStateMb = requiredSteadyStateMb;
    this.usableMb = usableMb;
    this.hostRamMb = hostRamMb;
  }
}
828
+
829
/**
 * Cross-model admission gate for the local-voice session. Sums the whole
 * co-resident bundle (LM + ASR + TTS + embedding + VAD + wake-word +
 * turn-detector + emotion + speaker-encoder + transient TTS peak) and refuses
 * entry when the host can't fit it.
 *
 * Returns the decision when the bundle fits. On `wontfit` it throws
 * `VoiceBundleDoesNotFitError` when `strict` is anything but `false` (the
 * default), or returns the `wontfit` decision when `strict === false` (the
 * runtime then logs and degrades silently). Pair with
 * `TIER_WARNING_COPY[deviceTier]` for user-facing UX.
 *
 * A `tierSlot` that is not an own key of `VOICE_ENSEMBLE_BUDGETS` is treated
 * permissively and reported as fitting. The check is an own-property test
 * rather than `in`, so inherited names such as `"toString"` or
 * `"constructor"` are not mistaken for real slots and forwarded to the
 * bundle assessor.
 *
 * R9 §1.4 + §2.3 + §3.2 spec.
 *
 * @param args.tierSlot voice tier slot to assess.
 * @param args.deviceTier device tier label forwarded to the assessor.
 * @param args.hostRamMb host RAM in megabytes.
 * @param args.reserveMb optional headroom reserve in megabytes.
 * @param args.strict set to `false` to get a `wontfit` decision back instead
 *   of an exception.
 * @throws VoiceBundleDoesNotFitError on `wontfit` unless `strict === false`.
 */
export function assertVoiceBundleFitsHost(args: {
  tierSlot: string;
  deviceTier: string;
  hostRamMb: number;
  reserveMb?: number;
  strict?: boolean;
}): {
  level: "fits" | "tight" | "wontfit";
  steadyStateMb: number;
  peakMb: number;
  usableMb: number;
  fits: boolean;
} {
  // Own-property check: `in` would also match keys inherited from
  // Object.prototype and let bogus slots through to the assessor.
  const isKnownSlot = Object.prototype.hasOwnProperty.call(
    VOICE_ENSEMBLE_BUDGETS,
    args.tierSlot,
  );
  if (!isKnownSlot) {
    // Unknown tier slot — be permissive: the runtime hasn't built a
    // canonical slot for this combination yet, and falling through to
    // `assertModelFitsHost` (the per-tier check) is the right default.
    return {
      level: "fits",
      steadyStateMb: 0,
      peakMb: 0,
      // 1536 MB is the reserve assumed here when the caller gives none.
      usableMb: Math.max(0, args.hostRamMb - (args.reserveMb ?? 1536)),
      fits: true,
    };
  }
  const decision = assessVoiceBundleFits({
    tierSlot: args.tierSlot as VoiceTierSlot,
    deviceTier: args.deviceTier as "MAX" | "GOOD" | "OKAY" | "POOR",
    hostRamMb: args.hostRamMb,
    reserveMb: args.reserveMb,
  });
  if (decision.level === "wontfit" && args.strict !== false) {
    throw new VoiceBundleDoesNotFitError({
      tierSlot: args.tierSlot,
      deviceTier: args.deviceTier,
      requiredPeakMb: Math.round(decision.peakMb),
      requiredSteadyStateMb: Math.round(decision.steadyStateMb),
      usableMb: Math.round(decision.usableMb),
      hostRamMb: args.hostRamMb,
    });
  }
  return {
    level: decision.level,
    steadyStateMb: decision.steadyStateMb,
    peakMb: decision.peakMb,
    usableMb: decision.usableMb,
    fits: decision.fits,
  };
}
892
+
893
/** Total host RAM in whole megabytes, derived from the probe's `totalRamGb`. */
function hostRamMbFromProbe(probe: HardwareProbe): number {
  const { totalRamGb } = probe;
  const totalMb = totalRamGb * MB_PER_GB;
  return Math.round(totalMb);
}
896
+
897
/**
 * Raised when activation targets a model whose own `eliza-1.manifest.json`
 * says its text eval has not passed (`candidate.*` / `weights-staged.*`
 * tiers).
 *
 * The structured payload is what the route layer surfaces verbatim to the
 * API consumer: `manifestVersion` lets the UI say "this tier isn't ready"
 * with the real version string, and `failedEvals` lists the checks that are
 * still red.
 *
 * Why the gate lives here and not only at download time: the bundle may
 * already be on disk (hand-staged, manually copied, or downloaded before a
 * fail-state was recorded), so a download-only gate leaves a window in which
 * a candidate-only bundle can be flipped into the active model slot and
 * silently emit `[unused]` tokens.
 *
 * Issue #7679 records the original symptom: the runtime activated a
 * candidate `1.0.0-candidate.1` bundle whose every `evals.*.passed` was
 * `false`, then served BERT/WordPiece reserved tokens (`[unused0..99]` /
 * `[PAD]`) as chat output with no actionable error.
 */
export class CandidateModelActivationError extends Error {
  readonly modelId: string;
  readonly manifestVersion: string;
  readonly failedEvals: ReadonlyArray<string>;

  constructor(args: {
    modelId: string;
    manifestVersion: string;
    failedEvals: ReadonlyArray<string>;
  }) {
    const { modelId, manifestVersion, failedEvals } = args;

    // Only mention failed evals when there is at least one to list.
    let evalSuffix = "";
    if (failedEvals.length > 0) {
      evalSuffix = ` Failed evals: ${failedEvals.join(", ")}.`;
    }

    super(
      `Model "${modelId}" is candidate-only — its manifest (version ${manifestVersion}) reports evals.textEval.passed=false. Refusing to activate.${evalSuffix} Wait for the publisher to flip the manifest off candidate/weights-staged and re-fetch the bundle.`,
    );
    this.name = "CandidateModelActivationError";
    this.modelId = modelId;
    this.manifestVersion = manifestVersion;
    this.failedEvals = failedEvals;
  }
}
939
+
940
/**
 * Activation eval gate.
 *
 * Loads the installed bundle's manifest and refuses activation unless
 * `evals.textEval.passed` is exactly `true`. When the loader yields no
 * manifest — no `eliza-1.manifest.json` on disk, as with third-party HF
 * GGUFs, external scans and pre-bundle installs — nothing is gated: the gate
 * only concerns bundles that ship a published manifest, which is the source
 * of truth for the publish state.
 *
 * @throws CandidateModelActivationError when the manifest's text eval has not
 *   passed. Returns silently otherwise.
 */
export function assertManifestEvalsPassed(
  installed: InstalledModel,
  manifestLoader: ManifestLoader = defaultManifestLoader,
): void {
  const manifest = manifestLoader(installed.id, installed);
  if (manifest && manifest.evals.textEval.passed !== true) {
    throw new CandidateModelActivationError({
      modelId: installed.id,
      manifestVersion: manifest.version,
      failedEvals: collectFailedEvalNames(manifest),
    });
  }
}
964
+
965
/**
 * Names of the manifest evals that are not passing, in a fixed order.
 *
 * `textEval` and `voiceRtf` are judged by their `passed` flag, and
 * `e2eLoopOk` / `thirtyTurnOk` by the flag itself; anything other than
 * `true` counts as failed. The remaining evals are optional: they are
 * reported only when present and not passed.
 */
function collectFailedEvalNames(manifest: Eliza1Manifest): string[] {
  const { evals } = manifest;

  // [eval name, has it failed?] — listed in reporting order.
  const outcomes: Array<[string, boolean]> = [
    ["textEval", evals.textEval.passed !== true],
    ["voiceRtf", evals.voiceRtf.passed !== true],
    ["e2eLoopOk", evals.e2eLoopOk !== true],
    ["thirtyTurnOk", evals.thirtyTurnOk !== true],
    ["asrWer", evals.asrWer ? evals.asrWer.passed !== true : false],
    ["embedMteb", evals.embedMteb ? evals.embedMteb.passed !== true : false],
    [
      "vadLatencyMs",
      evals.vadLatencyMs ? evals.vadLatencyMs.passed !== true : false,
    ],
    ["expressive", evals.expressive ? evals.expressive.passed !== true : false],
    [
      "turnDetector",
      evals.turnDetector ? evals.turnDetector.passed !== true : false,
    ],
  ];

  return outcomes
    .filter(([, hasFailed]) => hasFailed)
    .map(([evalName]) => evalName);
}
987
+
988
/**
 * Raised when activation targets a manifest-shipping bundle whose declared
 * `kernels.required` lacks one or more kernels its tier requires
 * (`REQUIRED_KERNELS_BY_TIER`).
 *
 * native/CLAUDE.md §3#5 makes this a hard error: a bundle that does not
 * declare its required quant/attention kernels would emit garbage (or
 * silently fall back to an un-optimized path), so activation is refused
 * rather than running a broken model.
 */
export class MissingRequiredKernelsError extends Error {
  readonly modelId: string;
  readonly tier: Eliza1Tier;
  readonly missing: ReadonlyArray<Eliza1Kernel>;

  constructor(args: {
    modelId: string;
    tier: Eliza1Tier;
    missing: ReadonlyArray<Eliza1Kernel>;
  }) {
    const { modelId, tier, missing } = args;
    const missingList = missing.join(", ");
    const mandatoryList = REQUIRED_KERNELS_BY_TIER[tier].join(", ");

    super(
      `Model "${modelId}" (tier ${tier}) is missing required kernel(s): ${missingList}. Its manifest declares kernels.required without the tier's mandatory set (${mandatoryList}). Refusing to activate — re-fetch a correctly-built bundle.`,
    );
    this.name = "MissingRequiredKernelsError";
    this.modelId = modelId;
    this.tier = tier;
    this.missing = missing;
  }
}
1015
+
1016
/**
 * Kernel gate applied at activation time (native/CLAUDE.md §3#5).
 *
 * Loads the manifest for `installed` through `manifestLoader`. If a manifest
 * exists, its declared `kernels.required` is compared against what the
 * manifest's tier requires, and any shortfall is raised as a
 * `MissingRequiredKernelsError`. Bundles without a manifest (bare GGUF,
 * external scan, dev path) carry no kernel contract, so the call is a no-op
 * for them.
 *
 * @param installed - The installed model being considered for activation.
 * @param manifestLoader - Resolves the bundle manifest; defaults to
 *   `defaultManifestLoader`.
 * @throws MissingRequiredKernelsError when the manifest omits a required kernel.
 */
export function assertRequiredKernelsPresent(
  installed: InstalledModel,
  manifestLoader: ManifestLoader = defaultManifestLoader,
): void {
  const manifest = manifestLoader(installed.id, installed);
  if (manifest) {
    const absent = missingRequiredKernels(
      manifest.tier,
      manifest.kernels.required,
    );
    if (absent.length !== 0) {
      throw new MissingRequiredKernelsError({
        modelId: installed.id,
        tier: manifest.tier,
        missing: absent,
      });
    }
  }
}
1040
+
1041
/**
 * native/CLAUDE.md §3#5: "The runtime MUST log the kernel set on startup."
 *
 * Emits one structured `console.info` line per activation naming the resolved
 * required + optional kernel set and the compute backend:
 *
 * - The tier comes from the manifest when present; otherwise it is derived
 *   from an `eliza-1-<tier>` model id. If no tier can be resolved, or the tier
 *   has no entry in `REQUIRED_KERNELS_BY_TIER`, nothing is logged.
 * - Required is the union of the tier's mandatory manifest kernels and any
 *   catalog-declared `requiresKernel` (mapped runtime→manifest through
 *   `RUNTIME_TO_ELIZA1_KERNEL`; unmapped names are ignored).
 * - Optional is the tier's entry in `OPTIONAL_KERNELS_BY_TIER`, or empty when
 *   the tier has none.
 *
 * Best-effort: never throws. This runs after the model load has already
 * landed, so any failure while building the line (bad probe, malformed lookup
 * table entry, id-derived tier colliding with an inherited object key) is
 * caught and reported as a degraded warning line rather than being allowed to
 * turn a successful load into a failed one.
 *
 * @param installed - The model that was just loaded.
 * @param catalog - Catalog entry for the model, if any.
 * @param manifest - Bundle manifest, if the bundle ships one.
 * @param probe - Hardware probe used to label the compute backend.
 */
function logResolvedKernelSet(
  installed: InstalledModel,
  catalog: CatalogModel | undefined,
  manifest: Eliza1Manifest | undefined,
  probe: HardwareProbe,
): void {
  try {
    const tier: Eliza1Tier | undefined =
      manifest?.tier ??
      (installed.id.startsWith("eliza-1-")
        ? (installed.id.slice("eliza-1-".length) as Eliza1Tier)
        : undefined);
    if (!tier || !REQUIRED_KERNELS_BY_TIER[tier]) return;

    const required = new Set<Eliza1Kernel>(REQUIRED_KERNELS_BY_TIER[tier]);
    for (const runtimeKernel of catalog?.runtime?.optimizations?.requiresKernel ??
      []) {
      const mapped = RUNTIME_TO_ELIZA1_KERNEL[runtimeKernel as never];
      if (mapped) required.add(mapped);
    }
    // A tier may legitimately have no optional kernels registered; treat a
    // missing entry as an empty set instead of crashing on `.join`.
    const optional = OPTIONAL_KERNELS_BY_TIER[tier] ?? [];
    const backend = resolveComputeBackendLabel(probe);
    console.info(
      `[LocalInferenceEngine] kernel set: required=[${[...required].join(", ")}] optional=[${optional.join(", ")}] backend=${backend}`,
    );
  } catch (err) {
    // Degrade the log line, never the load.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `[LocalInferenceEngine] kernel set: unavailable (${reason})`,
    );
  }
}
1074
+
1075
/**
 * Produces the backend label used in the startup kernel-set log line.
 *
 * The real CPU/GPU selection is made inside the FFI runtime when the fused lib
 * autoselects; this helper only reports what the host probe detected: the GPU
 * backend name when the probe found a GPU, otherwise the literal `cpu`.
 *
 * @param probe - Host hardware probe.
 * @returns The probed GPU backend, or `"cpu"` when no GPU was detected.
 */
function resolveComputeBackendLabel(probe: HardwareProbe): string {
  if (!probe.gpu) {
    return "cpu";
  }
  return probe.gpu.backend;
}
1083
+
1084
/**
 * Type guard for a runtime-registered local-inference loader service.
 *
 * A value qualifies when it is a non-null object exposing `loadModel`,
 * `unloadModel`, and `currentModelPath` as functions. Members are checked in
 * that order and checking stops at the first one that is not a function.
 *
 * @param value - Candidate service returned by the runtime.
 * @returns `true` when `value` can be used as a `LocalInferenceLoader`.
 */
function isLoader(value: unknown): value is LocalInferenceLoader {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  const candidate = value as Partial<LocalInferenceLoader>;
  const requiredMethods = [
    "loadModel",
    "unloadModel",
    "currentModelPath",
  ] as const;
  return requiredMethods.every(
    (method) => typeof candidate[method] === "function",
  );
}
1093
+
1094
/**
 * Owns the "which local model is active" state.
 *
 * Performs model switches and unloads against either a runtime-registered
 * loader service or the standalone `localInferenceEngine`, keeps the
 * `ActiveModelState` up to date, remembers the last model that reached
 * `"ready"` so a failed switch can be rolled back, and notifies subscribers
 * after every state transition.
 */
export class ActiveModelCoordinator {
  /** Current state; replaced wholesale on each transition, never mutated in place. */
  private state: ActiveModelState = {
    modelId: null,
    loadedAt: null,
    status: "idle",
  };

  /**
   * The last model that successfully reached `status: "ready"`, plus the
   * inputs needed to re-load it. switchTo() tears the active model down
   * before loading the new one (unload-then-load); if the new load fails we
   * restore this so a failed switch never leaves the host with zero models
   * loaded while a working one existed moments earlier. `null` until the
   * first successful load (or after an unload).
   */
  private lastReady: {
    installed: InstalledModel;
    overrides?: LocalInferenceLoadOverrides;
    state: ActiveModelState;
  } | null = null;

  /** Callbacks invoked with a copy of the state after each transition. */
  private readonly listeners = new Set<(state: ActiveModelState) => void>();

  /** Returns a shallow copy of the current state so callers cannot mutate it. */
  snapshot(): ActiveModelState {
    return { ...this.state };
  }

  /**
   * Registers a state listener.
   *
   * @param listener - Called with a copy of the state on every emit.
   * @returns A function that removes the listener again.
   */
  subscribe(listener: (state: ActiveModelState) => void): () => void {
    this.listeners.add(listener);
    return () => {
      this.listeners.delete(listener);
    };
  }

  /**
   * Delivers a copy of the current state to every listener. A listener that
   * throws is dropped from the subscriber set so it cannot break later emits.
   */
  private emit(): void {
    const current = { ...this.state };
    for (const listener of this.listeners) {
      try {
        listener(current);
      } catch {
        this.listeners.delete(listener);
      }
    }
  }

  /**
   * WS2: one-shot warning latch per (modelId) — when the tier declares
   * vision but no mmproj GGUF was found on disk, log once so the
   * operator sees that vision is degraded for this session. The
   * arbiter's vision-describe capability stays unregistered for this
   * session; plugin-vision falls back to its non-eliza-1 path.
   */
  private readonly warnedDegradedVisionFor = new Set<string>();

  /**
   * Warns (at most once per model id) when the catalog entry declares a
   * vision component but no mmproj path was resolved for this load.
   *
   * @param installed - The model being loaded.
   * @param resolvedMmprojPath - Projector path from the resolved load args, if any.
   */
  private warnIfVisionDegraded(
    installed: InstalledModel,
    resolvedMmprojPath: string | undefined,
  ): void {
    const catalog = findCatalogModel(installed.id);
    const tierClaimsVision = Boolean(
      catalog?.sourceModel?.components?.vision?.file,
    );
    if (!tierClaimsVision) return;
    if (resolvedMmprojPath) return;
    if (this.warnedDegradedVisionFor.has(installed.id)) return;
    this.warnedDegradedVisionFor.add(installed.id);
    console.warn(
      `[local-inference] vision capability unavailable for tier "${installed.id}" — the bundle declares vision/mmproj but the projector GGUF is not on disk under "${installed.bundleRoot ?? "<no-bundleRoot>"}". Text and voice will continue to load; plugin-vision will fall back to its Florence-2 path. Download the per-tier mmproj-<tier>.gguf to enable native vision-describe.`,
    );
  }

  /** Return the loader service from the current runtime, if registered. */
  private getLoader(runtime: AgentRuntime | null): LocalInferenceLoader | null {
    if (!runtime) return null;
    const candidate = (
      runtime as {
        getService?: (name: string) => unknown;
      }
    ).getService?.("localInferenceLoader");
    return isLoader(candidate) ? candidate : null;
  }

  /**
   * Makes `installed` the active model.
   *
   * Runs the eval and kernel activation gates first (both throw before any
   * state change), then emits `"loading"` and performs the load. On failure it
   * either restores the previously-ready model (if the failed attempt displaced
   * it), keeps the still-live previous model (if the failure happened before
   * unload), or reports an `"error"` state with no model id.
   *
   * Last-used metadata is touched only when the requested model was actually
   * activated.
   *
   * @param runtime - Agent runtime used to look up a registered loader; may be `null`.
   * @param installed - The model to activate.
   * @param overrides - Optional load overrides forwarded to load-arg resolution.
   * @param opts - Optional pre-computed hardware probe and manifest loader.
   * @returns A snapshot of the state after the switch attempt settles.
   * @throws Whatever the activation gates throw for a refused bundle.
   */
  async switchTo(
    runtime: AgentRuntime | null,
    installed: InstalledModel,
    overrides?: LocalInferenceLoadOverrides,
    opts: { hardware?: HardwareProbe; manifestLoader?: ManifestLoader } = {},
  ): Promise<ActiveModelState> {
    // Activation eval gate (#7679). Refuse to flip a candidate-only /
    // weights-staged bundle into the active model slot — the manifest
    // already says its text eval hasn't passed, so the only thing
    // activation buys is `[unused]`/`[PAD]` tokens in chat output and
    // a confused user. Runs BEFORE the loading state is emitted so
    // the UI never shows "loading → error" for a known-bad bundle;
    // it sees the 422 from the route layer directly.
    assertManifestEvalsPassed(installed, opts.manifestLoader);

    // Activation kernel gate (native/CLAUDE.md §3#5). A manifest-shipping
    // bundle that doesn't declare its tier's required kernels would run an
    // un-optimized/broken path — refuse it here, before the loading state,
    // same as the eval gate. No-op for bare-GGUF/dev bundles (no manifest).
    assertRequiredKernelsPresent(installed, opts.manifestLoader);

    this.state = {
      modelId: installed.id,
      loadedAt: null,
      status: "loading",
    };
    this.emit();

    // Prefer a runtime-registered loader (plugin-local-ai or equivalent)
    // when present — it will already have warmed up the right configuration.
    // Otherwise, fall back to the standalone engine, which is the default
    // path for users who haven't separately enabled plugin-local-ai.
    const loader = this.getLoader(runtime);

    // Snapshot the previously-active model BEFORE the unload-then-load tears
    // it down, so a failed switch can restore it instead of leaving zero
    // models loaded under the requested id.
    const previous = this.lastReady;
    let previousDisplaced = false;
    // True only once the REQUESTED model has reached "ready"; gates the
    // last-used metadata update below.
    let activated = false;

    try {
      const ready = await this.performLoad(
        loader,
        installed,
        overrides,
        opts,
        () => {
          previousDisplaced = true;
        },
      );
      this.state = ready;
      this.lastReady = { installed, overrides, state: ready };
      activated = true;
    } catch (err) {
      const failure = err instanceof Error ? err.message : String(err);
      if (previous) {
        // Decide from what is actually loaded right now whether the failed
        // attempt displaced the previous model.
        previousDisplaced =
          (loader?.currentModelPath() ??
            localInferenceEngine.currentModelPath()) !==
          previous.installed.path;
      }
      // Attempt to restore the previously-active model. The unload-then-load
      // already tore it down, so without this the host has no model loaded.
      if (previous && previousDisplaced) {
        try {
          const restored = await this.performLoad(
            loader,
            previous.installed,
            previous.overrides,
            opts,
            () => {},
          );
          this.state = restored;
          this.lastReady = {
            installed: previous.installed,
            overrides: previous.overrides,
            state: restored,
          };
          console.warn(
            `[local-inference] Failed to switch to "${installed.id}" (${failure}); restored previously-active model "${previous.installed.id}".`,
          );
          this.emit();
          return this.snapshot();
        } catch (restoreErr) {
          const restoreFailure =
            restoreErr instanceof Error
              ? restoreErr.message
              : String(restoreErr);
          console.error(
            `[local-inference] Failed to switch to "${installed.id}" (${failure}) AND failed to restore "${previous.installed.id}" (${restoreFailure}). No model is loaded.`,
          );
        }
      } else if (previous) {
        // Admission/load-arg errors happen before unload, so the previous
        // model is still live. Restore the coordinator state without touching
        // the loader and surface the failed request only as a warning.
        this.state = previous.state;
        this.lastReady = previous;
        console.warn(
          `[local-inference] Refused to switch to "${installed.id}" before unloading the active model "${previous.installed.id}" (${failure}).`,
        );
        this.emit();
        return this.snapshot();
      }
      // No prior model to restore (or restore also failed): report honestly
      // that nothing is loaded rather than attributing a phantom id.
      this.lastReady = null;
      this.state = {
        modelId: null,
        loadedAt: null,
        status: "error",
        error: failure,
      };
    }

    this.emit();
    // Only record "last used" for a model that really loaded. Previously the
    // error fall-through above also reached this step and bumped the metadata
    // (and could log "loaded, but failed…") for a model that never loaded.
    if (activated && installed.source === "eliza-download") {
      try {
        await touchElizaModel(installed.id);
      } catch (err) {
        console.warn(
          `[local-inference] Model "${installed.id}" loaded, but failed to update last-used metadata: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }
    return this.snapshot();
  }

  /**
   * Run the unload-then-load against the loader (or standalone engine) and
   * build the `status: "ready"` state. Throws on any load failure; never
   * mutates `this.state`/`this.lastReady` so callers control rollback.
   *
   * @param loader - Runtime-registered loader, or `null` to use the standalone engine.
   * @param installed - The model to load.
   * @param overrides - Optional load overrides.
   * @param opts - Optional pre-computed hardware probe and manifest loader.
   * @param markPreviousDisplaced - Invoked immediately before the loader is
   *   asked to unload (loader path only).
   * @returns The `"ready"` state describing the effective load configuration.
   */
  private async performLoad(
    loader: LocalInferenceLoader | null,
    installed: InstalledModel,
    overrides: LocalInferenceLoadOverrides | undefined,
    opts: { hardware?: HardwareProbe; manifestLoader?: ManifestLoader },
    markPreviousDisplaced: () => void,
  ): Promise<ActiveModelState> {
    // RAM-budget admission control (W10 / J1): refuse a model that won't
    // fit this host *before* touching the loader, so we never half-load
    // and OOM. `assertModelFitsHost` throws `ModelDoesNotFitError` with
    // the specific numbers + the largest fitting variant of the tier.
    const probe = opts.hardware ?? (await probeHardware());
    const admission = assertModelFitsHost(installed, hostRamMbFromProbe(probe));
    if (admission.level === "tight") {
      console.warn(
        `[local-inference] Loading "${installed.id}" with tight RAM headroom (~${admission.minMb} MB floor, ${admission.recommendedMb} MB recommended; ${hostRamMbFromProbe(probe)} MB host). Expect swapping under sustained load.`,
      );
    }
    const resolved = await resolveLocalInferenceLoadArgs(installed, overrides, {
      hardware: probe,
    });
    // WS2: warn one-shot when the tier declares vision but the
    // per-tier mmproj GGUF isn't on disk yet. The text load still
    // proceeds; vision capability is degraded for this session
    // (plugin-vision falls back to its Florence-2 path).
    this.warnIfVisionDegraded(installed, resolved.mmprojPath);
    if (loader) {
      markPreviousDisplaced();
      await loader.unloadModel();
      await loader.loadModel(resolved);
    } else {
      await localInferenceEngine.load(installed.path, resolved);
    }
    // native/CLAUDE.md §3#5: log the resolved kernel set once per activation,
    // after the load lands. Best-effort — never throws.
    const manifestLoader = opts.manifestLoader ?? defaultManifestLoader;
    logResolvedKernelSet(
      installed,
      findCatalogModel(installed.id),
      manifestLoader(installed.id, installed) ?? undefined,
      probe,
    );
    // Only the standalone engine reports its effective runtime config; with a
    // registered loader we fall back to the resolved (requested) values.
    const runtimeLoad = loader
      ? null
      : localInferenceEngine.currentRuntimeLoadConfig();
    // Surface the effective load config so consumers (the benchmark
    // harness, the Settings UI, the active-model SSE) can verify the
    // requested overrides actually took hold instead of silently
    // falling back to a smaller context or fp16 KV.
    return {
      modelId: installed.id,
      loadedAt: new Date().toISOString(),
      status: "ready",
      loadedContextSize:
        runtimeLoad?.contextSize ?? resolved.contextSize ?? null,
      loadedCacheTypeK: runtimeLoad
        ? runtimeLoad.cacheTypeK
        : (resolved.cacheTypeK ?? null),
      loadedCacheTypeV: runtimeLoad
        ? runtimeLoad.cacheTypeV
        : (resolved.cacheTypeV ?? null),
      loadedGpuLayers:
        runtimeLoad !== null
          ? runtimeLoad.gpuLayers
          : typeof resolved.gpuLayers === "number"
            ? resolved.gpuLayers
            : null,
    };
  }

  /**
   * Unloads whatever model is active, via the registered loader if there is
   * one, otherwise via the standalone engine.
   *
   * On success the state becomes `"idle"` and the restore snapshot is dropped.
   * If the unload itself throws, the state becomes `"error"` carrying the
   * failure message and the restore snapshot is left untouched.
   *
   * @param runtime - Agent runtime used to look up a registered loader; may be `null`.
   * @returns A snapshot of the state after the unload attempt.
   */
  async unload(runtime: AgentRuntime | null): Promise<ActiveModelState> {
    const loader = this.getLoader(runtime);
    try {
      if (loader) {
        await loader.unloadModel();
      } else {
        await localInferenceEngine.unload();
      }
    } catch (err) {
      this.state = {
        modelId: null,
        loadedAt: null,
        status: "error",
        error: err instanceof Error ? err.message : String(err),
        loadedContextSize: null,
        loadedCacheTypeK: null,
        loadedCacheTypeV: null,
        loadedGpuLayers: null,
      };
      this.emit();
      return this.snapshot();
    }
    // The model was deliberately unloaded — drop the restore snapshot so a
    // later failed switch doesn't silently re-load a model the operator
    // asked to unload.
    this.lastReady = null;
    this.state = {
      modelId: null,
      loadedAt: null,
      status: "idle",
      loadedContextSize: null,
      loadedCacheTypeK: null,
      loadedCacheTypeV: null,
      loadedGpuLayers: null,
    };
    this.emit();
    return this.snapshot();
  }
}