@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,1543 @@
1
+ import crypto from "node:crypto";
2
+ import * as dns from "node:dns";
3
+ import fs from "node:fs";
4
+ import fsp from "node:fs/promises";
5
+ import * as http from "node:http";
6
+ import * as https from "node:https";
7
+ import os from "node:os";
8
+ import path from "node:path";
9
+ import {
10
+ type ContentValue,
11
+ logger,
12
+ readJsonBody,
13
+ resolveStateDir,
14
+ sendJson,
15
+ sendJsonError,
16
+ } from "@elizaos/core";
17
+ import {
18
+ buildHuggingFaceResolveUrl,
19
+ MODEL_CATALOG as SHARED_MODEL_CATALOG,
20
+ type CatalogModel as SharedCatalogModel,
21
+ } from "@elizaos/shared";
22
+ import {
23
+ LOCAL_INFERENCE_MODEL_TYPES,
24
+ LOCAL_INFERENCE_PROVIDER_ID,
25
+ LOCAL_INFERENCE_TEXT_MODEL_TYPES,
26
+ } from "./provider.js";
27
+ import {
28
+ assertManifestEvalsPassed,
29
+ CandidateModelActivationError,
30
+ } from "./services/active-model.js";
31
+ import { localInferenceService } from "./services/service.js";
32
+ import { prewarmLocalVoiceStackForModel } from "./services/voice-prewarm.js";
33
+
34
+ type ModelRole = "chat" | "embedding";
35
+ type DownloadState =
36
+ | "queued"
37
+ | "downloading"
38
+ | "completed"
39
+ | "failed"
40
+ | "cancelled";
41
+
42
+ type MobileDeviceBridgeApi = {
43
+ getMobileDeviceBridgeStatus: () => MobileDeviceBridgeStatus;
44
+ loadMobileDeviceBridgeModel: (
45
+ modelPath: string,
46
+ modelId: string,
47
+ ) => Promise<void>;
48
+ unloadMobileDeviceBridgeModel: () => Promise<void>;
49
+ };
50
+
51
+ type MobileDeviceBridgeStatus = {
52
+ enabled?: boolean;
53
+ connected?: boolean;
54
+ reason?: string;
55
+ devices: Array<{ loadedPath?: string | null }>;
56
+ };
57
+
58
+ type AospLocalInferenceApi = {
59
+ buildAospLoadModelArgs: (
60
+ role: "chat" | "embedding",
61
+ modelPath: string,
62
+ ) => unknown;
63
+ activateAospLocalInferenceModel: (args: {
64
+ modelId: string;
65
+ modelPath: string;
66
+ loadArgs: unknown;
67
+ }) => Promise<typeof activeModelState>;
68
+ clearAospLocalInferenceModel: () => Promise<typeof activeModelState>;
69
+ };
70
+
71
+ let mobileDeviceBridgeApiPromise: Promise<MobileDeviceBridgeApi> | null = null;
72
+ let aospLocalInferenceApiPromise: Promise<AospLocalInferenceApi> | null = null;
73
+
74
/**
 * Returns the memoized dynamic import of the Capacitor bridge plugin.
 *
 * The import is started on first use and the resulting promise (fulfilled or
 * rejected) is reused by every later call.
 */
function getMobileDeviceBridgeApi(): Promise<MobileDeviceBridgeApi> {
  if (mobileDeviceBridgeApiPromise == null) {
    mobileDeviceBridgeApiPromise = import(
      "@elizaos/plugin-capacitor-bridge"
    ) as Promise<MobileDeviceBridgeApi>;
  }
  return mobileDeviceBridgeApiPromise;
}
80
+
81
/**
 * Returns the memoized dynamic import of the AOSP local-inference plugin.
 *
 * The import is started on first use and the resulting promise (fulfilled or
 * rejected) is reused by every later call.
 */
function getAospLocalInferenceApi(): Promise<AospLocalInferenceApi> {
  if (aospLocalInferenceApiPromise == null) {
    aospLocalInferenceApiPromise = import(
      "@elizaos/plugin-aosp-local-inference"
    ) as Promise<AospLocalInferenceApi>;
  }
  return aospLocalInferenceApiPromise;
}
87
+
88
/**
 * Reports whether the `ELIZA_LOCAL_LLAMA` environment variable is switched on.
 *
 * The value is trimmed and compared case-insensitively against "1", "true"
 * and "yes"; anything else (including an unset variable) yields `false`.
 */
function shouldUseAospLocalInference(): boolean {
  const flag = process.env.ELIZA_LOCAL_LLAMA?.trim().toLowerCase();
  if (flag === undefined) return false;
  return ["1", "true", "yes"].includes(flag);
}
92
+
93
/**
 * Builds the placeholder status reported when the mobile device bridge module
 * could not be loaded: disabled, disconnected, and with no devices.
 * A fresh object is returned on every call.
 */
function getMobileDeviceBridgeStatusUnavailable(): MobileDeviceBridgeStatus {
  const unavailable: MobileDeviceBridgeStatus = {
    enabled: false,
    connected: false,
    reason: "mobile device bridge is not loaded",
    devices: [],
  };
  return unavailable;
}
101
+
102
+ export type LocalInferenceCommandIntent =
103
+ | "retry"
104
+ | "resume"
105
+ | "redownload"
106
+ | "download"
107
+ | "cancel"
108
+ | "switch_smaller"
109
+ | "status"
110
+ | "use_cloud"
111
+ | "use_local";
112
+
113
+ interface CatalogModel extends SharedCatalogModel {
114
+ role: ModelRole;
115
+ }
116
+
117
+ interface InstalledModel {
118
+ id: string;
119
+ displayName: string;
120
+ path: string;
121
+ sizeBytes: number;
122
+ hfRepo?: string;
123
+ installedAt: string;
124
+ lastUsedAt: string | null;
125
+ source: "eliza-download";
126
+ sha256?: string;
127
+ lastVerifiedAt?: string;
128
+ }
129
+
130
+ interface DownloadJob {
131
+ jobId: string;
132
+ modelId: string;
133
+ state: DownloadState;
134
+ received: number;
135
+ total: number;
136
+ bytesPerSec: number;
137
+ etaMs: number | null;
138
+ startedAt: string;
139
+ updatedAt: string;
140
+ error?: string;
141
+ }
142
+
143
+ export interface LocalInferenceChatMetadata {
144
+ [key: string]: ContentValue;
145
+ intent?: LocalInferenceCommandIntent;
146
+ status:
147
+ | "missing"
148
+ | "downloading"
149
+ | "loading"
150
+ | "failed"
151
+ | "no_space"
152
+ | "idle"
153
+ | "ready"
154
+ | "cancelled"
155
+ | "routing";
156
+ modelId?: string | null;
157
+ activeModelId?: string | null;
158
+ provider?: string;
159
+ error?: string;
160
+ progress?: {
161
+ percent?: number;
162
+ receivedBytes: number;
163
+ totalBytes: number;
164
+ bytesPerSec?: number;
165
+ etaMs?: number | null;
166
+ };
167
+ }
168
+
169
+ export interface LocalInferenceChatResult {
170
+ text: string;
171
+ localInference: LocalInferenceChatMetadata;
172
+ }
173
+
174
+ type Assignments = Partial<
175
+ Record<(typeof LOCAL_INFERENCE_MODEL_TYPES)[number], string>
176
+ >;
177
+
178
+ interface RoutingPreferences {
179
+ preferredProvider: Record<string, string>;
180
+ policy: Record<string, string>;
181
+ }
182
+
183
+ interface RoutingPreferencesFile {
184
+ version: number;
185
+ preferences: RoutingPreferences;
186
+ }
187
+
188
+ let activeModelState: {
189
+ modelId: string | null;
190
+ loadedAt: string | null;
191
+ status: "idle" | "loading" | "ready" | "error";
192
+ error?: string;
193
+ } = { modelId: null, loadedAt: null, status: "idle" };
194
+
195
/**
 * Returns the trimmed id of the model that is currently "ready", or
 * `undefined` when none is.
 *
 * The in-process inference service is consulted first; the module-level
 * `activeModelState` is only used when the service has no ready model with a
 * non-blank id.
 */
export function getLocalInferenceActiveModelId(): string | undefined {
  const fromService = localInferenceService.getActive();
  if (fromService.status === "ready") {
    const serviceId = fromService.modelId?.trim();
    if (serviceId) return serviceId;
  }

  if (activeModelState.status !== "ready") return undefined;
  const fallbackId = activeModelState.modelId?.trim();
  return fallbackId ? fallbackId : undefined;
}
204
+
205
/**
 * Maps a shared catalog entry to the role used by this module: entries whose
 * `category` is "embedding" are embedding models, everything else is chat.
 */
function catalogRole(model: SharedCatalogModel): ModelRole {
  const category = model.category as string;
  return category === "embedding" ? "embedding" : "chat";
}
209
+
210
+ const CATALOG: CatalogModel[] = SHARED_MODEL_CATALOG.map((model) => ({
211
+ ...model,
212
+ role: catalogRole(model),
213
+ }));
214
+
215
+ const activeDownloads = new Map<
216
+ string,
217
+ { job: DownloadJob; abortController: AbortController }
218
+ >();
219
+ const MOBILE_DNS_SERVERS = ["8.8.8.8", "1.1.1.1"];
220
+ const mobileDnsResolver = new dns.Resolver();
221
+ mobileDnsResolver.setServers(MOBILE_DNS_SERVERS);
222
+
223
/** State directory as reported by `resolveStateDir()`. */
function stateDir(): string {
  return resolveStateDir();
}

/** Directory under the state dir that holds everything this module persists. */
function localInferenceRoot(): string {
  return path.join(stateDir(), "local-inference");
}

/** Joins `leaf` onto the local-inference root directory. */
function underLocalInferenceRoot(leaf: string): string {
  return path.join(localInferenceRoot(), leaf);
}

/** Directory that holds finished model files. */
function modelsDir(): string {
  return underLocalInferenceRoot("models");
}

/** Directory that holds in-progress (staged) downloads. */
function downloadsDir(): string {
  return underLocalInferenceRoot("downloads");
}

/** JSON file listing installed models. */
function registryPath(): string {
  return underLocalInferenceRoot("registry.json");
}

/** JSON file holding the model-type to model-id assignments. */
function assignmentsPath(): string {
  return underLocalInferenceRoot("assignments.json");
}

/** JSON file holding routing preferences. */
function routingPath(): string {
  return underLocalInferenceRoot("routing.json");
}

/** JSON file read to discover the model loaded by the AOSP in-process path. */
function aospActivePath(): string {
  return underLocalInferenceRoot("aosp-active.json");
}
254
+
255
/**
 * File-name stem for a model: its id with every character outside
 * `[a-zA-Z0-9._-]` replaced by an underscore.
 */
function modelFileStem(model: CatalogModel): string {
  return model.id.replace(/[^a-zA-Z0-9._-]/g, "_");
}

/** Path of the installed `.gguf` file for `model` inside the models directory. */
function finalModelPath(model: CatalogModel): string {
  const fileName = `${modelFileStem(model)}.gguf`;
  return path.join(modelsDir(), fileName);
}

/** Path of the `.part` staging file for `model` inside the downloads directory. */
function stagingPath(model: CatalogModel): string {
  const fileName = `${modelFileStem(model)}.part`;
  return path.join(downloadsDir(), fileName);
}
268
+
269
/**
 * Download URL for `model`, delegated entirely to the shared
 * `buildHuggingFaceResolveUrl` helper.
 */
function huggingFaceResolveUrl(model: CatalogModel): string {
  const resolveUrl = buildHuggingFaceResolveUrl(model);
  return resolveUrl;
}
272
+
273
/**
 * Reports whether `ELIZA_PLATFORM` (compared case-insensitively) names a
 * mobile platform, i.e. "android" or "ios".
 */
function shouldUseMobileDns(): boolean {
  switch (process.env.ELIZA_PLATFORM?.toLowerCase()) {
    case "android":
    case "ios":
      return true;
    default:
      return false;
  }
}
277
+
278
/**
 * `lookup` implementation for http/https requests that resolves IPv4 (A)
 * records through the module's dedicated `mobileDnsResolver`.
 *
 * When the caller asks for all addresses (`options.all`) every record is
 * returned as `{ address, family: 4 }`; otherwise only the first address is
 * reported.
 */
const mobileLookup: http.RequestOptions["lookup"] = (
  hostname,
  options,
  callback,
) => {
  mobileDnsResolver.resolve4(hostname, (error, addresses) => {
    if (error) {
      callback(error, undefined as never, undefined as never);
      return;
    }
    if (!options.all) {
      callback(null, addresses[0], 4);
      return;
    }
    const records = addresses.map((address) => ({ address, family: 4 }));
    callback(null, records, undefined as never);
  });
};
299
+
300
/**
 * Issues a GET for `url` and resolves with the first non-redirect response.
 *
 * Redirects (301/302/303/307/308 carrying a `Location` header) are followed,
 * re-sending the same headers, until more than five hops have been taken.
 * The mobile DNS lookup is used when `shouldUseMobileDns()` says so.
 *
 * @param url - Absolute URL to fetch; `http:` uses the http transport, any
 *   other protocol goes through https.
 * @param headers - Request headers sent on every hop.
 * @param signal - Aborting it destroys the in-flight request with a
 *   "Download cancelled" error.
 * @param redirectCount - Number of redirects already followed (internal).
 * @returns The response stream; the caller checks its status code.
 * @throws Error when the redirect limit is exceeded, the signal is already
 *   aborted, or the request fails.
 */
async function openDownloadResponse(
  url: string,
  headers: Record<string, string>,
  signal: AbortSignal,
  redirectCount = 0,
): Promise<http.IncomingMessage> {
  if (redirectCount > 5) {
    throw new Error("Too many redirects while downloading model");
  }
  // Bail out before any request exists. Destroying a request that has no
  // "error" listener yet raises an unhandled error event and would leave this
  // promise pending forever.
  if (signal.aborted) {
    throw new Error("Download cancelled");
  }

  const parsed = new URL(url);
  const transport = parsed.protocol === "http:" ? http : https;

  return new Promise((resolve, reject) => {
    const req = transport.get(
      parsed,
      {
        headers,
        lookup: shouldUseMobileDns() ? mobileLookup : undefined,
      },
      (response) => {
        const statusCode = response.statusCode ?? 0;
        const location = response.headers.location;
        if (location && [301, 302, 303, 307, 308].includes(statusCode)) {
          // Drain the redirect body so its socket is released.
          response.resume();
          resolve(
            openDownloadResponse(
              new URL(location, parsed).toString(),
              headers,
              signal,
              redirectCount + 1,
            ),
          );
          return;
        }
        resolve(response);
      },
    );

    const abort = () => {
      req.destroy(new Error("Download cancelled"));
    };
    // Listeners go on first so an abort can never destroy the request while
    // nobody is listening for the resulting error.
    req.on("error", reject);
    req.on("close", () => signal.removeEventListener("abort", abort));
    signal.addEventListener("abort", abort, { once: true });
  });
}
351
+
352
/**
 * Creates the models and downloads directories (and any missing parents),
 * one after the other.
 */
async function ensureLocalInferenceDirs(): Promise<void> {
  for (const resolveDir of [modelsDir, downloadsDir]) {
    await fsp.mkdir(resolveDir(), { recursive: true });
  }
}
356
+
357
/**
 * Reads and parses a UTF-8 JSON file.
 *
 * @param filePath - File to read.
 * @param fallback - Value returned when the file cannot be read or does not
 *   contain valid JSON.
 * @returns The parsed document (cast to `T`, not validated) or `fallback`.
 */
async function readJsonFile<T>(filePath: string, fallback: T): Promise<T> {
  let raw: string;
  try {
    raw = await fsp.readFile(filePath, "utf8");
  } catch {
    return fallback;
  }

  try {
    return JSON.parse(raw) as T;
  } catch {
    return fallback;
  }
}
364
+
365
/**
 * Writes `payload` as pretty-printed JSON by staging a temp file next to the
 * target and renaming it into place.
 *
 * Each call uses its own temp file name. With a shared `<file>.tmp`,
 * overlapping writers would interleave their content and the slower one
 * would fail its rename with ENOENT.
 *
 * @param filePath - Destination file; missing parent directories are created.
 * @param payload - Any JSON-serializable value.
 * @throws Whatever the underlying write or rename throws; the temp file is
 *   removed before the error is re-thrown.
 */
async function writeJsonFile(
  filePath: string,
  payload: unknown,
): Promise<void> {
  await fsp.mkdir(path.dirname(filePath), { recursive: true });
  const tmp = `${filePath}.${process.pid}.${crypto.randomUUID()}.tmp`;
  try {
    await fsp.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
    await fsp.rename(tmp, filePath);
  } catch (error) {
    // Best-effort cleanup; the original failure is the one worth reporting.
    await fsp.rm(tmp, { force: true }).catch(() => {});
    throw error;
  }
}
374
+
375
/**
 * Computes the SHA-256 of a file, streaming it in 1 MiB reads.
 *
 * @param filePath - File to hash.
 * @returns Lower-case hex digest.
 * @throws When the file cannot be opened or read.
 */
async function hashFile(filePath: string): Promise<string> {
  const digest = crypto.createHash("sha256");
  const reader = fs.createReadStream(filePath, {
    highWaterMark: 1024 * 1024,
  });
  for await (const chunk of reader) {
    digest.update(chunk);
  }
  return digest.digest("hex");
}
386
+
387
/**
 * Checks whether a file starts with the four ASCII bytes "GGUF".
 *
 * @param filePath - File to inspect.
 * @returns `true` when the magic matches; `false` otherwise, including when
 *   the file cannot be opened or read.
 */
async function isGgufFile(filePath: string): Promise<boolean> {
  const expectedMagic = "GGUF";
  try {
    const handle = await fsp.open(filePath, "r");
    try {
      const header = Buffer.alloc(4);
      await handle.read(header, 0, 4, 0);
      const magic = header.toString("ascii");
      return magic === expectedMagic;
    } finally {
      await handle.close();
    }
  } catch {
    return false;
  }
}
401
+
402
/**
 * Loads the installed-models registry and keeps only entries whose file still
 * exists on disk.
 *
 * The registry is read from disk unvalidated, so a document that parses to
 * `null` and `null` entries inside `models` are tolerated rather than
 * allowed to throw.
 *
 * @returns Registry entries with `sizeBytes` refreshed from the file system.
 *   Entries without an id or path, or whose path is missing or not a regular
 *   file, are dropped.
 */
async function readRegistry(): Promise<InstalledModel[]> {
  const registry = await readJsonFile<{
    version?: number;
    models?: Array<InstalledModel | null>;
  } | null>(registryPath(), { version: 1, models: [] });
  const rawModels = registry?.models;
  const models = Array.isArray(rawModels) ? rawModels : [];
  const installed: InstalledModel[] = [];
  for (const model of models) {
    if (!model?.id || !model.path) continue;
    try {
      const stat = await fsp.stat(model.path);
      if (stat.isFile()) installed.push({ ...model, sizeBytes: stat.size });
    } catch {
      // Ignore stale registry entries.
    }
  }
  return installed;
}
420
+
421
/** Persists `models` as the version-1 installed-models registry. */
async function writeRegistry(models: InstalledModel[]): Promise<void> {
  const document = { version: 1, models };
  await writeJsonFile(registryPath(), document);
}
424
+
425
/**
 * Adds `model` to the registry, replacing any existing entry with the same
 * id. The new entry is always placed last.
 */
async function upsertInstalledModel(model: InstalledModel): Promise<void> {
  const registry = await readRegistry();
  const next = registry.filter((entry) => entry.id !== model.id);
  next.push(model);
  await writeRegistry(next);
}
432
+
433
/**
 * Deletes an installed model's file and drops it from the registry.
 *
 * @param id - Model id to remove.
 * @returns `false` when the id is not in the registry, `true` once the file
 *   has been removed (if present) and the registry rewritten.
 */
async function removeInstalledModel(id: string): Promise<boolean> {
  const registry = await readRegistry();
  const doomed = registry.find((entry) => entry.id === id);
  if (doomed === undefined) return false;

  await fsp.rm(doomed.path, { force: true });
  const remaining = registry.filter((entry) => entry.id !== id);
  await writeRegistry(remaining);
  return true;
}
441
+
442
/**
 * Loads the persisted model assignments.
 *
 * The file is read unvalidated, so a document that parses to `null` is
 * treated like a missing one instead of throwing.
 *
 * @returns The stored `assignments` map, or an empty object when the file is
 *   missing, unreadable, or has no `assignments` field.
 */
async function readAssignments(): Promise<Assignments> {
  const file = await readJsonFile<{ assignments?: Assignments } | null>(
    assignmentsPath(),
    {
      assignments: {},
    },
  );
  return file?.assignments ?? {};
}
451
+
452
/**
 * Persists `assignments` as a version-1 document.
 *
 * @returns The same `assignments` object that was passed in.
 */
async function writeAssignments(
  assignments: Assignments,
): Promise<Assignments> {
  const document = { version: 1, assignments };
  await writeJsonFile(assignmentsPath(), document);
  return assignments;
}
458
+
459
/**
 * Builds an empty version-1 routing-preferences document. Every call returns
 * new objects, so callers may mutate the result.
 */
function defaultRoutingPreferences(): RoutingPreferencesFile {
  const preferences: RoutingPreferences = {
    preferredProvider: {},
    policy: {},
  };
  return { version: 1, preferences };
}
468
+
469
/**
 * Points the model-type slots that belong to `model`'s role at `model.id` and
 * persists the result.
 *
 * An embedding model only claims `TEXT_EMBEDDING`. A chat model claims
 * `TEXT_SMALL`, `TEXT_LARGE`, `TEXT_EMBEDDING`, `TEXT_TO_SPEECH` and
 * `TRANSCRIPTION`.
 *
 * @param model - Catalog entry being assigned.
 * @param overwrite - When `false`, slots that already hold a value are left
 *   untouched.
 */
async function assignModel(
  model: CatalogModel,
  overwrite: boolean,
): Promise<void> {
  const assignments = await readAssignments();

  let slots: Array<keyof Assignments> = [];
  if (model.role === "embedding") {
    slots = ["TEXT_EMBEDDING"];
  } else if (model.role === "chat") {
    slots = [
      "TEXT_SMALL",
      "TEXT_LARGE",
      "TEXT_EMBEDDING",
      "TEXT_TO_SPEECH",
      "TRANSCRIPTION",
    ];
  }

  for (const slot of slots) {
    if (overwrite || !assignments[slot]) {
      assignments[slot] = model.id;
    }
  }
  await writeAssignments(assignments);
}
493
+
494
/**
 * Assigns `model` to the slots for its role that are still empty, leaving
 * existing assignments alone.
 */
async function ensureDefaultAssignment(model: CatalogModel): Promise<void> {
  const overwrite = false;
  await assignModel(model, overwrite);
}
497
+
498
/**
 * Streams `response` into `filePath`, updating `record.received` on every
 * chunk and the throughput/ETA fields roughly once per second.
 *
 * A write-stream failure (for example a full disk) is surfaced as a
 * rejection instead of an unhandled "error" event or a wait for a "drain"
 * or "finish" event that never comes.
 */
async function writeDownloadBody(
  response: http.IncomingMessage,
  filePath: string,
  append: boolean,
  record: DownloadJob,
): Promise<void> {
  const stream = fs.createWriteStream(filePath, {
    flags: append ? "a" : "w",
  });
  // Listen for errors from the moment the stream exists; the promise is
  // raced against every wait below.
  const streamFailed = new Promise<never>((_, reject) => {
    stream.on("error", reject);
  });
  streamFailed.catch(() => {});

  let lastSampleAt = Date.now();
  let lastSampleBytes = record.received;

  try {
    for await (const chunk of response) {
      const value = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
      if (!stream.write(value)) {
        await Promise.race([
          new Promise<void>((resolve) => stream.once("drain", resolve)),
          streamFailed,
        ]);
      }
      record.received += value.length;
      const now = Date.now();
      const elapsed = now - lastSampleAt;
      if (elapsed >= 1000) {
        record.bytesPerSec =
          ((record.received - lastSampleBytes) * 1000) / elapsed;
        record.etaMs =
          record.bytesPerSec > 0
            ? (Math.max(0, record.total - record.received) * 1000) /
              record.bytesPerSec
            : null;
        lastSampleAt = now;
        lastSampleBytes = record.received;
        record.updatedAt = new Date().toISOString();
      }
    }
  } finally {
    await new Promise<void>((resolve, reject) => {
      stream.once("finish", () => resolve());
      streamFailed.catch(reject);
      stream.end();
    });
  }
}

/**
 * Downloads `model` into its staging file, validates it, promotes it to the
 * models directory, registers it and fills in default assignments. Progress
 * and the final outcome are written onto `record`.
 *
 * The function returns immediately when `activeDownloads` has no entry (and
 * therefore no abort controller) for the model. It does not reject for
 * download failures: they are recorded as `failed` (or `cancelled` when the
 * abort signal fired) on `record`.
 *
 * Resume rules:
 * - An existing staging file is continued with a `Range` request, but only
 *   appended to when the server answers 206. Any other 2xx carries the whole
 *   file, so the staging file is rewritten from byte 0.
 * - HTTP 416 means the staged bytes cannot be continued. If the server's
 *   reported total equals the staged size the file is treated as complete;
 *   otherwise the staging file is discarded so the next attempt starts clean.
 *
 * @param model - Catalog entry to download.
 * @param record - Job record mutated in place as the download progresses.
 */
async function downloadModel(
  model: CatalogModel,
  record: DownloadJob,
): Promise<void> {
  const abortController = activeDownloads.get(model.id)?.abortController;
  if (!abortController) return;

  const finalPath = finalModelPath(model);
  const partialPath = stagingPath(model);
  const existingPartial = await fsp
    .stat(partialPath)
    .then((stat) => (stat.isFile() ? stat.size : 0))
    .catch(() => 0);

  record.state = "downloading";
  record.received = existingPartial;
  record.updatedAt = new Date().toISOString();

  try {
    const headers: Record<string, string> = {
      "user-agent": "Eliza-MobileLocalInference/1.0",
    };
    if (existingPartial > 0) headers.range = `bytes=${existingPartial}-`;
    const response = await openDownloadResponse(
      huggingFaceResolveUrl(model),
      headers,
      abortController.signal,
    );
    const statusCode = response.statusCode ?? 0;

    let stagedIsComplete = false;
    if (statusCode === 416 && existingPartial > 0) {
      response.resume();
      // Content-Range on a 416 has the form "bytes */<total>".
      const reported = /\/(\d+)\s*$/.exec(
        String(response.headers["content-range"] ?? ""),
      );
      stagedIsComplete =
        reported !== null && Number(reported[1]) === existingPartial;
      if (!stagedIsComplete) {
        await fsp.rm(partialPath, { force: true });
        record.received = 0;
        throw new Error(`HTTP ${statusCode} ${response.statusMessage ?? ""}`);
      }
    } else if (statusCode < 200 || statusCode >= 300) {
      response.resume();
      throw new Error(`HTTP ${statusCode} ${response.statusMessage ?? ""}`);
    }

    if (!stagedIsComplete) {
      // Only a 206 continues the staged bytes; a plain 200 is the whole file.
      const resuming = existingPartial > 0 && statusCode === 206;
      if (!resuming) record.received = 0;
      const contentLength = Number.parseInt(
        String(response.headers["content-length"] ?? "0"),
        10,
      );
      if (Number.isFinite(contentLength) && contentLength > 0) {
        record.total = record.received + contentLength;
      }
      await writeDownloadBody(response, partialPath, resuming, record);
    }

    // Validate before promoting so a bad payload never lands in the models
    // directory, and drop it so a retry does not resume from garbage.
    if (!(await isGgufFile(partialPath))) {
      await fsp.rm(partialPath, { force: true });
      throw new Error("Downloaded file is not a valid GGUF");
    }
    await fsp.rename(partialPath, finalPath);

    const stat = await fsp.stat(finalPath);
    const sha256 = await hashFile(finalPath);
    await upsertInstalledModel({
      id: model.id,
      displayName: model.displayName,
      path: finalPath,
      sizeBytes: stat.size,
      hfRepo: model.hfRepo,
      installedAt: new Date().toISOString(),
      lastUsedAt: null,
      source: "eliza-download",
      sha256,
      lastVerifiedAt: new Date().toISOString(),
    });
    await ensureDefaultAssignment(model);

    record.state = "completed";
    record.received = stat.size;
    record.total = stat.size;
    record.updatedAt = new Date().toISOString();
  } catch (error) {
    if (abortController.signal.aborted) {
      record.state = "cancelled";
    } else {
      record.state = "failed";
      record.error = error instanceof Error ? error.message : String(error);
      logger.warn(
        `[local-inference] Download failed for ${model.id}: ${record.error}`,
      );
    }
    record.updatedAt = new Date().toISOString();
  } finally {
    // Only drop the map entry this run belongs to. A newer job for the same
    // model may have been registered in the meantime.
    if (
      record.state !== "downloading" &&
      activeDownloads.get(model.id)?.abortController === abortController
    ) {
      activeDownloads.delete(model.id);
    }
  }
}
614
+
615
/**
 * Starts a background download for a catalog model, or returns the job that
 * is already running for it.
 *
 * @param modelId - Id of an entry in `CATALOG`.
 * @returns A copy of the job record (the live record keeps being updated by
 *   the download itself).
 * @throws Error when `modelId` is not in the catalog.
 */
async function startDownload(modelId: string): Promise<DownloadJob> {
  const existing = activeDownloads.get(modelId);
  if (existing) return { ...existing.job };
  const model = CATALOG.find((entry) => entry.id === modelId);
  if (!model) throw new Error(`Unknown model id: ${modelId}`);
  await ensureLocalInferenceDirs();

  // Another caller may have registered a job while the directories were being
  // created. Without this second look both callers would download into the
  // same staging file.
  const registeredMeanwhile = activeDownloads.get(modelId);
  if (registeredMeanwhile) return { ...registeredMeanwhile.job };

  const job: DownloadJob = {
    jobId: crypto.randomUUID(),
    modelId,
    state: "queued",
    received: 0,
    total: Math.round(model.sizeGb * 1024 ** 3),
    bytesPerSec: 0,
    etaMs: null,
    startedAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  activeDownloads.set(modelId, {
    job,
    abortController: new AbortController(),
  });
  // Fire and forget: downloadModel records its own outcome on `job`.
  void downloadModel(model, job);
  return { ...job };
}
639
+
640
/**
 * Returns the current installed-models list, creating the local-inference
 * directories first.
 */
async function installedSnapshot(): Promise<InstalledModel[]> {
  await ensureLocalInferenceDirs();
  const installed = await readRegistry();
  return installed;
}
644
+
645
/**
 * Reports which local model is active, checking three sources in order:
 *
 * 1. the in-process inference service, when it is "ready" with a model id;
 * 2. `aosp-active.json`, when it records a ready chat model with a path;
 * 3. the mobile device bridge, when any device reports a `loadedPath`.
 *
 * When none applies, the module-level `activeModelState` is returned as is.
 */
export async function getLocalInferenceActiveSnapshot(): Promise<{
  modelId: string | null;
  loadedAt: string | null;
  status: "idle" | "loading" | "ready" | "error";
  error?: string;
  loadedContextSize?: number | null;
  loadedCacheTypeK?: string | null;
  loadedCacheTypeV?: string | null;
  loadedGpuLayers?: number | null;
}> {
  // Neither file-based path is gated on the installed-models registry: a
  // device can load a GGUF that was staged directly and never registered.
  // The registry only supplies a friendly id; otherwise the file name
  // (minus ".gguf") is used.
  const modelIdForPath = async (ggufPath: string): Promise<string> => {
    const registered = (await installedSnapshot()).find(
      (model) => model.path === ggufPath,
    );
    return registered?.id ?? path.basename(ggufPath).replace(/\.gguf$/i, "");
  };

  const serviceActive = localInferenceService.getActive();
  if (serviceActive.status === "ready" && serviceActive.modelId) {
    return serviceActive;
  }

  // aosp-active.json is treated as the authoritative "a chat model is loaded
  // in-process" signal for the agent-side path (ELIZA_LOCAL_LLAMA).
  const aospActive = await readJsonFile<{
    status?: string;
    role?: string;
    path?: string;
    loadedAt?: string;
  } | null>(aospActivePath(), null);
  const aospReady =
    aospActive?.status === "ready" &&
    aospActive.role === "chat" &&
    typeof aospActive.path === "string";
  if (aospReady && aospActive && typeof aospActive.path === "string") {
    const modelId = await modelIdForPath(aospActive.path);
    const loadedAt =
      typeof aospActive.loadedAt === "string" ? aospActive.loadedAt : null;
    return { modelId, loadedAt, status: "ready" };
  }

  // A bridge that cannot be loaded is reported as "unavailable", not an error.
  let bridgeStatus: MobileDeviceBridgeStatus;
  try {
    const api = await getMobileDeviceBridgeApi();
    bridgeStatus = api.getMobileDeviceBridgeStatus();
  } catch {
    bridgeStatus = getMobileDeviceBridgeStatusUnavailable();
  }
  const loadedPath = bridgeStatus.devices.find((device) =>
    Boolean(device.loadedPath),
  )?.loadedPath;
  if (!loadedPath) return activeModelState;

  // A device reporting a loadedPath has the GGUF loaded, which counts as
  // "ready" just like the AOSP path above.
  return {
    modelId: await modelIdForPath(loadedPath),
    loadedAt: activeModelState.loadedAt,
    status: "ready",
  };
}
710
+
711
/**
 * Assemble the full hub payload: visible catalog entries, installed models,
 * the active-model snapshot, in-flight download jobs, a host hardware summary
 * derived from `os`/`process`, and the current slot assignments.
 */
async function hubSnapshot(): Promise<Record<string, unknown>> {
  // Bytes -> GiB, rounded to one decimal place.
  const toOneDecimalGb = (bytes: number): number =>
    Math.round((bytes / 1024 ** 3) * 10) / 10;

  const catalog = CATALOG.filter((model) => !model.hiddenFromCatalog);
  const installed = await installedSnapshot();
  const active = await getLocalInferenceActiveSnapshot();
  // Copy each job so the payload does not alias the live job objects.
  const downloads = [...activeDownloads.values()].map(({ job }) => ({
    ...job,
  }));
  const hardware = {
    totalRamGb: toOneDecimalGb(os.totalmem()),
    freeRamGb: toOneDecimalGb(os.freemem()),
    gpu: null,
    cpuCores: os.cpus().length,
    platform: process.platform,
    arch: process.arch,
    appleSilicon: process.platform === "darwin" && process.arch === "arm64",
    recommendedBucket: "small",
    source: "os-fallback",
  };
  const assignments = await readAssignments();

  return { catalog, installed, active, downloads, hardware, assignments };
}
731
+
732
/** Return a fresh array holding every catalog entry whose role is "chat". */
function chatModels(): CatalogModel[] {
  const matches: CatalogModel[] = [];
  for (const entry of CATALOG) {
    if (entry.role === "chat") matches.push(entry);
  }
  return matches;
}
735
+
736
/**
 * Pick the chat model to recommend for this host: the largest one whose
 * `minRamGb` fits in total system RAM, otherwise the smallest chat model in
 * the catalog, otherwise `null` when the catalog has no chat models.
 */
function recommendedChatModel(): CatalogModel | null {
  const totalRamGb = os.totalmem() / 1024 ** 3;

  const [largestFitting] = chatModels()
    .filter((model) => totalRamGb >= model.minRamGb)
    .sort((left, right) => right.sizeGb - left.sizeGb);
  if (largestFitting) return largestFitting;

  // Nothing fits in RAM: fall back to the smallest chat model available.
  const [smallest] = chatModels().sort((a, b) => a.sizeGb - b.sizeGb);
  return smallest ?? null;
}
745
+
746
/**
 * Report whether an error (or error string) describes an out-of-disk-space
 * condition. Values that are neither an `Error` nor a string never match.
 */
function isNoSpaceMessage(value: unknown): boolean {
  let text = "";
  if (value instanceof Error) {
    text = value.message;
  } else if (typeof value === "string") {
    text = value;
  }
  const noSpacePattern =
    /\b(?:enospc|no space left|disk full|not enough (?:disk )?space|insufficient storage)\b/i;
  return noSpacePattern.test(text);
}
757
+
758
/**
 * Render a byte count as a short human-readable string using 1024-based
 * units from B up to TB. Non-finite, zero and negative inputs render as
 * "0 B". One decimal is shown only for scaled values below 10.
 */
function formatBytes(bytes: number): string {
  if (!Number.isFinite(bytes) || bytes <= 0) return "0 B";

  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  let scaled = bytes;
  let index = 0;
  for (; scaled >= 1024 && index < lastUnit; index += 1) {
    scaled /= 1024;
  }

  const digits = index !== 0 && !(scaled >= 10) ? 1 : 0;
  return `${scaled.toFixed(digits)} ${units[index]}`;
}
770
+
771
/**
 * Convert a download job into the chat-metadata progress shape. `percent` is
 * included only when the job has a positive total (clamped to 0..100), and
 * `bytesPerSec` only when the measured rate is positive.
 */
function progressForJob(
  job: DownloadJob,
): LocalInferenceChatMetadata["progress"] {
  const { received, total, bytesPerSec, etaMs } = job;

  const percentField =
    total > 0
      ? {
          percent: Math.max(
            0,
            Math.min(100, Math.round((received / total) * 100)),
          ),
        }
      : {};
  const rateField =
    bytesPerSec > 0 ? { bytesPerSec: Math.round(bytesPerSec) } : {};

  return {
    ...percentField,
    receivedBytes: received,
    totalBytes: total,
    ...rateField,
    etaMs,
  };
}
788
+
789
/**
 * Describe download progress as "<percent>% (<received> of <total>)".
 * Returns an empty string when there is no progress object; the percent is
 * replaced by the word "progress" when unknown, and the " of <total>" part is
 * dropped when the total is not positive.
 */
function progressText(
  progress: LocalInferenceChatMetadata["progress"] | undefined,
): string {
  if (!progress) return "";

  const { percent, receivedBytes, totalBytes } = progress;
  const label = typeof percent === "number" ? `${percent}%` : "progress";
  let sizes = formatBytes(receivedBytes);
  if (totalBytes > 0) {
    sizes += ` of ${formatBytes(totalBytes)}`;
  }
  return `${label} (${sizes})`;
}
799
+
800
/**
 * Choose the user-facing sentence for a chat status. Each status has several
 * phrasings; the one returned is selected from the wall clock and changes
 * every 15 seconds, cycling through the list.
 */
function pickStatusLine(status: LocalInferenceChatMetadata["status"]): string {
  const linesByStatus: Record<LocalInferenceChatMetadata["status"], string[]> =
    {
      missing: [
        "I do not have a local chat model installed yet.",
        "Local chat is waiting on a model download.",
        "There is no local chat model ready on this device.",
      ],
      downloading: [
        "The local model is still downloading.",
        "I am still pulling down the local model.",
        "Local inference is waiting for the model download to finish.",
      ],
      loading: [
        "The local model is loading now.",
        "I am warming up the local model.",
        "Local inference is still bringing the model online.",
      ],
      failed: [
        "The local model setup hit an error.",
        "Local inference failed before generation could start.",
        "The local model is not ready because the last operation failed.",
      ],
      no_space: [
        "The local model needs more disk space before it can finish.",
        "Local inference is blocked because storage is full.",
        "The model download cannot continue until some disk space is freed.",
      ],
      idle: [
        "A local model is installed, but none is loaded right now.",
        "Local inference is idle with an installed model available.",
        "The local model is installed and waiting to be activated.",
      ],
      ready: [
        "Local inference is ready.",
        "The local model is loaded and ready.",
        "On-device inference is online.",
      ],
      cancelled: [
        "I cancelled the local model download.",
        "The local download has been stopped.",
        "Local model download cancelled.",
      ],
      routing: [
        "I updated the inference routing.",
        "The model routing preference is updated.",
        "Inference routing has been changed.",
      ],
    };

  const options = linesByStatus[status];
  // One 15-second window per phrasing, wrapping around the list.
  const windowIndex = Math.floor(Date.now() / 15_000);
  const chosen = options[windowIndex % options.length];
  return chosen ?? options[0];
}
851
+
852
/**
 * Build the chat reply for a local-inference command: a status sentence
 * followed by optional model, progress, error and free-form detail segments
 * joined with single spaces. The metadata is returned unchanged alongside the
 * text.
 */
function buildLocalInferenceChatResult(
  metadata: LocalInferenceChatMetadata,
  detail?: string,
): LocalInferenceChatResult {
  const progress = progressText(metadata.progress);

  const segments: string[] = [pickStatusLine(metadata.status)];
  segments.push(metadata.modelId ? `Model: ${metadata.modelId}.` : "");
  segments.push(progress ? `Progress: ${progress}.` : "");
  segments.push(metadata.error ? `Error: ${metadata.error}` : "");
  segments.push(detail ?? "");

  // Blank and whitespace-only segments are left out of the sentence.
  const text = segments
    .filter((segment) => segment.trim().length > 0)
    .join(" ");

  return { text, localInference: metadata };
}
869
+
870
/**
 * Find the first chat model the prompt mentions by id, display name, params,
 * bucket or category (case-insensitive substring match).
 *
 * Fields that are not strings, or that are empty / whitespace-only, are
 * ignored: an empty needle is contained in every string, so without this
 * guard one blank catalog field would make that model match every prompt.
 *
 * @param prompt Free-form user text to search.
 * @returns The first matching chat model, or `null` when none is mentioned.
 */
function resolveRequestedCatalogModel(prompt: string): CatalogModel | null {
  const normalized = prompt.toLowerCase();
  const mentioned = chatModels().find((model) => {
    const fields: unknown[] = [
      model.id,
      model.displayName,
      model.params,
      model.bucket,
      model.category,
    ];
    return fields.some((field) => {
      if (typeof field !== "string" || field.trim().length === 0) {
        return false;
      }
      return normalized.includes(field.toLowerCase());
    });
  });
  return mentioned ?? null;
}
885
+
886
/**
 * Decide which chat model a command should act on. Preference order: a model
 * named in the prompt, the currently active model when it is a chat catalog
 * entry, the smallest installed chat catalog entry, and finally the
 * RAM-based recommendation.
 */
async function resolveDefaultChatModel(
  prompt: string,
): Promise<CatalogModel | null> {
  const fromPrompt = resolveRequestedCatalogModel(prompt);
  if (fromPrompt) return fromPrompt;

  const installed = await installedSnapshot();
  const active = await getLocalInferenceActiveSnapshot();

  if (active.modelId) {
    const activeChat = CATALOG.find(
      (model) => model.id === active.modelId && model.role === "chat",
    );
    if (activeChat) return activeChat;
  }

  // Collect the chat catalog entries that are installed, smallest first.
  const installedChat: CatalogModel[] = [];
  for (const entry of installed) {
    const match = CATALOG.find(
      (model) => model.id === entry.id && model.role === "chat",
    );
    if (match) installedChat.push(match);
  }
  installedChat.sort((a, b) => a.sizeGb - b.sizeGb);

  return installedChat[0] ?? recommendedChatModel();
}
907
+
908
/**
 * Pin every text model slot to `provider` with a "manual" policy and persist
 * the updated routing preferences file (written with `version: 1`).
 */
async function setRoutingForChat(provider: string): Promise<void> {
  const stored = await readJsonFile<RoutingPreferencesFile>(
    routingPath(),
    defaultRoutingPreferences(),
  );
  const { preferences } = stored;

  for (const modelType of LOCAL_INFERENCE_TEXT_MODEL_TYPES) {
    preferences.preferredProvider[modelType] = provider;
    preferences.policy[modelType] = "manual";
  }

  await writeJsonFile(routingPath(), { version: 1, preferences });
}
920
+
921
/**
 * Load an installed model through the mobile device bridge and report the
 * outcome as a chat result. The module-level `activeModelState` moves to
 * "loading" first, then to "ready" on success or "error" on failure; a
 * failure whose message looks like a disk-space problem is reported with
 * status "no_space" instead of "failed".
 */
async function activateInstalledModel(
  installed: InstalledModel,
): Promise<LocalInferenceChatResult> {
  const modelId = installed.id;
  activeModelState = { modelId, loadedAt: null, status: "loading" };

  try {
    const { loadMobileDeviceBridgeModel } = await getMobileDeviceBridgeApi();
    await loadMobileDeviceBridgeModel(installed.path, modelId);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    activeModelState = {
      modelId,
      loadedAt: null,
      status: "error",
      error: message,
    };
    return buildLocalInferenceChatResult({
      intent: "use_local",
      status: isNoSpaceMessage(message) ? "no_space" : "failed",
      modelId,
      activeModelId: null,
      error: message,
    });
  }

  activeModelState = {
    modelId,
    loadedAt: new Date().toISOString(),
    status: "ready",
  };
  return buildLocalInferenceChatResult({
    intent: "use_local",
    status: "ready",
    modelId,
    activeModelId: modelId,
    provider: "capacitor-llama",
  });
}
961
+
962
/**
 * Summarise the local-inference state as a chat result. States are checked
 * in priority order: an in-flight download, a model that is loading, an
 * error (the `error` argument, else the last recorded activation error), a
 * ready model, an installed-but-idle chat model, and finally "missing".
 *
 * @param intent Command intent echoed back in the result metadata.
 * @param error Optional error (an `Error` or a string) to report.
 */
export async function getLocalInferenceChatStatus(
  intent: LocalInferenceCommandIntent = "status",
  error?: unknown,
): Promise<LocalInferenceChatResult> {
  const pending = [...activeDownloads.values()].find(
    ({ job }) => job.state === "queued" || job.state === "downloading",
  );
  if (pending) {
    const job = { ...pending.job };
    return buildLocalInferenceChatResult({
      intent,
      status: "downloading",
      modelId: job.modelId,
      activeModelId: activeModelState.modelId,
      progress: progressForJob(job),
    });
  }

  const active = await getLocalInferenceActiveSnapshot();
  const activeModelId = active.modelId;

  if (activeModelState.status === "loading") {
    return buildLocalInferenceChatResult({
      intent,
      status: "loading",
      modelId: activeModelState.modelId,
      activeModelId,
    });
  }

  // An explicit error argument wins over the last recorded activation error.
  let errorMessage = activeModelState.error;
  if (error instanceof Error) {
    errorMessage = error.message;
  } else if (typeof error === "string") {
    errorMessage = error;
  }
  if (errorMessage) {
    return buildLocalInferenceChatResult({
      intent,
      status: isNoSpaceMessage(errorMessage) ? "no_space" : "failed",
      modelId: activeModelState.modelId,
      activeModelId,
      error: errorMessage,
    });
  }

  if (active.status === "ready" && activeModelId) {
    const serviceReady = localInferenceService.getActive().status === "ready";
    return buildLocalInferenceChatResult({
      intent,
      status: "ready",
      modelId: activeModelId,
      activeModelId,
      provider: serviceReady ? LOCAL_INFERENCE_PROVIDER_ID : "capacitor-llama",
    });
  }

  const installed = await installedSnapshot();
  const installedChat = installed.find((entry) =>
    CATALOG.some((model) => model.id === entry.id && model.role === "chat"),
  );
  const hasInstalledChat = Boolean(installedChat);

  return buildLocalInferenceChatResult({
    intent,
    status: hasInstalledChat ? "idle" : "missing",
    modelId: installedChat ? installedChat.id : null,
    activeModelId,
  });
}
1039
+
1040
/**
 * Execute a local-inference chat command and describe the outcome.
 *
 * - "status": report the current state.
 * - "cancel": abort the download named in the prompt, or every active one.
 * - "use_cloud": route text slots to Eliza Cloud.
 * - "use_local": route text slots locally, then activate the chosen model if
 *   installed, otherwise start downloading it.
 * - "switch_smaller": activate the smallest installed chat model that is
 *   smaller than the active one, otherwise download the smallest chat model.
 * - anything else (including "redownload", which removes the installed copy
 *   first): start a download of the resolved default model.
 *
 * @param intent Parsed command intent.
 * @param prompt Original user text, used to resolve a requested model.
 */
export async function handleLocalInferenceChatCommand(
  intent: LocalInferenceCommandIntent,
  prompt: string,
): Promise<LocalInferenceChatResult> {
  const bySizeAscending = (a: CatalogModel, b: CatalogModel): number =>
    a.sizeGb - b.sizeGb;

  if (intent === "status") {
    return getLocalInferenceChatStatus(intent);
  }

  if (intent === "cancel") {
    const named = resolveRequestedCatalogModel(prompt);
    const targetIds = named ? [named.id] : [...activeDownloads.keys()];
    targetIds.forEach((id) => {
      activeDownloads.get(id)?.abortController.abort();
      activeDownloads.delete(id);
    });
    return buildLocalInferenceChatResult({
      intent,
      status: "cancelled",
      modelId: named?.id ?? targetIds[0] ?? null,
      activeModelId: activeModelState.modelId,
    });
  }

  if (intent === "use_cloud") {
    await setRoutingForChat("elizacloud");
    const currentModelId = activeModelState.modelId;
    return buildLocalInferenceChatResult(
      {
        intent,
        status: "routing",
        modelId: currentModelId,
        activeModelId: currentModelId,
        provider: "elizacloud",
      },
      "Subsequent chat model calls will prefer Eliza Cloud.",
    );
  }

  if (intent === "use_local") {
    await setRoutingForChat("capacitor-llama");
    const installed = await installedSnapshot();
    const target = await resolveDefaultChatModel(prompt);
    const alreadyInstalled = installed.find(
      (entry) => entry.id === target?.id,
    );
    if (alreadyInstalled) {
      return activateInstalledModel(alreadyInstalled);
    }
    if (!target) {
      return getLocalInferenceChatStatus(intent);
    }
    const job = await startDownload(target.id);
    return buildLocalInferenceChatResult(
      {
        intent: "download",
        status: "downloading",
        modelId: target.id,
        activeModelId: activeModelState.modelId,
        provider: "capacitor-llama",
        progress: progressForJob(job),
      },
      "I also set chat routing to prefer local inference.",
    );
  }

  if (intent === "switch_smaller") {
    const active = await getLocalInferenceActiveSnapshot();
    const installed = await installedSnapshot();
    const activeCatalog = active.modelId
      ? CATALOG.find((model) => model.id === active.modelId)
      : null;

    // Installed chat models that are strictly smaller than the active one
    // (or every installed chat model when nothing is active).
    const candidates: Array<{ entry: InstalledModel; catalog: CatalogModel }> =
      [];
    for (const entry of installed) {
      const catalog = CATALOG.find(
        (model) => model.id === entry.id && model.role === "chat",
      );
      if (!catalog) continue;
      if (activeCatalog && !(catalog.sizeGb < activeCatalog.sizeGb)) continue;
      candidates.push({ entry, catalog });
    }
    candidates.sort((a, b) => a.catalog.sizeGb - b.catalog.sizeGb);

    const [smallestCandidate] = candidates;
    if (smallestCandidate) {
      return activateInstalledModel(smallestCandidate.entry);
    }

    const [smallest] = chatModels().sort(bySizeAscending);
    if (smallest) {
      const job = await startDownload(smallest.id);
      return buildLocalInferenceChatResult(
        {
          intent,
          status: "downloading",
          modelId: smallest.id,
          activeModelId: active.modelId,
          progress: progressForJob(job),
        },
        "I could not switch to a smaller installed model, so I started the smallest local chat model download.",
      );
    }
    // No chat models in the catalog: continue to the generic path below.
  }

  const model = await resolveDefaultChatModel(prompt);
  if (!model) {
    return getLocalInferenceChatStatus(intent);
  }
  if (intent === "redownload") {
    // Best effort: a failed removal must not block the fresh download.
    await removeInstalledModel(model.id).catch(() => false);
  }
  const job = await startDownload(model.id);
  return buildLocalInferenceChatResult({
    intent,
    status: "downloading",
    modelId: model.id,
    activeModelId: activeModelState.modelId,
    progress: progressForJob(job),
  });
}
1160
+
1161
/** Write one server-sent-event frame carrying `payload` serialised as JSON. */
function writeSse(res: http.ServerResponse, payload: unknown): void {
  const serialized = JSON.stringify(payload);
  const frame = `data: ${serialized}\n\n`;
  res.write(frame);
}
1164
+
1165
/**
 * HTTP entry point for the local-inference API.
 *
 * Delegates the co-located voice namespaces to their own route modules, then
 * serves everything under `/api/local-inference/`: the downloads SSE stream,
 * hub / hardware / catalog / installed / device / providers snapshots, slot
 * assignments, routing preferences, download start and cancel, active-model
 * load and unload, installed-model verification and removal, and the
 * `hf-search` stub.
 *
 * @param req Incoming request; only `method` and `url` are inspected here.
 * @param res Response that the matched handler writes to.
 * @returns `true` when the request was handled (a response was started),
 *   `false` when the path belongs to another router.
 */
export async function handleLocalInferenceRoutes(
  req: http.IncomingMessage,
  res: http.ServerResponse,
): Promise<boolean> {
  const method = (req.method ?? "GET").toUpperCase();
  const url = new URL(req.url ?? "/", "http://localhost");
  const pathname = url.pathname;

  // Path segments come straight from the client. decodeURIComponent throws
  // URIError on a malformed percent-escape, so report that as null and let
  // the route answer 400 instead of rejecting the whole handler.
  const decodePathSegment = (raw: string | undefined): string | null => {
    try {
      return decodeURIComponent(raw ?? "");
    } catch {
      return null;
    }
  };
  // Detached copies of the in-flight download jobs.
  const snapshotDownloads = () =>
    [...activeDownloads.values()].map(({ job }) => ({ ...job }));
  // Bridge status, falling back to the "unavailable" shape on any failure.
  const readBridgeStatus = () =>
    getMobileDeviceBridgeApi()
      .then((api) => api.getMobileDeviceBridgeStatus())
      .catch(() => getMobileDeviceBridgeStatusUnavailable());

  // Co-located voice-first-run namespace — runs alongside local-inference
  // so the existing /api/local-inference/* mount point in server.ts also
  // catches /api/voice/first-run/* without a second wire-up.
  if (pathname.startsWith("/api/voice/first-run/")) {
    const { handleVoiceFirstRunRoutes } = await import(
      "./routes/voice-first-run-routes.js"
    );
    if (await handleVoiceFirstRunRoutes(req, res)) return true;
  }
  // Family-member capture route lives under /v1/voice/first-run/family-member.
  if (pathname === "/v1/voice/first-run/family-member") {
    const { handleFamilyMemberRoute } = await import(
      "./routes/family-member-route.js"
    );
    if (await handleFamilyMemberRoute(req, res)) return true;
  }
  // Speaker voice-profile binding routes (bind/unbind a recognized voice to
  // an elizaOS entity) live under /v1/voice/speaker-profiles.
  if (pathname.startsWith("/v1/voice/speaker-profiles")) {
    const { handleVoiceSpeakerProfileRoutes } = await import(
      "./routes/voice-speaker-profile-routes.js"
    );
    if (await handleVoiceSpeakerProfileRoutes(req, res)) return true;
  }
  if (!pathname.startsWith("/api/local-inference/")) return false;

  // Voice-sub-model auto-updater compat namespace
  // (R5-versioning §3 + §4 + §5). The route module owns its own
  // path-prefix check and returns false on miss so non-voice-model
  // /api/local-inference/* paths fall through to the handlers below.
  if (pathname.startsWith("/api/local-inference/voice-models")) {
    const { handleVoiceModelsRoutes } = await import(
      "./routes/voice-models-routes.js"
    );
    if (await handleVoiceModelsRoutes(req, res)) return true;
  }

  if (
    method === "GET" &&
    pathname === "/api/local-inference/downloads/stream"
  ) {
    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      Connection: "keep-alive",
    });
    const interval = setInterval(() => {
      writeSse(res, { type: "snapshot", downloads: snapshotDownloads() });
    }, 1000);
    // The ticker alone must not keep the process alive.
    interval.unref();
    // Send one snapshot immediately so clients do not wait for the first tick.
    writeSse(res, { type: "snapshot", downloads: snapshotDownloads() });
    req.on("close", () => clearInterval(interval));
    return true;
  }

  if (method === "GET" && pathname === "/api/local-inference/hub") {
    sendJson(res, await hubSnapshot());
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/hardware") {
    sendJson(res, (await hubSnapshot()).hardware);
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/catalog") {
    sendJson(res, {
      models: CATALOG.filter((model) => !model.hiddenFromCatalog),
    });
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/installed") {
    sendJson(res, { models: await installedSnapshot() });
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/device") {
    sendJson(res, await readBridgeStatus());
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/providers") {
    const bridge = await readBridgeStatus();
    const installed = await installedSnapshot();
    sendJson(res, {
      providers: [
        {
          id: "capacitor-llama",
          label: "Eliza-1 on-device runtime (mobile)",
          kind: "local",
          description: "Runs Eliza-1 natively on iOS or Android via Capacitor.",
          supportedSlots: ["TEXT_SMALL", "TEXT_LARGE", "TEXT_EMBEDDING"],
          configureHref: null,
          enableState: {
            enabled: bridge.connected,
            reason: bridge.connected
              ? "Device bridge connected"
              : "Waiting for device bridge",
          },
          registeredSlots: ["TEXT_SMALL", "TEXT_LARGE", "TEXT_EMBEDDING"],
        },
        {
          id: LOCAL_INFERENCE_PROVIDER_ID,
          label: "Eliza-1 local inference",
          kind: "local",
          description:
            "Eliza-1 bundles installed in this agent state directory.",
          supportedSlots: LOCAL_INFERENCE_MODEL_TYPES,
          configureHref: "#local-inference-panel",
          enableState: {
            enabled: installed.length > 0,
            reason:
              installed.length > 0
                ? "Eliza-1 bundle installed"
                : "No Eliza-1 bundle installed",
          },
          registeredSlots:
            installed.length > 0 ? LOCAL_INFERENCE_MODEL_TYPES : [],
        },
      ],
    });
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/assignments") {
    sendJson(res, { assignments: await readAssignments() });
    return true;
  }
  if (method === "POST" && pathname === "/api/local-inference/assignments") {
    const body = await readJsonBody<Record<string, unknown>>(req, res);
    if (!body) return true;
    const slot = typeof body.slot === "string" ? body.slot : null;
    if (!slot) {
      sendJsonError(res, "slot is required");
      return true;
    }
    const assignments = await readAssignments();
    // A non-empty modelId assigns the slot; anything else clears it.
    if (typeof body.modelId === "string" && body.modelId.trim()) {
      assignments[slot as keyof Assignments] = body.modelId.trim();
    } else {
      delete assignments[slot as keyof Assignments];
    }
    sendJson(res, { assignments: await writeAssignments(assignments) });
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/routing") {
    const preferences = await readJsonFile<RoutingPreferencesFile>(
      routingPath(),
      defaultRoutingPreferences(),
    );
    sendJson(res, {
      registrations: LOCAL_INFERENCE_MODEL_TYPES.map((modelType) => ({
        modelType,
        provider: LOCAL_INFERENCE_PROVIDER_ID,
        priority: 0,
        registeredAt: new Date().toISOString(),
      })),
      preferences: preferences.preferences,
    });
    return true;
  }
  if (
    method === "POST" &&
    (pathname === "/api/local-inference/routing/preferred" ||
      pathname === "/api/local-inference/routing/policy")
  ) {
    const body = await readJsonBody<Record<string, unknown>>(req, res);
    // A falsy body is treated as "already answered", exactly as the
    // assignments and downloads handlers in this function do; sending a
    // second error here would write to a finished response.
    if (!body) return true;
    if (typeof body.slot !== "string") {
      sendJsonError(res, "slot is required");
      return true;
    }
    const current = await readJsonFile<RoutingPreferencesFile>(
      routingPath(),
      defaultRoutingPreferences(),
    );
    const preferences = current.preferences;
    const slot = body.slot;
    if (pathname.endsWith("/preferred")) {
      if (typeof body.provider === "string" && body.provider.trim()) {
        preferences.preferredProvider[slot] = body.provider.trim();
      } else {
        delete preferences.preferredProvider[slot];
      }
    } else if (typeof body.policy === "string" && body.policy.trim()) {
      preferences.policy[slot] = body.policy.trim();
    } else {
      delete preferences.policy[slot];
    }
    await writeJsonFile(routingPath(), { version: 1, preferences });
    sendJson(res, { preferences });
    return true;
  }
  if (method === "POST" && pathname === "/api/local-inference/downloads") {
    const body = await readJsonBody<Record<string, unknown>>(req, res);
    if (!body) return true;
    const modelId = typeof body.modelId === "string" ? body.modelId : null;
    if (!modelId) {
      sendJsonError(res, "modelId is required");
      return true;
    }
    try {
      sendJson(res, { job: await startDownload(modelId) }, 202);
    } catch (error) {
      sendJsonError(
        res,
        error instanceof Error ? error.message : "Failed to start download",
        400,
      );
    }
    return true;
  }
  const downloadMatch = /^\/api\/local-inference\/downloads\/([^/]+)$/.exec(
    pathname,
  );
  if (method === "DELETE" && downloadMatch) {
    const modelId = decodePathSegment(downloadMatch[1]);
    if (modelId === null) {
      sendJsonError(res, "Invalid model id", 400);
      return true;
    }
    activeDownloads.get(modelId)?.abortController.abort();
    activeDownloads.delete(modelId);
    sendJson(res, { cancelled: true });
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/active") {
    sendJson(res, await getLocalInferenceActiveSnapshot());
    return true;
  }
  if (method === "POST" && pathname === "/api/local-inference/active") {
    const body = await readJsonBody<Record<string, unknown>>(req, res);
    // Same "already answered" convention as the other body-reading handlers.
    if (!body) return true;
    if (typeof body.modelId !== "string") {
      sendJsonError(res, "modelId is required");
      return true;
    }
    const installed = (await installedSnapshot()).find(
      (model) => model.id === body.modelId,
    );
    if (!installed) {
      sendJsonError(res, `Model not installed: ${body.modelId}`, 404);
      return true;
    }
    // #7679: refuse to activate a candidate-only / weights-staged bundle
    // whose manifest reports `evals.textEval.passed=false`. Runs before
    // any assignment write or device-bridge load so a known-bad bundle
    // can't take over the assignment slots nor leave the bridge holding
    // a half-loaded model. The gate only fires for tiers whose
    // `eliza-1.manifest.json` is reachable next to the installed bundle
    // (see `defaultManifestLoader`); external-scan / non-bundle installs
    // are passed through.
    try {
      assertManifestEvalsPassed(installed);
    } catch (err) {
      if (err instanceof CandidateModelActivationError) {
        sendJson(
          res,
          {
            error: err.message,
            modelId: err.modelId,
            manifestVersion: err.manifestVersion,
            failedEvals: err.failedEvals,
          },
          422,
        );
        return true;
      }
      throw err;
    }
    const catalog = CATALOG.find((model) => model.id === installed.id);
    if (catalog) await assignModel(catalog, true);
    try {
      activeModelState = {
        modelId: installed.id,
        loadedAt: null,
        status: "loading",
      };
      if (shouldUseAospLocalInference()) {
        const { activateAospLocalInferenceModel, buildAospLoadModelArgs } =
          await getAospLocalInferenceApi();
        activeModelState = await activateAospLocalInferenceModel({
          modelId: installed.id,
          modelPath: installed.path,
          loadArgs: buildAospLoadModelArgs("chat", installed.path),
        });
        sendJson(res, activeModelState);
        // Fire-and-forget: the response does not wait for the voice prewarm.
        void prewarmLocalVoiceStackForModel(installed.id);
        return true;
      }
      const { loadMobileDeviceBridgeModel } = await getMobileDeviceBridgeApi();
      await loadMobileDeviceBridgeModel(installed.path, installed.id);
      activeModelState = {
        modelId: installed.id,
        loadedAt: new Date().toISOString(),
        status: "ready",
      };
      sendJson(res, activeModelState);
      // Fire-and-forget: the response does not wait for the voice prewarm.
      void prewarmLocalVoiceStackForModel(installed.id);
    } catch (error) {
      activeModelState = {
        modelId: installed.id,
        loadedAt: null,
        status: "error",
        error: error instanceof Error ? error.message : String(error),
      };
      sendJsonError(
        res,
        error instanceof Error ? error.message : "Failed to load model",
        503,
      );
    }
    return true;
  }
  if (method === "DELETE" && pathname === "/api/local-inference/active") {
    try {
      if (shouldUseAospLocalInference()) {
        const { clearAospLocalInferenceModel } =
          await getAospLocalInferenceApi();
        activeModelState = await clearAospLocalInferenceModel();
        sendJson(res, activeModelState);
        return true;
      }
      const { unloadMobileDeviceBridgeModel } =
        await getMobileDeviceBridgeApi();
      await unloadMobileDeviceBridgeModel();
      activeModelState = { modelId: null, loadedAt: null, status: "idle" };
      sendJson(res, activeModelState);
    } catch (error) {
      sendJsonError(
        res,
        error instanceof Error ? error.message : "Failed to unload model",
        503,
      );
    }
    return true;
  }
  const verifyMatch =
    /^\/api\/local-inference\/installed\/([^/]+)\/verify$/.exec(pathname);
  if (method === "POST" && verifyMatch) {
    const id = decodePathSegment(verifyMatch[1]);
    if (id === null) {
      sendJsonError(res, "Invalid model id", 400);
      return true;
    }
    const installed = (await installedSnapshot()).find(
      (model) => model.id === id,
    );
    if (!installed) {
      sendJsonError(res, "Model not installed", 404);
      return true;
    }
    const currentSha256 = await hashFile(installed.path);
    sendJson(res, {
      state: currentSha256 === installed.sha256 ? "ok" : "unknown",
      currentSha256,
      expectedSha256: installed.sha256 ?? null,
      currentBytes: installed.sizeBytes,
    });
    return true;
  }
  const installedMatch = /^\/api\/local-inference\/installed\/([^/]+)$/.exec(
    pathname,
  );
  if (method === "DELETE" && installedMatch) {
    const id = decodePathSegment(installedMatch[1]);
    if (id === null) {
      sendJsonError(res, "Invalid model id", 400);
      return true;
    }
    sendJson(res, { removed: await removeInstalledModel(id) });
    return true;
  }
  if (method === "GET" && pathname === "/api/local-inference/hf-search") {
    sendJson(res, { models: [] });
    return true;
  }

  return false;
}