@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,1398 @@
1
+ /// <reference path="./capacitor-llama.d.ts" />
2
+
3
+ /**
4
+ * Registers the standalone llama.cpp engine as the runtime handler for
5
+ * `ModelType.TEXT_SMALL` and `ModelType.TEXT_LARGE`.
6
+ *
7
+ * Priority is 0 — same band as cloud and direct provider plugins. Tie-breaks
8
+ * between local and cloud are owned by the routing-policy layer
9
+ * (`router-handler.ts` + `routing-policy.ts`), not by this priority value:
10
+ * the router sits at MAX_SAFE_INTEGER and consults the user's policy
11
+ * (manual / cheapest / fastest / prefer-local / round-robin) on every call.
12
+ *
13
+ * Until the cuttlefish smoke landed this was -1 to "let cloud win by default,"
14
+ * but that conflated routing-policy (a user preference) with handler
15
+ * priority (a registration ordinal). The runtime's getModel() returns
16
+ * undefined when no priority-0 handler is registered, which manifested as
17
+ * "No handler found for delegate type: TEXT_SMALL" on AOSP builds where
18
+ * the AOSP local inference loader is the only provider. Both cloud-only and
19
+ * local-only deployments now have a registered priority-0 handler; the
20
+ * router decides which one fires per request.
21
+ *
22
+ * Parallels `ensure-text-to-speech-handler.ts` — same shape, same guards.
23
+ */
24
+
25
+ import { existsSync, linkSync, mkdirSync, symlinkSync } from "node:fs";
26
+ import path from "node:path";
27
+ import {
28
+ type AgentRuntime,
29
+ type GenerateTextParams,
30
+ type IAgentRuntime,
31
+ type ImageDescriptionParams,
32
+ type ImageDescriptionResult,
33
+ logger,
34
+ ModelType,
35
+ renderMessageHandlerStablePrefix,
36
+ type TextEmbeddingParams,
37
+ type TextToSpeechParams,
38
+ type TranscriptionParams,
39
+ type UUID,
40
+ } from "@elizaos/core";
41
+ import { LocalInferenceUnavailableError } from "../provider";
42
+ import {
43
+ type LocalInferenceLoader,
44
+ resolveLocalInferenceLoadArgs,
45
+ } from "../services/active-model";
46
+ import {
47
+ autoAssignAtBoot,
48
+ isEmbeddingModelId,
49
+ readEffectiveAssignments,
50
+ } from "../services/assignments";
51
+ import {
52
+ extractConversationId,
53
+ extractPromptCacheKey,
54
+ resolveLocalCacheKey,
55
+ } from "../services/cache-bridge";
56
+ import { deviceBridge } from "../services/device-bridge";
57
+ import { localInferenceEngine } from "../services/engine";
58
+ import { handlerRegistry } from "../services/handler-registry";
59
+ import { tryGetMemoryArbiter } from "../services/memory-arbiter";
60
+ import { listInstalledModels } from "../services/registry";
61
+ import { installRouterHandler } from "../services/router-handler";
62
+ import {
63
+ type ElizaHarnessSchema,
64
+ elizaHarnessSchemaFromSkeleton,
65
+ } from "../services/structured-output";
66
+ import type { AgentModelSlot } from "../services/types";
67
+ import { decodeMonoPcm16Wav, type TranscriptionAudio } from "../services/voice";
68
+ import { DEFAULT_MODELS_DIR } from "./embedding-manager-support";
69
+ import { EMBEDDING_PRESETS } from "./embedding-presets";
70
+ import { isLocalEmbeddingDisabledByEnv } from "./embedding-warmup-policy";
71
+
72
+ type GenerateTextHandler = (
73
+ runtime: IAgentRuntime,
74
+ params: GenerateTextParams,
75
+ ) => Promise<string>;
76
+
77
+ /**
78
+ * Embedding handler signature — accepts the same union the runtime hands
79
+ * to TEXT_EMBEDDING calls (`TextEmbeddingParams | string | null`) and
80
+ * returns the raw float vector.
81
+ */
82
+ type EmbeddingHandler = (
83
+ runtime: IAgentRuntime,
84
+ params: TextEmbeddingParams | string | null,
85
+ ) => Promise<number[]>;
86
+
87
+ type TextToSpeechHandler = (
88
+ runtime: IAgentRuntime,
89
+ params: TextToSpeechParams | string,
90
+ ) => Promise<Uint8Array>;
91
+
92
+ type TranscriptionHandler = (
93
+ runtime: IAgentRuntime,
94
+ params: TranscriptionParams | Buffer | string | LocalTranscriptionParams,
95
+ ) => Promise<string>;
96
+
97
+ type ImageDescriptionHandler = (
98
+ runtime: IAgentRuntime,
99
+ params: ImageDescriptionParams | string,
100
+ ) => Promise<ImageDescriptionResult>;
101
+
102
+ interface LocalTranscriptionParams {
103
+ pcm?: Float32Array;
104
+ audio?: Uint8Array | ArrayBuffer | Buffer;
105
+ sampleRateHz?: number;
106
+ sampleRate?: number;
107
+ signal?: AbortSignal;
108
+ }
109
+
110
+ type LocalModelHandler =
111
+ | GenerateTextHandler
112
+ | EmbeddingHandler
113
+ | TextToSpeechHandler
114
+ | TranscriptionHandler
115
+ | ImageDescriptionHandler;
116
+
117
+ type RuntimeWithModelRegistration = AgentRuntime & {
118
+ getModel: (modelType: string | number) => LocalModelHandler | undefined;
119
+ registerModel: (
120
+ modelType: string | number,
121
+ handler: LocalModelHandler,
122
+ provider: string,
123
+ priority?: number,
124
+ ) => void;
125
+ };
126
+
127
+ const LOCAL_INFERENCE_PROVIDER = "eliza-local-inference";
128
+ const DEVICE_BRIDGE_PROVIDER = "eliza-device-bridge";
129
+ const CAPACITOR_LLAMA_PROVIDER = "capacitor-llama";
130
+ const AOSP_LLAMA_PROVIDER = "eliza-aosp-llama";
131
+ const LOCAL_INFERENCE_HANDLER_INSTALLED = Symbol.for(
132
+ "elizaos.local-inference.handlers-installed",
133
+ );
134
+ type RuntimeWithLocalInferenceFlag = RuntimeWithModelRegistration & {
135
+ [LOCAL_INFERENCE_HANDLER_INSTALLED]?: boolean;
136
+ };
137
+ /**
138
+ * Same band as cloud / direct provider plugins. Tie-breaks between
139
+ * candidates live in `routing-policy.ts`, not in this number — the
140
+ * router (registered at MAX_SAFE_INTEGER) consults the user's
141
+ * per-slot policy on every dispatch.
142
+ *
143
+ * Was -1 historically, which made `runtime.getModel(TEXT_SMALL)` return
144
+ * undefined when the AOSP local-inference loader was the only registered
145
+ * provider. The smoke run failed with "No handler found for delegate
146
+ * type: TEXT_SMALL"; bumping to 0 unblocks AOSP without changing
147
+ * cloud-only deployments (cloud providers still register at 0 and the
148
+ * routing-policy layer picks between them).
149
+ */
150
+ const LOCAL_INFERENCE_PRIORITY = 0;
151
+
152
+ export function shouldRegisterLocalInferenceHandlers(mode: string): boolean {
153
+ return mode === "local" || mode === "local-only";
154
+ }
155
+
156
+ function normalizeRuntimeMode(value: unknown): string | null {
157
+ if (typeof value !== "string") return null;
158
+ const normalized = value.trim().toLowerCase();
159
+ if (normalized === "local-safe" || normalized === "local-yolo")
160
+ return "local";
161
+ if (
162
+ normalized === "local" ||
163
+ normalized === "local-only" ||
164
+ normalized === "cloud" ||
165
+ normalized === "remote"
166
+ ) {
167
+ return normalized;
168
+ }
169
+ return null;
170
+ }
171
+
172
+ function getRuntimeMode(runtime: IAgentRuntime): string {
173
+ for (const key of [
174
+ "ELIZA_DEPLOYMENT_RUNTIME",
175
+ "ELIZA_RUNTIME_MODE",
176
+ "RUNTIME_MODE",
177
+ ] as const) {
178
+ const fromSetting = normalizeRuntimeMode(runtime.getSetting(key));
179
+ if (fromSetting) return fromSetting;
180
+ const fromEnv = normalizeRuntimeMode(process.env[key]);
181
+ if (fromEnv) return fromEnv;
182
+ }
183
+ if (
184
+ process.env.ELIZA_CLOUD_PROVISIONED === "1" ||
185
+ process.env.ELIZAOS_CLOUD_ENABLED === "1"
186
+ ) {
187
+ return "cloud";
188
+ }
189
+ return "local";
190
+ }
191
+
192
+ function getLoader(runtime: IAgentRuntime): LocalInferenceLoader | null {
193
+ const candidate = (
194
+ runtime as { getService?: (name: string) => unknown }
195
+ ).getService?.("localInferenceLoader");
196
+ if (!candidate || typeof candidate !== "object") return null;
197
+ const loader = candidate as Partial<LocalInferenceLoader>;
198
+ if (
199
+ typeof loader.loadModel === "function" &&
200
+ typeof loader.unloadModel === "function"
201
+ ) {
202
+ return candidate as LocalInferenceLoader;
203
+ }
204
+ return null;
205
+ }
206
+
207
+ /**
208
+ * Look up the model assigned to a given agent slot and ensure it's the
209
+ * one loaded before generation runs. Loads lazily on first call; swaps
210
+ * when a different slot's assignment fires with a different model.
211
+ *
212
+ * If no assignment is set for the slot, falls back to whatever is
213
+ * currently loaded — UNLESS the loaded model is an embedding model and
214
+ * this is a chat/generative slot. That combination produces `[unused{N}]`
215
+ * garbage (a BERT model forced to autoregress), so we fail loudly with an
216
+ * actionable message instead. See elizaOS/eliza#7687.
217
+ */
218
+ async function ensureAssignedModelLoaded(
219
+ loader: LocalInferenceLoader | null,
220
+ slot: AgentModelSlot,
221
+ ): Promise<void> {
222
+ const assignments = await readEffectiveAssignments();
223
+ const assignedId = assignments[slot];
224
+ if (!assignedId) {
225
+ // Loud-failure guard: an unassigned chat slot must not silently
226
+ // dispatch to whatever model happens to be loaded — if that's an
227
+ // embedding model, completion emits reserved-token garbage.
228
+ if (slot === "TEXT_SMALL" || slot === "TEXT_LARGE") {
229
+ const installed = await listInstalledModels();
230
+ const currentPath =
231
+ loader?.currentModelPath() ?? localInferenceEngine.currentModelPath();
232
+ const current = currentPath
233
+ ? installed.find((m) => m.path === currentPath)
234
+ : undefined;
235
+ if (current && isEmbeddingModelId(current.id)) {
236
+ throw new Error(
237
+ `[local-inference] No chat model assigned for slot ${slot} — open Settings → Local models. The currently-loaded model (${current.id}) is an embedding model and cannot serve text generation.`,
238
+ );
239
+ }
240
+ }
241
+ return;
242
+ }
243
+
244
+ // Desktop fast path: check the engine state directly.
245
+ if (!loader && localInferenceEngine.currentModelPath()) {
246
+ const installed = await listInstalledModels();
247
+ const current = installed.find(
248
+ (m) => m.path === localInferenceEngine.currentModelPath(),
249
+ );
250
+ if (current?.id === assignedId) return;
251
+ }
252
+
253
+ // Via loader: compare reported path against assignment.
254
+ if (loader) {
255
+ const currentPath = loader.currentModelPath();
256
+ if (currentPath) {
257
+ const installed = await listInstalledModels();
258
+ const current = installed.find((m) => m.path === currentPath);
259
+ if (current?.id === assignedId) return;
260
+ }
261
+ }
262
+
263
+ const installed = await listInstalledModels();
264
+ const target = installed.find((m) => m.id === assignedId);
265
+ if (!target) {
266
+ throw new Error(
267
+ `[local-inference] Slot ${slot} assigned to ${assignedId}, but that model is not installed.`,
268
+ );
269
+ }
270
+
271
+ if (loader) {
272
+ await loader.unloadModel();
273
+ await loader.loadModel(await resolveLocalInferenceLoadArgs(target));
274
+ } else {
275
+ await localInferenceEngine.load(target.path);
276
+ }
277
+ }
278
+
279
+ /**
280
+ * True when the caller opted this generation into *guided structured decode* —
281
+ * the deterministic-token prefill-plan short-circuit on top of the GBNF
282
+ * constrained decode. Off by default: needs either an explicit
283
+ * `providerOptions.eliza.guidedDecode === true` (the planner / message service
284
+ * sets this when it built a forced skeleton) or the process-wide
285
+ * `ELIZA_LOCAL_GUIDED_DECODE=1` (or `=true`) opt-in.
286
+ */
287
+ function guidedDecodeRequested(params: GenerateTextParams): boolean {
288
+ const providerOptions = (params as { providerOptions?: unknown })
289
+ .providerOptions;
290
+ const elizaOpts =
291
+ providerOptions && typeof providerOptions === "object"
292
+ ? (providerOptions as { eliza?: { guidedDecode?: unknown } }).eliza
293
+ : undefined;
294
+ if (elizaOpts && elizaOpts.guidedDecode === true) return true;
295
+ const env = process.env.ELIZA_LOCAL_GUIDED_DECODE;
296
+ return env === "1" || env === "true";
297
+ }
298
+
299
+ /**
300
+ * Build the {@link ElizaHarnessSchema} for this call — the bundle of the
301
+ * forced skeleton, the pre-built grammar (when the producer supplied one), and
302
+ * the derived deterministic-token prefill plan. Returns undefined unless guided
303
+ * decode is requested AND a `responseSkeleton` is present — an explicit `grammar`
303
+ * alone is not enough (schema presence == the off-by-default switch for the prefill plan).
305
+ */
306
+ function elizaHarnessSchemaFromParams(
307
+ params: GenerateTextParams,
308
+ ): ElizaHarnessSchema | undefined {
309
+ if (!guidedDecodeRequested(params)) return undefined;
310
+ const skeleton = params.responseSkeleton;
311
+ if (!skeleton) return undefined;
312
+ return elizaHarnessSchemaFromSkeleton({
313
+ skeleton,
314
+ grammar: typeof params.grammar === "string" ? params.grammar : undefined,
315
+ });
316
+ }
317
+
318
+ function extractThinkingControl(
319
+ providerOptions: unknown,
320
+ ): "auto" | "on" | "off" | undefined {
321
+ const elizaOpts =
322
+ providerOptions && typeof providerOptions === "object"
323
+ ? (providerOptions as { eliza?: { thinking?: unknown } }).eliza
324
+ : undefined;
325
+ const thinking = elizaOpts?.thinking;
326
+ return thinking === "auto" || thinking === "on" || thinking === "off"
327
+ ? thinking
328
+ : undefined;
329
+ }
330
+
331
+ /**
332
+ * Project a `GenerateTextParams` onto the engine's `GenerateArgs`, threading
333
+ * the structure-forcing extensions (`prefill`, `responseSkeleton`, `grammar`,
334
+ * `streamStructured`, `elizaSchema`) and wiring `onStreamChunk` to the engine's
335
+ * per-token `onTextChunk`. Cloud adapters ignore these fields; the local engine
336
+ * honours them (the forced-span / prefill / grammar / prefill-plan path is
337
+ * local-model-only).
338
+ */
339
+ function engineGenerateArgsFromParams(
340
+ params: GenerateTextParams,
341
+ cacheKey: string | undefined,
342
+ ): {
343
+ prompt: string;
344
+ stopSequences?: string[];
345
+ cacheKey?: string;
346
+ signal?: AbortSignal;
347
+ maxTokens?: number;
348
+ temperature?: number;
349
+ topP?: number;
350
+ prefill?: string;
351
+ responseSkeleton?: GenerateTextParams["responseSkeleton"];
352
+ grammar?: string;
353
+ streamStructured?: boolean;
354
+ elizaSchema?: ElizaHarnessSchema;
355
+ spanSamplerPlan?: GenerateTextParams["spanSamplerPlan"];
356
+ thinking?: "auto" | "on" | "off";
357
+ onTextChunk?: (chunk: string) => void | Promise<void>;
358
+ voiceOutput?: "user-visible" | "internal";
359
+ } {
360
+ const renderContent = (content: unknown): string => {
361
+ if (typeof content === "string") return content;
362
+ if (Array.isArray(content)) {
363
+ return content
364
+ .map((part) => {
365
+ if (typeof part === "string") return part;
366
+ if (
367
+ part &&
368
+ typeof part === "object" &&
369
+ typeof (part as { text?: unknown }).text === "string"
370
+ ) {
371
+ return (part as { text: string }).text;
372
+ }
373
+ return "";
374
+ })
375
+ .filter(Boolean)
376
+ .join("\n");
377
+ }
378
+ return "";
379
+ };
380
+ const promptFromSegments =
381
+ params.promptSegments && params.promptSegments.length > 0
382
+ ? params.promptSegments.map((segment) => segment.content).join("")
383
+ : "";
384
+ const promptFromMessages =
385
+ !promptFromSegments && params.messages && params.messages.length > 0
386
+ ? params.messages
387
+ .map((message) => {
388
+ const content = renderContent(message.content);
389
+ return content ? `${message.role}:\n${content}` : "";
390
+ })
391
+ .filter(Boolean)
392
+ .join("\n\n")
393
+ : "";
394
+ const streamStructured = params.streamStructured === true;
395
+ // Surface per-token chunks to the caller. The runtime passes the agent
396
+ // reply path's `onStreamChunk` here when it wants the LLM→TTS handoff —
397
+ // previously dropped at this layer. Only wire it when the caller asked
398
+ // for streaming (`stream` or `streamStructured`) so non-streaming callers
399
+ // don't pay the chunk-callback overhead.
400
+ const onTextChunk =
401
+ (params.stream === true || streamStructured) &&
402
+ typeof params.onStreamChunk === "function"
403
+ ? (chunk: string) => params.onStreamChunk?.(chunk)
404
+ : undefined;
405
+ return {
406
+ prompt: params.prompt ?? (promptFromSegments || promptFromMessages),
407
+ stopSequences: params.stopSequences,
408
+ cacheKey,
409
+ signal: params.signal,
410
+ maxTokens: params.maxTokens,
411
+ temperature: params.temperature,
412
+ topP: params.topP,
413
+ prefill: params.prefill,
414
+ responseSkeleton: params.responseSkeleton,
415
+ grammar: params.grammar,
416
+ streamStructured: streamStructured || undefined,
417
+ elizaSchema: elizaHarnessSchemaFromParams(params),
418
+ spanSamplerPlan: params.spanSamplerPlan,
419
+ thinking: extractThinkingControl(params.providerOptions),
420
+ onTextChunk,
421
+ voiceOutput:
422
+ params.voiceOutput ??
423
+ (typeof params.onStreamChunk === "function" ? "user-visible" : undefined),
424
+ };
425
+ }
426
+
427
+ function makeHandler(slot: AgentModelSlot): GenerateTextHandler {
428
+ return async (runtime, params) => {
429
+ const loader = getLoader(runtime);
430
+
431
+ // Lazy-load the assigned model for this slot, if any. Swaps are
432
+ // expensive; the user is expected to assign a small number of models.
433
+ await ensureAssignedModelLoaded(loader, slot);
434
+
435
+ // Resolve the strongest cache key the runtime can give us. Order of
436
+ // precedence (see `resolveLocalCacheKey`):
437
+ // 1. Conversation id — survives any prompt drift
438
+ // 2. Stable-prefix hash — survives unstable-tail timestamps
439
+ // 3. Provider plan hashes — back-compat
440
+ const providerOptions = (params as { providerOptions?: unknown })
441
+ .providerOptions;
442
+ const conversationId = extractConversationId(providerOptions);
443
+ const cacheKey =
444
+ resolveLocalCacheKey(providerOptions) ??
445
+ extractPromptCacheKey(providerOptions) ??
446
+ undefined;
447
+ const engineArgs = engineGenerateArgsFromParams(params, cacheKey);
448
+
449
+ // Prefer a runtime-registered loader that implements `generate` — that's
450
+ // the mobile / device-bridge path. On desktop we fall back to the
451
+ // standalone engine.
452
+ if (loader?.generate) {
453
+ return loader.generate(engineArgs);
454
+ }
455
+ if (!(await localInferenceEngine.available())) {
456
+ // No native binding: signal UNAVAILABLE (typed) so the cross-provider
457
+ // router skips local inference and falls back to a registered cloud/API
458
+ // provider, instead of hard-failing the whole turn.
459
+ throw new LocalInferenceUnavailableError(
460
+ slot,
461
+ "backend_unavailable",
462
+ `[local-inference] No llama.cpp binding available for ${slot} request`,
463
+ );
464
+ }
465
+ if (!localInferenceEngine.hasLoadedModel()) {
466
+ // No local model loaded: signal UNAVAILABLE (typed) so the router falls
467
+ // back to a registered cloud/API provider (e.g. Anthropic) when one
468
+ // exists, rather than hard-failing while a usable provider is present.
469
+ throw new LocalInferenceUnavailableError(
470
+ slot,
471
+ "backend_unavailable",
472
+ `[local-inference] No local model is active. Assign a model to ${slot} or activate one in Settings → Local models.`,
473
+ );
474
+ }
475
+
476
+ // Long-lived conversation? Open / reuse a registry handle so this
477
+ // turn lands on the same slot every time, regardless of prompt
478
+ // hash drift. The handle API additionally returns Anthropic-shape
479
+ // usage telemetry, which we surface at INFO once per generation.
480
+ if (conversationId) {
481
+ const modelId =
482
+ localInferenceEngine.currentModelPath() ?? "default-local-model";
483
+ const handle =
484
+ localInferenceEngine.conversation(conversationId, modelId) ??
485
+ localInferenceEngine.openConversation({
486
+ conversationId,
487
+ modelId,
488
+ });
489
+ const { cacheKey: _drop, ...convArgs } = engineArgs;
490
+ const result = await localInferenceEngine.generateInConversation(
491
+ handle,
492
+ convArgs,
493
+ );
494
+ // Per-generation usage log. Match the Anthropic plugin's
495
+ // observability surface so cloud and local share the same
496
+ // mental model. Cache hit rate is reported when input_tokens > 0.
497
+ const u = result.usage;
498
+ const hitRate =
499
+ u.cache_hit_rate !== undefined
500
+ ? `${Math.round(u.cache_hit_rate * 100)}%`
501
+ : "n/a";
502
+ const mtpRate =
503
+ typeof u.mtp_acceptance_rate === "number"
504
+ ? ` mtp=${Math.round(u.mtp_acceptance_rate * 100)}%`
505
+ : "";
506
+ logger.info(
507
+ `[local-inference] usage conv=${conversationId} slot=${result.slotId} in=${u.input_tokens} out=${u.output_tokens} cache_read=${u.cache_read_input_tokens} cache_create=${u.cache_creation_input_tokens} hit=${hitRate}${mtpRate}`,
508
+ );
509
+ // Auto-tune signal — emits a one-line warn if the high-water mark
510
+ // outgrew the configured slot count this turn. Cheap to call,
511
+ // and the warning is what the operator needs to see.
512
+ localInferenceEngine.warnIfParallelTooLow({ warn: logger.warn });
513
+ return result.text;
514
+ }
515
+
516
+ // No conversation context: fall through to the existing hash-based
517
+ // slot allocation. Doesn't break any caller that wasn't aware of
518
+ // conversation handles.
519
+ return localInferenceEngine.generate(engineArgs);
520
+ };
521
+ }
522
+
523
+ /**
524
+ * Normalize the runtime's TEXT_EMBEDDING input shape — `params` may be the
525
+ * structured `TextEmbeddingParams` (when called from a typed plugin), a
526
+ * raw string (when called from action runners), or `null` (an internal
527
+ * warmup probe used to size the shipped embedding vector).
528
+ */
529
+ function extractEmbeddingText(
530
+ params: TextEmbeddingParams | string | null,
531
+ ): string {
532
+ if (params === null) return "";
533
+ if (typeof params === "string") return params;
534
+ return params.text;
535
+ }
536
+
/**
 * Create the loader-backed TEXT_EMBEDDING handler (the embedding counterpart
 * of `makeHandler`).
 *
 * The returned handler looks up the runtime's loader and calls its `embed`.
 * When the loader is missing or has no `embed`, it throws instead of
 * returning a placeholder vector, so the runtime can move on to a non-local
 * provider (Commandment 8: don't hide broken pipelines).
 *
 * @returns An async handler resolving to the loader's `embedding` result.
 * @throws Error when the active loader does not implement `embed`.
 */
function makeEmbeddingHandler(): EmbeddingHandler {
  const embedViaLoader: EmbeddingHandler = async (runtime, params) => {
    const activeLoader = getLoader(runtime);
    if (!activeLoader?.embed) {
      throw new Error(
        "[local-inference] Active loader does not implement embed; falling through to next provider",
      );
    }
    // Embeddings are not slot-aware here (single active model), so load the
    // user's TEXT_EMBEDDING assignment, if any, before calling the loader.
    await ensureAssignedModelLoaded(activeLoader, "TEXT_EMBEDDING");
    const { embedding } = await activeLoader.embed({
      input: extractEmbeddingText(params),
    });
    return embedding;
  };
  return embedViaLoader;
}
560
+
/** Load parameters for the desktop embedding model, as produced by `resolveDesktopEmbeddingConfig`. */
interface DesktopEmbeddingConfig {
  /** Directory that model files are resolved against. */
  modelsDir: string;
  /** Embedding model file name/path, resolved relative to `modelsDir`. */
  model: string;
  /** Context window size to load the model with. */
  contextSize: number;
  /** Number of layers to offload to the GPU (999 stands for "all layers"). */
  gpuLayers: number;
}
567
+
/**
 * Build the desktop embedding config from the environment, using the
 * `EMBEDDING_PRESETS.performance` preset for anything that is unset/invalid.
 *
 * Environment inputs (the same `LOCAL_EMBEDDING_*` family that
 * `configureLocalEmbeddingPlugin` and the boot warmup set):
 *   - `MODELS_DIR`                   → `modelsDir` (else `DEFAULT_MODELS_DIR`)
 *   - `LOCAL_EMBEDDING_MODEL`        → `model` (else the preset model)
 *   - `LOCAL_EMBEDDING_CONTEXT_SIZE` → `contextSize` when finite and > 0
 *   - `LOCAL_EMBEDDING_GPU_LAYERS`   → `gpuLayers`; "auto"/"max" mean 999,
 *     any other finite number is used verbatim, everything else is 0.
 *
 * @returns The resolved embedding configuration.
 */
function resolveDesktopEmbeddingConfig(): DesktopEmbeddingConfig {
  const { env } = process;
  const fallback = EMBEDDING_PRESETS.performance;

  const requestedContext = Number(env.LOCAL_EMBEDDING_CONTEXT_SIZE);
  const contextIsUsable =
    Number.isFinite(requestedContext) && requestedContext > 0;

  // "999 = all layers on GPU" per llama.cpp; the desktop adapter clamps to
  // the model's metadata layer count, which is why "auto"/"max" map to 999.
  const rawGpuLayers = env.LOCAL_EMBEDDING_GPU_LAYERS?.trim();
  let gpuLayers: number;
  if (rawGpuLayers === "auto" || rawGpuLayers === "max") {
    gpuLayers = 999;
  } else {
    const parsedLayers = Number(rawGpuLayers);
    gpuLayers = Number.isFinite(parsedLayers) ? parsedLayers : 0;
  }

  return {
    modelsDir: env.MODELS_DIR?.trim() || DEFAULT_MODELS_DIR,
    model: env.LOCAL_EMBEDDING_MODEL?.trim() || fallback.model,
    contextSize: contextIsUsable ? requestedContext : fallback.contextSize,
    gpuLayers,
  };
}
592
+
/**
 * Resolve (or stage) the bundle root the fused `eliza_inference_embed` should
 * anchor at for the dedicated embedding model. The fused C side embeds over the
 * single GGUF under `<root>/text/`, so it must be pointed at an isolated bundle
 * that contains ONLY the embedding model — never the chat bundle's text model
 * (whose decoder-as-embedder output has a different dimension).
 *
 * Resolution order:
 *   1. `ELIZA_EMBED_BUNDLE_ROOT` — explicit override, honoured only when
 *      `<override>/text` exists. An unusable override is reported with a
 *      warning (it used to be ignored silently) and resolution continues.
 *   2. The model already lives under a `text/` dir (`<root>/text/<model>.gguf`).
 *   3. `<modelsDir>/text/<model>` exists → anchor at `<modelsDir>`.
 *   4. Otherwise STAGE the embedding GGUF under
 *      `<modelsDir>/.eliza-embed-bundle/text/` (hardlink, symlink fallback).
 *
 * @param cfg Desktop embedding configuration (`modelsDir` and `model` are used).
 * @returns The bundle root, or `null` when the embedding GGUF is not present
 *   (boot warmup may still be downloading) or staging failed; the caller then
 *   raises LocalInferenceUnavailable and the runtime falls through to the next
 *   embedding provider.
 */
function resolveFusedEmbedBundleRoot(
  cfg: DesktopEmbeddingConfig,
): string | null {
  const override = process.env.ELIZA_EMBED_BUNDLE_ROOT?.trim();
  if (override) {
    if (existsSync(path.join(override, "text"))) return override;
    // The operator asked for a specific root; tell them why it is not used.
    logger.warn(
      `[local-inference] ELIZA_EMBED_BUNDLE_ROOT="${override}" has no text/ directory; ignoring the override and resolving the embed bundle root automatically`,
    );
  }

  const modelPath = path.resolve(cfg.modelsDir, cfg.model);
  const parent = path.dirname(modelPath);
  if (path.basename(parent) === "text" && existsSync(modelPath)) {
    return path.dirname(parent);
  }
  if (existsSync(path.join(cfg.modelsDir, "text", cfg.model))) {
    return cfg.modelsDir;
  }
  if (!existsSync(modelPath)) return null;

  const root = path.join(cfg.modelsDir, ".eliza-embed-bundle");
  const textDir = path.join(root, "text");
  const staged = path.join(textDir, path.basename(cfg.model));
  try {
    mkdirSync(textDir, { recursive: true });
    // NOTE(review): existsSync follows symlinks, so a dangling staged symlink
    // reads as "missing" here and the re-link below then fails (EEXIST),
    // which lands in the outer catch and returns null.
    if (!existsSync(staged)) {
      try {
        linkSync(modelPath, staged);
      } catch {
        // Hardlinks fail across filesystems; fall back to a symlink.
        symlinkSync(modelPath, staged);
      }
    }
    return root;
  } catch (err) {
    logger.warn(
      `[local-inference] could not stage the fused embed bundle for "${cfg.model}": ${String(err)}`,
    );
    return null;
  }
}
644
+
/**
 * Lazily-resolved fused embedding handle. When the fused `libelizainference`
 * (ABI v9) is present, reports `embedSupported()`, and a `<root>/text/` bundle
 * root resolves for the embedding model, the desktop TEXT_EMBEDDING handler
 * computes embeddings through `eliza_inference_embed` over the fused handle's
 * resident text vocab — retiring the node-llama-cpp / libllama embedding path.
 *
 * States: `null` = not attempted yet (or the last attempt threw and may be
 * retried); a promise resolving to `null` = resolution finished without a
 * usable handle (the handler then falls back).
 *
 * Must be initialised to `null`: an uninitialised `let` is `undefined`, which
 * would never satisfy the `=== null` guard below, so the resolver would never
 * run and every call would report "no handle".
 */
let fusedEmbedHandlePromise: Promise<{
  ffi: import("../services/voice/ffi-bindings").ElizaInferenceFfi;
  ctx: import("../services/voice/ffi-bindings").ElizaInferenceContextHandle;
  embed: NonNullable<
    import("../services/voice/ffi-bindings").ElizaInferenceFfi["embed"]
  >;
} | null> | null = null;

/**
 * Return an `embed(text)` function backed by the fused library, creating the
 * shared handle on first use.
 *
 * NOTE(review): a `null` outcome (no bun:ffi, no bundle root, no fused lib, or
 * embed unsupported) is cached for the life of the process; only a thrown
 * error clears the cache for a retry. `cfg` is only consulted by the call that
 * performs the resolution.
 *
 * @param cfg Desktop embedding configuration used to locate the embed bundle.
 * @returns `{ embed }`, or `null` when the fused embed path is unavailable.
 */
async function getFusedEmbeddingHandle(cfg: DesktopEmbeddingConfig): Promise<{
  embed: (text: string) => Float32Array;
} | null> {
  if (fusedEmbedHandlePromise === null) {
    fusedEmbedHandlePromise = (async () => {
      try {
        // Probe for the bun:ffi runtime; absent outside Bun.
        require.resolve("bun:ffi");
      } catch {
        return null;
      }
      const { resolveFusedLibraryPath } = await import(
        "../services/desktop-fused-ffi-backend-runtime"
      );
      const bundleRoot = resolveFusedEmbedBundleRoot(cfg);
      if (!bundleRoot) return null;
      const libPath = resolveFusedLibraryPath(bundleRoot);
      if (!libPath) return null;
      const { loadElizaInferenceFfi } = await import(
        "../services/voice/ffi-bindings"
      );
      const ffi = loadElizaInferenceFfi(libPath);
      if (
        typeof ffi.embedSupported !== "function" ||
        ffi.embedSupported() !== true ||
        typeof ffi.embed !== "function"
      ) {
        ffi.close();
        return null;
      }
      let ctx: ReturnType<typeof ffi.create>;
      try {
        ctx = ffi.create(bundleRoot);
      } catch (err) {
        // Do not leak the loaded library when context creation fails.
        ffi.close();
        throw err;
      }
      logger.info(
        `[local-inference] Desktop embeddings via fused libelizainference (eliza_inference_embed) anchored at ${bundleRoot} — node-llama-cpp embedding path retired`,
      );
      return { ffi, ctx, embed: ffi.embed };
    })().catch((err) => {
      // Surface the reason (previously swallowed) and allow a later retry.
      logger.debug(
        "[local-inference] fused embedding handle resolution failed; will retry on the next call:",
        err instanceof Error ? err.message : String(err),
      );
      fusedEmbedHandlePromise = null;
      return null;
    });
  }
  const handle = await fusedEmbedHandlePromise;
  if (!handle) return null;
  // gte-small / BERT bi-encoders use MEAN pooling; a decoder-as-embedder
  // (`--pooling last`) is selected via ELIZA_EMBED_POOLING=last.
  const pooling =
    process.env.ELIZA_EMBED_POOLING?.trim().toLowerCase() === "last" ? 3 : 1;
  return {
    embed: (text: string) => handle.embed({ ctx: handle.ctx, text, pooling }),
  };
}
710
+
/**
 * Create the desktop TEXT_EMBEDDING handler that runs on the FUSED
 * `libelizainference` (`eliza_inference_embed`, ABI v9).
 *
 * The dedicated embedding GGUF (gte-small, 384-dim — matching plugin-sql's
 * dim384 column) is staged as the sole entry of an isolated fused embed bundle
 * (see `resolveFusedEmbedBundleRoot`) so the fused lib loads it directly.
 * libllama is retired, so there is no capacitor/libllama fallback: when the
 * fused embed path cannot resolve (no bun:ffi, no fused lib, or the embedding
 * GGUF is still downloading) the handler throws, letting the runtime fall
 * through to the operator-configured provider — never a silent zero-vector
 * (Commandment 8).
 *
 * @returns An async handler resolving to the embedding as a plain number array.
 * @throws LocalInferenceUnavailableError (`backend_unavailable`) when no fused
 *   embedding handle is available.
 */
function makeFusedEmbeddingHandler(): EmbeddingHandler {
  const embedViaFusedLib: EmbeddingHandler = async (_runtime, params) => {
    const text = extractEmbeddingText(params);
    const cfg = resolveDesktopEmbeddingConfig();
    const fused = await getFusedEmbeddingHandle(cfg);
    if (fused) {
      // The fused lib hands back a Float32Array; the runtime expects number[].
      return [...fused.embed(text)];
    }
    const reason = [
      `[local-inference] TEXT_EMBEDDING unavailable: the fused libelizainference `,
      `embed path could not resolve for "${cfg.model}" (needs bun:ffi, the fused `,
      `lib, and the embedding GGUF present). libllama is retired — falling through `,
      `to the next embedding provider.`,
    ].join("");
    throw new LocalInferenceUnavailableError(
      ModelType.TEXT_EMBEDDING,
      "backend_unavailable",
      reason,
    );
  };
  return embedViaFusedLib;
}
740
+
/**
 * Get the text to synthesize from a TEXT_TO_SPEECH input.
 *
 * @param params Either the raw text or an object carrying a string `text`.
 * @returns The text (possibly empty — emptiness is checked by the caller).
 * @throws Error when the input is neither a string nor has a string `text`.
 */
function extractSpeechText(params: TextToSpeechParams | string): string {
  const candidate = typeof params === "string" ? params : params?.text;
  if (typeof candidate !== "string") {
    throw new Error(
      "[local-inference] TEXT_TO_SPEECH requires a string or { text } input",
    );
  }
  return candidate;
}
748
+
/**
 * Get the optional abort signal from a TEXT_TO_SPEECH input.
 *
 * @param params Raw text or a params object.
 * @returns `params.signal` for non-null object inputs, otherwise `undefined`.
 */
function extractSpeechSignal(
  params: TextToSpeechParams | string,
): AbortSignal | undefined {
  if (params === null || typeof params !== "object") return undefined;
  return params.signal;
}
756
+
/**
 * Create the TEXT_TO_SPEECH handler backed by the local voice bundle.
 *
 * The handler validates that the text is non-empty, makes sure the active
 * bundle's voice runtime is ready, and then delegates to
 * `localInferenceEngine.synthesizeSpeech`, forwarding the caller's abort
 * signal when one was supplied.
 *
 * @returns An async handler resolving to the engine's synthesis result.
 * @throws Error when the input has no usable text or the text is empty.
 */
function makeTextToSpeechHandler(): TextToSpeechHandler {
  const synthesize: TextToSpeechHandler = async (_runtime, params) => {
    const speechText = extractSpeechText(params);
    if (speechText === "") {
      throw new Error(
        "[local-inference] TEXT_TO_SPEECH text must be non-empty",
      );
    }
    // No filtering of singing, emotion tags, or lyrical phrasing happens here:
    // the local voice bundle advertises its expressive capability in the
    // manifest, and runtime safety policy lives above this model adapter.
    await localInferenceEngine.ensureActiveBundleVoiceReady();
    const abortSignal = extractSpeechSignal(params);
    return localInferenceEngine.synthesizeSpeech(speechText, abortSignal);
  };
  return synthesize;
}
775
+
/**
 * View binary input as a plain `Uint8Array` without copying the bytes.
 *
 * @param value A `Uint8Array` (including `Buffer`) or an `ArrayBuffer`.
 * @returns A new `Uint8Array` over the same memory: typed arrays keep their
 *   offset and length, array buffers are wrapped whole.
 */
function toUint8Array(value: Uint8Array | ArrayBuffer | Buffer): Uint8Array {
  if (!(value instanceof Uint8Array)) {
    return new Uint8Array(value);
  }
  const { buffer, byteOffset, byteLength } = value;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
782
+
/**
 * Turn a TRANSCRIPTION input into PCM samples plus a sample rate.
 *
 * Supported inputs:
 *   - raw bytes (`Uint8Array`/`Buffer`/`ArrayBuffer`) — decoded with
 *     `decodeMonoPcm16Wav`;
 *   - `{ pcm: Float32Array, sampleRateHz | sampleRate }` — used as-is;
 *   - `{ audio: Uint8Array | ArrayBuffer }` — decoded with `decodeMonoPcm16Wav`.
 *
 * Strings and `{ audioUrl }` are rejected: this provider never fetches audio.
 *
 * The sample rate must be a finite positive number. `NaN` and `Infinity` are
 * rejected explicitly — both are `typeof "number"` and slip past a bare
 * `<= 0` comparison.
 *
 * @param params Runtime-supplied transcription input.
 * @returns The audio as `{ pcm, sampleRate }`.
 * @throws Error for unsupported input shapes or an invalid sample rate.
 */
function extractTranscriptionAudio(
  params: TranscriptionParams | Buffer | string | LocalTranscriptionParams,
): TranscriptionAudio {
  if (typeof params === "string") {
    throw new Error(
      "[local-inference] TRANSCRIPTION via the local voice runtime requires PCM/WAV bytes; URL/path strings are not fetched by this provider",
    );
  }
  if (params instanceof Uint8Array || params instanceof ArrayBuffer) {
    return decodeMonoPcm16Wav(toUint8Array(params));
  }
  if (!params || typeof params !== "object") {
    throw new Error(
      "[local-inference] TRANSCRIPTION requires PCM/WAV bytes or { pcm, sampleRateHz }",
    );
  }
  if ("audioUrl" in params && typeof params.audioUrl === "string") {
    throw new Error(
      "[local-inference] TRANSCRIPTION audioUrl is not fetched by the local voice runtime; pass mono PCM16 WAV bytes or { pcm, sampleRateHz }",
    );
  }
  if ("pcm" in params && params.pcm instanceof Float32Array) {
    // `sampleRateHz` wins; `sampleRate` is accepted as an alternative key.
    const sampleRate =
      ("sampleRateHz" in params ? params.sampleRateHz : undefined) ??
      ("sampleRate" in params ? params.sampleRate : undefined);
    if (
      typeof sampleRate !== "number" ||
      !Number.isFinite(sampleRate) ||
      sampleRate <= 0
    ) {
      throw new Error(
        "[local-inference] TRANSCRIPTION { pcm } requires a positive sampleRateHz",
      );
    }
    return { pcm: params.pcm, sampleRate };
  }
  if (
    "audio" in params &&
    (params.audio instanceof Uint8Array || params.audio instanceof ArrayBuffer)
  ) {
    return decodeMonoPcm16Wav(toUint8Array(params.audio));
  }
  throw new Error(
    "[local-inference] TRANSCRIPTION requires mono PCM16 WAV bytes or { pcm, sampleRateHz } for the local voice runtime",
  );
}
825
+
/**
 * Get the optional abort signal from a TRANSCRIPTION input.
 *
 * @param params Transcription input in any supported shape.
 * @returns The input's `signal` property for non-null object inputs,
 *   otherwise `undefined`.
 */
function extractTranscriptionSignal(
  params: TranscriptionParams | Buffer | string | LocalTranscriptionParams,
): AbortSignal | undefined {
  if (params === null || typeof params !== "object") return undefined;
  return (params as { signal?: AbortSignal }).signal;
}
833
+
/**
 * Throw when `signal` has already been aborted; otherwise do nothing.
 *
 * @param signal Optional abort signal to inspect.
 * @throws The signal's `reason` when it is an `Error`; otherwise a
 *   `DOMException` named "AbortError".
 */
function throwIfAborted(signal: AbortSignal | undefined): void {
  if (signal?.aborted) {
    const { reason } = signal;
    throw reason instanceof Error
      ? reason
      : new DOMException("Aborted", "AbortError");
  }
}
840
+
/**
 * Create the TRANSCRIPTION handler backed by the fused libelizainference ASR
 * runtime.
 *
 * Abort is checked before any work, after the voice runtime is ready, and
 * again after transcription, so a cancelled request never yields a transcript.
 *
 * @returns An async handler resolving to the engine's transcript.
 * @throws The abort reason when cancelled, input-validation errors from
 *   `extractTranscriptionAudio`, and any engine startup/availability failure.
 */
function makeTranscriptionHandler(): TranscriptionHandler {
  const transcribe: TranscriptionHandler = async (_runtime, params) => {
    const abortSignal = extractTranscriptionSignal(params);
    throwIfAborted(abortSignal);

    const audio = extractTranscriptionAudio(params);

    // The fused libelizainference ASR runtime is the sole on-device
    // transcriber. A startup/availability failure propagates (AGENTS.md §3) —
    // there is no whisper.cpp second attempt and no silent empty transcript.
    await localInferenceEngine.ensureActiveBundleVoiceReady();
    throwIfAborted(abortSignal);

    const transcript = await localInferenceEngine.transcribePcm(
      audio,
      abortSignal,
    );
    throwIfAborted(abortSignal);
    return transcript;
  };
  return transcribe;
}
856
+
/**
 * Convert an IMAGE_DESCRIPTION input into the vision request payload.
 *
 * @param params Either the image URL itself or an object with `imageUrl` and
 *   an optional `prompt`.
 * @returns `{ image, prompt }` where `image` is tagged `dataUrl` for `data:`
 *   URLs and `url` for everything else. `prompt` is only taken from object
 *   inputs.
 * @throws Error when no non-empty image URL can be found. This includes
 *   `null`/`undefined` input, which previously crashed with a raw TypeError
 *   on property access instead of this validation error.
 */
function paramsToVisionRequest(params: ImageDescriptionParams | string): {
  image: { kind: "dataUrl"; dataUrl: string } | { kind: "url"; url: string };
  prompt?: string;
} {
  // `typeof null === "object"`, so null must be excluded explicitly.
  const structured =
    typeof params === "object" && params !== null ? params : undefined;
  const url = typeof params === "string" ? params : structured?.imageUrl;
  if (typeof url !== "string" || url.length === 0) {
    throw new Error(
      "[local-inference] IMAGE_DESCRIPTION requires a non-empty imageUrl",
    );
  }
  const prompt = structured?.prompt;
  if (url.startsWith("data:")) {
    return { image: { kind: "dataUrl", dataUrl: url }, prompt };
  }
  return { image: { kind: "url", url }, prompt };
}
873
+
/**
 * Coerce a vision backend result into a trimmed `{ title, description }`.
 *
 * A plain string becomes the description, and its title is the text before
 * the first `.`, `!` or `?` (or "Image" when that is empty). A structured
 * result must carry non-blank string `title` and `description`; both are
 * trimmed and any other properties are dropped.
 *
 * @param result Raw backend output.
 * @returns The normalized description.
 * @throws Error when a string result is blank, or a structured result is
 *   missing/blank in either field.
 */
function normalizeImageDescription(
  result: ImageDescriptionResult | string,
): ImageDescriptionResult {
  if (typeof result === "string") {
    const description = result.trim();
    if (description.length === 0) {
      throw new Error(
        "[local-inference] IMAGE_DESCRIPTION backend returned an empty description",
      );
    }
    const leadingSentence = description.split(/[.!?]/, 1)[0]?.trim();
    return { title: leadingSentence || "Image", description };
  }

  const title = typeof result?.title === "string" ? result.title.trim() : "";
  const description =
    typeof result?.description === "string" ? result.description.trim() : "";
  if (typeof result !== "object" || !title || !description) {
    throw new Error(
      "[local-inference] IMAGE_DESCRIPTION backend returned an invalid description",
    );
  }
  return { title, description };
}
906
+
/**
 * Name of the runtime setting that plugin-vision polls before preferring the
 * Eliza-1 vision path over its legacy Florence path. It is written only from
 * the image-description handler, after the process-wide arbiter has been
 * confirmed to advertise the `vision-describe` capability.
 */
const ELIZA1_VISION_MARKER = "ELIZA1_VISION_HANDLER_PRESENT";

/**
 * Set the Eliza-1 vision marker to "1" on the runtime, at most once.
 *
 * Does nothing when the runtime has no `setSetting`, or when `getSetting`
 * already reports the marker as "1" or `true`. Errors from `setSetting` are
 * ignored on purpose.
 *
 * @param runtime Agent runtime to flag.
 */
function markEliza1VisionHandlerPresent(runtime: IAgentRuntime): void {
  const settable = runtime as IAgentRuntime & {
    setSetting?: (key: string, value: unknown) => void;
    getSetting?: (key: string) => unknown;
  };
  if (typeof settable.setSetting !== "function") return;

  const current =
    typeof settable.getSetting === "function"
      ? settable.getSetting(ELIZA1_VISION_MARKER)
      : undefined;
  if (current === "1" || current === true) return;

  try {
    settable.setSetting(ELIZA1_VISION_MARKER, "1");
  } catch {
    // Some test runtimes don't accept setSetting at runtime — non-fatal.
  }
}
930
+
/**
 * Create the IMAGE_DESCRIPTION handler that routes through the process-wide
 * memory arbiter.
 *
 * The handler requires an arbiter that advertises `vision-describe` and
 * exposes `requestVisionDescribe`. It then flags the runtime with the Eliza-1
 * vision marker, picks the model key (a non-empty string `modelKey` on object
 * params, else "qwen3-vl"), submits the request, and normalizes the result.
 *
 * @returns An async handler resolving to `{ title, description }`.
 * @throws Error when the vision capability is not registered, the input has
 *   no image URL, or the backend result is empty/invalid.
 */
function makeImageDescriptionHandler(): ImageDescriptionHandler {
  const describeImage: ImageDescriptionHandler = async (runtime, params) => {
    const arbiter = tryGetMemoryArbiter();
    if (
      !arbiter?.hasCapability("vision-describe") ||
      typeof arbiter.requestVisionDescribe !== "function"
    ) {
      throw new Error(
        "[local-inference] IMAGE_DESCRIPTION requires an active Eliza-1 vision-capable bundle with the vision-describe capability registered",
      );
    }
    markEliza1VisionHandlerPresent(runtime);

    let modelKey = "qwen3-vl";
    if (typeof params === "object") {
      const requestedKey = (
        params as ImageDescriptionParams & { modelKey?: unknown }
      ).modelKey;
      if (typeof requestedKey === "string" && requestedKey) {
        modelKey = requestedKey;
      }
    }

    const request = paramsToVisionRequest(params);
    const rawDescription = await arbiter.requestVisionDescribe<
      typeof request,
      ImageDescriptionResult | string
    >({ modelKey, payload: request });
    return normalizeImageDescription(rawDescription);
  };
  return describeImage;
}
959
+
/**
 * Register the device-bridge loader on the runtime as the
 * `localInferenceLoader` service.
 *
 * The loader accepts load/generate calls whether or not a mobile device is
 * currently connected — parked calls resolve on reconnect (up to a timeout).
 * Registering eagerly is cheaper than waiting for the first device to
 * register; ordering is already handled inside `DeviceBridge.generate`.
 *
 * No-op when the runtime has no `registerService` function.
 *
 * @param runtime Agent runtime to register the service on.
 */
function registerDeviceBridgeLoader(runtime: AgentRuntime): void {
  const registrar = runtime as AgentRuntime & {
    registerService?: (name: string, impl: unknown) => unknown;
  };
  if (typeof registrar.registerService !== "function") return;

  // Every loader method simply forwards to the shared device bridge.
  const bridgeLoader: LocalInferenceLoader = {
    loadModel(args) {
      return deviceBridge.loadModel(args);
    },
    unloadModel() {
      return deviceBridge.unloadModel();
    },
    currentModelPath() {
      return deviceBridge.currentModelPath();
    },
    generate(args) {
      return deviceBridge.generate(args);
    },
    embed(args) {
      return deviceBridge.embed(args);
    },
  };

  // Expose the process-wide MemoryArbiter through the registered
  // `localInferenceLoader` service so provider.ts can route
  // IMAGE_DESCRIPTION (WS2) and IMAGE (WS3) requests to the arbiter.
  // Without this accessor the IMAGE handler unconditionally surfaces
  // `capability_unavailable`: the singleton `localInferenceService` is not
  // the object that gets registered with the runtime.
  registrar.registerService("localInferenceLoader", {
    ...bridgeLoader,
    getMemoryArbiter: () => tryGetMemoryArbiter(),
  });
}
991
+
/**
 * Decide whether the AOSP / generic-FFI loader should be attempted.
 *
 * That path loads the fused `libelizainference.so` into the bun process via
 * `bun:ffi` (the AOSP plugin's loader; libllama is retired). The loader stays
 * inactive unless one of its triggers fires (see `isAospEnabled` in
 * `@elizaos/plugin-aosp-local-inference`), so registration is only attempted
 * when this function returns true.
 *
 * Rules, in order:
 *   1. `ELIZA_DISABLE_FFI_LLAMA=1` — never attempt (wins over everything).
 *   2. `ELIZA_LOCAL_LLAMA=1` (the legacy opt-in env name) — attempt.
 *   3. `arch === "riscv64"` — attempt.
 *   4. Otherwise — do not attempt.
 *
 * riscv64 rationale: `capacitor-llama` ships prebuilts only for
 * linux-{x64,arm64}, darwin-arm64, win-x64. Riscv64 hosts have no native NAPI
 * binding option; the cross-built fused `libelizainference.so` is the only
 * in-process llama.cpp path. The FFI loader satisfies the same
 * `localInferenceLoader` service contract, so the rest of the engine — model
 * handlers, embedding routing, response handler — works unchanged.
 *
 * Failure policy of the registration that follows: the AOSP build can ship
 * the .so for one ABI but be invoked on another (e.g. cuttlefish_x86_64
 * reporting both x86_64 and arm64-v8a), so registration is wrapped in
 * `try`/`catch`. A failure is logged at `error` level rather than hidden — an
 * operator who opted in deserves to see it. On the riscv64 auto-trigger a
 * missing `libelizainference.so` is logged and inference falls through to
 * Cloud routing (per CLAUDE.md deployment topologies — local-only is
 * supported but Cloud is an acceptable fallback when the on-device backend is
 * unavailable).
 *
 * @param env Environment to read the flags from (defaults to `process.env`).
 * @param arch CPU architecture to test (defaults to `process.arch`).
 * @returns Whether AOSP loader registration should be attempted.
 */
export function shouldAttemptAospLlamaLoader(
  env: NodeJS.ProcessEnv = process.env,
  arch: NodeJS.Architecture = process.arch,
): boolean {
  const killSwitchOn = env.ELIZA_DISABLE_FFI_LLAMA?.trim() === "1";
  if (killSwitchOn) return false;
  const optedIn = env.ELIZA_LOCAL_LLAMA?.trim() === "1";
  return optedIn || arch === "riscv64";
}
1028
+
/**
 * Try to register the AOSP fused-FFI loader from
 * `@elizaos/plugin-aosp-local-inference`.
 *
 * Skipped (returns false) unless `shouldAttemptAospLlamaLoader()` allows it.
 * Import and registration failures are logged at `error` level and reported
 * as `false`; they are never rethrown. The log names the trigger that
 * actually caused the attempt (the env opt-in or the architecture
 * auto-trigger) instead of always blaming `ELIZA_LOCAL_LLAMA=1`.
 *
 * @param runtime Agent runtime handed to the plugin's registration function.
 * @returns Whether the plugin reported a successful registration.
 */
async function tryRegisterAospLlamaLoader(
  runtime: AgentRuntime,
): Promise<boolean> {
  if (!shouldAttemptAospLlamaLoader()) return false;
  try {
    // The import goes through `new Function` with a fixed, trusted specifier
    // (no external input) — presumably so bundlers do not try to resolve the
    // optional package statically; TODO confirm.
    const dynamicImport = new Function("id", "return import(id)") as (
      id: string,
    ) => Promise<{
      registerAospLlamaLoader?: (r: AgentRuntime) => Promise<boolean> | boolean;
    }>;
    const mod = await dynamicImport("@elizaos/plugin-aosp-local-inference");
    if (typeof mod.registerAospLlamaLoader !== "function") {
      logger.error(
        "[local-inference] AOSP llama adapter import resolved but missing registerAospLlamaLoader export",
      );
      return false;
    }
    const result = await mod.registerAospLlamaLoader(runtime);
    return Boolean(result);
  } catch (err) {
    const trigger =
      process.env.ELIZA_LOCAL_LLAMA?.trim() === "1"
        ? "ELIZA_LOCAL_LLAMA=1"
        : `the ${process.arch} auto-trigger`;
    logger.error(
      `[local-inference] AOSP llama adapter unavailable while attempting registration via ${trigger}:`,
      err instanceof Error ? err.message : String(err),
    );
    return false;
  }
}
1056
+
/**
 * Try to register the capacitor-llama loader for on-device mobile inference.
 *
 * Only meaningful under Capacitor (iOS/Android): returns false right away
 * unless `globalThis.Capacitor.isNativePlatform()` is truthy. The adapter is
 * imported dynamically so web / desktop bundlers don't choke on the native
 * plugin metadata. Import or registration failures are logged at debug level
 * and reported as `false`.
 *
 * @param runtime Agent runtime; the adapter receives an object created with
 *   `Object.create(runtime)` rather than the runtime itself.
 * @returns Whether the capacitor loader was registered.
 */
async function tryRegisterCapacitorLoader(
  runtime: AgentRuntime,
): Promise<boolean> {
  const capacitor = (globalThis as Record<string, unknown>).Capacitor as
    | { isNativePlatform?: () => boolean }
    | undefined;
  const onNativePlatform = capacitor?.isNativePlatform?.();
  if (!onNativePlatform) return false;

  try {
    const { registerCapacitorLlamaLoader } = await import(
      "@elizaos/capacitor-llama"
    );
    const capacitorRuntime: Parameters<typeof registerCapacitorLlamaLoader>[0] =
      Object.create(runtime);
    registerCapacitorLlamaLoader(capacitorRuntime);
    logger.info(
      "[local-inference] Registered capacitor-llama loader for mobile on-device inference",
    );
    return true;
  } catch (err) {
    logger.debug(
      "[local-inference] capacitor-llama not available:",
      err instanceof Error ? err.message : String(err),
    );
    return false;
  }
}
1085
+
/**
 * Synthetic conversation id under which the Stage-1 stable prefix (system
 * prompt + tool/action schema block + stable provider blocks) is kept
 * resident before any real conversation arrives.
 *
 * `deriveSlotId("conv:__system_prefix__", parallel)` is stable, so warming
 * with this id always targets the same slot. Per-room conversations get their
 * own slot via `conv:<roomId>` and inherit the radix-shared prefix tokens.
 */
const SYSTEM_PREFIX_CONVERSATION_ID = "__system_prefix__";
1095
+
/**
 * Render the Stage-1 stable prefix for `roomId` and KV-prefill the
 * local-inference slot that conversation pins to.
 *
 * Intended to be wired from the voice turn controller (W9) on `speech-start`
 * / voice-session-open, so the response-handler prompt is hot before STT
 * finishes — items I1/C1.
 *
 * Best-effort end to end: never throws. Returns false when no local model is
 * loaded, when the active backend id is not "llama-cpp", when the prefix
 * renders empty, or when rendering/pre-warm fails (logged at debug). A miss
 * just means the real request cold-prefills.
 *
 * @param runtime Agent runtime used to render the prefix.
 * @param roomId Room whose conversation slot should be warmed.
 * @returns The engine's pre-warm result, or false on any miss.
 */
export async function prewarmResponseHandler(
  runtime: IAgentRuntime,
  roomId: UUID,
): Promise<boolean> {
  const canPrewarm =
    localInferenceEngine.hasLoadedModel() &&
    localInferenceEngine.activeBackendId() === "llama-cpp";
  if (!canPrewarm) return false;

  try {
    const stablePrefix = await renderMessageHandlerStablePrefix(
      runtime,
      roomId,
    );
    if (!stablePrefix) return false;
    const conversationId = String(roomId);
    return await localInferenceEngine.prewarmConversation(
      conversationId,
      stablePrefix,
    );
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    logger.debug(
      "[local-inference] prewarmResponseHandler failed (best-effort):",
      detail,
    );
    return false;
  }
}
1128
+
/**
 * Warm the Stage-1 stable prefix onto the deterministic
 * `conv:__system_prefix__` slot at model-load / boot time, before any user
 * message — item I3 (warm-on-load).
 *
 * The stable prefix carries no per-room state, so the room id used for
 * rendering is irrelevant; the agent id is passed as a fixed stand-in.
 *
 * Best-effort: never throws. Returns false when no local model is loaded,
 * when the active backend id is not "llama-cpp", when the prefix renders
 * empty, or when rendering/pre-warm fails (logged at debug).
 *
 * @param runtime Agent runtime used to render the prefix.
 * @returns The engine's pre-warm result, or false on any miss.
 */
export async function prewarmSystemPrefix(
  runtime: IAgentRuntime,
): Promise<boolean> {
  const canPrewarm =
    localInferenceEngine.hasLoadedModel() &&
    localInferenceEngine.activeBackendId() === "llama-cpp";
  if (!canPrewarm) return false;

  try {
    const placeholderRoomId = runtime.agentId as UUID;
    const stablePrefix = await renderMessageHandlerStablePrefix(
      runtime,
      placeholderRoomId,
    );
    if (!stablePrefix) return false;
    return await localInferenceEngine.prewarmConversation(
      SYSTEM_PREFIX_CONVERSATION_ID,
      stablePrefix,
    );
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    logger.debug(
      "[local-inference] prewarmSystemPrefix failed (best-effort):",
      detail,
    );
    return false;
  }
}
1158
+
1159
+ export async function ensureLocalInferenceHandler(
1160
+ runtime: AgentRuntime,
1161
+ ): Promise<void> {
1162
+ const runtimeMode = getRuntimeMode(runtime);
1163
+ if (!shouldRegisterLocalInferenceHandlers(runtimeMode)) {
1164
+ logger.info(
1165
+ `[local-inference] Runtime mode is ${runtimeMode}; skipping local model handler registration`,
1166
+ );
1167
+ return;
1168
+ }
1169
+
1170
+ const runtimeWithRegistration = runtime as RuntimeWithLocalInferenceFlag;
1171
+ if (
1172
+ typeof runtimeWithRegistration.getModel !== "function" ||
1173
+ typeof runtimeWithRegistration.registerModel !== "function"
1174
+ ) {
1175
+ return;
1176
+ }
1177
+ if (runtimeWithRegistration[LOCAL_INFERENCE_HANDLER_INSTALLED]) {
1178
+ logger.debug(
1179
+ "[local-inference] Local model handlers already registered on this runtime; skipping duplicate registration",
1180
+ );
1181
+ return;
1182
+ }
1183
+
1184
+ // Install the side-registry interception as early as possible so it
1185
+ // captures every subsequent `registerModel` call — including our own
1186
+ // handlers below, plus anything else that registers during the rest of
1187
+ // boot. Idempotent per-runtime.
1188
+ handlerRegistry.installOn(runtime);
1189
+
1190
+ // Loader precedence:
1191
+ // 1. AOSP native FFI loader when running inside the AOSP agent process
1192
+ // itself (ELIZA_LOCAL_LLAMA=1). This is the canonical AOSP path —
1193
+ // libllama.so is dlopen'd directly, no IPC.
1194
+ // 2. Capacitor native adapter when running on a mobile device with the
1195
+ // Capacitor APK shell.
1196
+ // 3. Device-bridge (WebSocket to a paired phone) when explicitly
1197
+ // opted in via ELIZA_DEVICE_BRIDGE_ENABLED=1.
1198
+ // 4. Standalone node-llama-cpp engine for desktop / server.
1199
+ //
1200
+ // All four satisfy the same `localInferenceLoader` service contract.
1201
+ // A later registration overrides an earlier one, so we register in
1202
+ // LOWEST-priority order first; the AOSP loader runs last so it wins on
1203
+ // AOSP builds. Each `try*Loader` is idempotent and gated on its own env
1204
+ // signal, so they're safe to chain.
1205
+ const aospRegistered = await tryRegisterAospLlamaLoader(runtime);
1206
+ const capacitorRegistered =
1207
+ !aospRegistered && (await tryRegisterCapacitorLoader(runtime));
1208
+ const deviceBridgeEnabled =
1209
+ process.env.ELIZA_DEVICE_BRIDGE_ENABLED?.trim() === "1";
1210
+ if (!aospRegistered && !capacitorRegistered && deviceBridgeEnabled) {
1211
+ registerDeviceBridgeLoader(runtime);
1212
+ logger.info(
1213
+ "[local-inference] Registered device-bridge loader; inference routes to paired mobile device when connected",
1214
+ );
1215
+ }
1216
+
1217
+ // Pre-flight: if no backend is available, skip handler registration
1218
+ // entirely so we don't advertise a handler that will throw. The device
1219
+ // bridge is always "available" in the sense that it parks calls until a
1220
+ // device connects, so if it is enabled we always register handlers.
1221
+ if (
1222
+ !aospRegistered &&
1223
+ !capacitorRegistered &&
1224
+ !deviceBridgeEnabled &&
1225
+ !(await localInferenceEngine.available())
1226
+ ) {
1227
+ logger.debug(
1228
+ "[local-inference] No local inference backend available; skipping model registration",
1229
+ );
1230
+ return;
1231
+ }
1232
+
1233
+ // First-light convenience: when exactly one model is installed and no
1234
+ // slot assignments exist, auto-fill TEXT_SMALL/TEXT_LARGE so the user
1235
+ // lands in chat without opening Settings. The downloader handles the
1236
+ // post-install case; this catches the user who pre-staged a model
1237
+ // (external scan, prior install) and is now booting fresh.
1238
+ try {
1239
+ const installed = await listInstalledModels();
1240
+ const filled = await autoAssignAtBoot(installed);
1241
+ if (filled) {
1242
+ logger.info(
1243
+ `[local-inference] Auto-assigned single installed model to empty slots: ${JSON.stringify(filled)}`,
1244
+ );
1245
+ }
1246
+ } catch (err) {
1247
+ logger.warn(
1248
+ "[local-inference] autoAssignAtBoot failed:",
1249
+ err instanceof Error ? err.message : String(err),
1250
+ );
1251
+ }
1252
+
1253
+ const provider = aospRegistered
1254
+ ? AOSP_LLAMA_PROVIDER
1255
+ : capacitorRegistered
1256
+ ? CAPACITOR_LLAMA_PROVIDER
1257
+ : deviceBridgeEnabled
1258
+ ? DEVICE_BRIDGE_PROVIDER
1259
+ : LOCAL_INFERENCE_PROVIDER;
1260
+
1261
+ const textGenerationSlots: Array<
1262
+ [(typeof ModelType)[keyof typeof ModelType], AgentModelSlot]
1263
+ > = [
1264
+ [ModelType.TEXT_SMALL, "TEXT_SMALL"],
1265
+ [ModelType.TEXT_LARGE, "TEXT_LARGE"],
1266
+ // V5 chat calls semantic text model types directly. Register them as
1267
+ // first-class local handlers so structured streaming sees the concrete
1268
+ // local provider instead of falling through TEXT_SMALL via the router.
1269
+ [ModelType.RESPONSE_HANDLER, "TEXT_SMALL"],
1270
+ [ModelType.ACTION_PLANNER, "TEXT_SMALL"],
1271
+ [ModelType.TEXT_COMPLETION, "TEXT_SMALL"],
1272
+ ];
1273
+ for (const [modelType, slot] of textGenerationSlots) {
1274
+ try {
1275
+ runtimeWithRegistration.registerModel(
1276
+ modelType,
1277
+ makeHandler(slot),
1278
+ provider,
1279
+ LOCAL_INFERENCE_PRIORITY,
1280
+ );
1281
+ } catch (err) {
1282
+ logger.warn(
1283
+ "[local-inference] Could not register ModelType",
1284
+ modelType,
1285
+ err instanceof Error ? err.message : String(err),
1286
+ );
1287
+ }
1288
+ }
1289
+
1290
+ // Register TEXT_EMBEDDING separately — the runtime contract returns
1291
+ // `number[]` instead of `string`, so it can't share `makeHandler`.
1292
+ // - AOSP / device-bridge loaders expose `embed()` on the
1293
+ // `localInferenceLoader` service → route through that.
1294
+ // - Desktop has no `localInferenceLoader`; it serves embeddings through
1295
+ // the fused `libelizainference` (`eliza_inference_embed`) over the
1296
+ // dedicated gte-small GGUF staged as an isolated embed bundle. libllama
1297
+ // is retired — there is no capacitor/libllama embedding fallback.
1298
+ // Neither path registers a handler that would serve a silent zero-vector:
1299
+ // both throw when there's nothing real to call, so the runtime falls
1300
+ // through to the operator-configured provider (Commandment 8).
1301
+ const loaderForEmbed = (
1302
+ runtime as { getService?: (name: string) => unknown }
1303
+ ).getService?.("localInferenceLoader") as
1304
+ | { embed?: unknown }
1305
+ | null
1306
+ | undefined;
1307
+ const embeddingHandler = isLocalEmbeddingDisabledByEnv()
1308
+ ? null
1309
+ : loaderForEmbed && typeof loaderForEmbed.embed === "function"
1310
+ ? makeEmbeddingHandler()
1311
+ : provider === LOCAL_INFERENCE_PROVIDER
1312
+ ? makeFusedEmbeddingHandler()
1313
+ : null;
1314
+ if (embeddingHandler) {
1315
+ try {
1316
+ runtimeWithRegistration.registerModel(
1317
+ ModelType.TEXT_EMBEDDING,
1318
+ embeddingHandler,
1319
+ provider,
1320
+ LOCAL_INFERENCE_PRIORITY,
1321
+ );
1322
+ logger.info(
1323
+ `[local-inference] Registered ${provider} embedding handler for TEXT_EMBEDDING at priority ${LOCAL_INFERENCE_PRIORITY}`,
1324
+ );
1325
+ } catch (err) {
1326
+ logger.warn(
1327
+ "[local-inference] Could not register TEXT_EMBEDDING handler",
1328
+ err instanceof Error ? err.message : String(err),
1329
+ );
1330
+ }
1331
+ } else if (isLocalEmbeddingDisabledByEnv()) {
1332
+ logger.info(
1333
+ "[local-inference] Local TEXT_EMBEDDING handler disabled by ELIZA_DISABLE_LOCAL_EMBEDDINGS",
1334
+ );
1335
+ }
1336
+
1337
+ try {
1338
+ runtimeWithRegistration.registerModel(
1339
+ ModelType.TEXT_TO_SPEECH,
1340
+ makeTextToSpeechHandler(),
1341
+ provider,
1342
+ LOCAL_INFERENCE_PRIORITY,
1343
+ );
1344
+ // TRANSCRIPTION is registered default-on at the local-inference floor
1345
+ // priority (0). It is the last-resort handler: any cloud / other-plugin
1346
+ // TRANSCRIPTION handler registers above 0 and wins. When the handler
1347
+ // does run, it drives the fused libelizainference ASR runtime — the sole
1348
+ // on-device transcriber (Qwen3-ASR streaming → fused batch interim →
1349
+ // AsrUnavailableError) via the engine's armed voice bridge — see
1350
+ // makeTranscriptionHandler / EngineVoiceBridge.createStreamingTranscriber.
1351
+ // (The old ELIZA_LOCAL_TRANSCRIPTION env gate is removed — voice is a
1352
+ // first-class Eliza-1 surface, not opt-in.)
1353
+ runtimeWithRegistration.registerModel(
1354
+ ModelType.TRANSCRIPTION,
1355
+ makeTranscriptionHandler(),
1356
+ provider,
1357
+ LOCAL_INFERENCE_PRIORITY,
1358
+ );
1359
+ runtimeWithRegistration.registerModel(
1360
+ ModelType.IMAGE_DESCRIPTION,
1361
+ makeImageDescriptionHandler(),
1362
+ provider,
1363
+ LOCAL_INFERENCE_PRIORITY,
1364
+ );
1365
+ logger.info(
1366
+ `[local-inference] Registered ${provider} voice and vision handlers for TEXT_TO_SPEECH / TRANSCRIPTION / IMAGE_DESCRIPTION at priority ${LOCAL_INFERENCE_PRIORITY}`,
1367
+ );
1368
+ } catch (err) {
1369
+ logger.warn(
1370
+ "[local-inference] Could not register local voice/vision handlers",
1371
+ err instanceof Error ? err.message : String(err),
1372
+ );
1373
+ }
1374
+
1375
+ logger.info(
1376
+ `[local-inference] Registered ${provider} llama.cpp text handlers at priority ${LOCAL_INFERENCE_PRIORITY}`,
1377
+ );
1378
+
1379
+ // Install the top-priority router AFTER everything else has registered.
1380
+ // The router sits at Number.MAX_SAFE_INTEGER so the runtime dispatches
1381
+ // to it first; at dispatch time it picks a real provider via
1382
+ // `routing-policy` and calls that handler directly.
1383
+ installRouterHandler(runtime, {
1384
+ skipSlots: isLocalEmbeddingDisabledByEnv() ? ["TEXT_EMBEDDING"] : [],
1385
+ });
1386
+ logger.info(
1387
+ "[local-inference] Installed top-priority router for cross-provider routing",
1388
+ );
1389
+ runtimeWithRegistration[LOCAL_INFERENCE_HANDLER_INSTALLED] = true;
1390
+
1391
+ // Warm-on-load (item I3): if a local model is already resident, KV-prefill
1392
+ // the Stage-1 stable prefix onto the deterministic system-prefix slot so
1393
+ // the system prompt + tool schema is hot before the first user turn.
1394
+ // Fire-and-forget — pre-warm is best-effort and must never block boot.
1395
+ void prewarmSystemPrefix(runtime).catch(() => {
1396
+ // Logged inside prewarmSystemPrefix at debug; nothing more to do here.
1397
+ });
1398
+ }