@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893):
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,1640 @@
1
+ /// <reference path="./capacitor-llama.d.ts" />
2
+
3
+ /**
4
+ * Registers the standalone llama.cpp engine as the runtime handler for
5
+ * `ModelType.TEXT_SMALL` and `ModelType.TEXT_LARGE`.
6
+ *
7
+ * Priority is 0 — same band as cloud and direct provider plugins. Tie-breaks
8
+ * between local and cloud are owned by the routing-policy layer
9
+ * (`router-handler.ts` + `routing-policy.ts`), not by this priority value:
10
+ * the router sits at MAX_SAFE_INTEGER and consults the user's policy
11
+ * (manual / cheapest / fastest / prefer-local / round-robin) on every call.
12
+ *
13
+ * Until the cuttlefish smoke landed this was -1 to "let cloud win by default,"
14
+ * but that conflated routing-policy (a user preference) with handler
15
+ * priority (a registration ordinal). The runtime's getModel() returns
16
+ * undefined when no priority-0 handler is registered, which manifested as
17
+ * "No handler found for delegate type: TEXT_SMALL" on AOSP builds where
18
+ * the AOSP local inference loader is the only provider. Both cloud-only and
19
+ * local-only deployments now have a registered priority-0 handler; the
20
+ * router decides which one fires per request.
21
+ *
22
+ * Parallels `ensure-text-to-speech-handler.ts` — same shape, same guards.
23
+ */
24
+
25
+ import { existsSync, linkSync, mkdirSync, symlinkSync } from "node:fs";
26
+ import path from "node:path";
27
+ import {
28
+ type AgentRuntime,
29
+ type GenerateTextParams,
30
+ type IAgentRuntime,
31
+ type ImageDescriptionParams,
32
+ type ImageDescriptionResult,
33
+ logger,
34
+ ModelType,
35
+ renderMessageHandlerStablePrefix,
36
+ type TextEmbeddingParams,
37
+ type TextToSpeechParams,
38
+ type TranscriptionParams,
39
+ type UUID,
40
+ } from "@elizaos/core";
41
+ import { LocalInferenceUnavailableError } from "../provider";
42
+ import {
43
+ type LocalInferenceLoader,
44
+ resolveLocalInferenceLoadArgs,
45
+ } from "../services/active-model";
46
+ import {
47
+ autoAssignAtBoot,
48
+ isEmbeddingModelId,
49
+ readEffectiveAssignments,
50
+ } from "../services/assignments";
51
+ import { BionicHostLoader } from "../services/bionic-host-loader";
52
+ import {
53
+ extractConversationId,
54
+ extractPromptCacheKey,
55
+ resolveLocalCacheKey,
56
+ } from "../services/cache-bridge";
57
+ import { deviceBridge } from "../services/device-bridge";
58
+ import { localInferenceEngine } from "../services/engine";
59
+ import { handlerRegistry } from "../services/handler-registry";
60
+ import { probeHardware } from "../services/hardware";
61
+ import { tryGetMemoryArbiter } from "../services/memory-arbiter";
62
+ import { listInstalledModels } from "../services/registry";
63
+ import { installRouterHandler } from "../services/router-handler";
64
+ import {
65
+ type ElizaHarnessSchema,
66
+ elizaHarnessSchemaFromSkeleton,
67
+ } from "../services/structured-output";
68
+ import type { AgentModelSlot } from "../services/types";
69
+ import { decodeMonoPcm16Wav, type TranscriptionAudio } from "../services/voice";
70
+ import { DEFAULT_MODELS_DIR } from "./embedding-manager-support";
71
+ import { EMBEDDING_PRESETS } from "./embedding-presets";
72
+ import { isLocalEmbeddingDisabledByEnv } from "./embedding-warmup-policy";
73
+
74
+ type GenerateTextHandler = (
75
+ runtime: IAgentRuntime,
76
+ params: GenerateTextParams,
77
+ ) => Promise<string>;
78
+
79
+ /**
80
+ * Embedding handler signature — accepts the same union the runtime hands
81
+ * to TEXT_EMBEDDING calls (`TextEmbeddingParams | string | null`) and
82
+ * returns the raw float vector.
83
+ */
84
+ type EmbeddingHandler = (
85
+ runtime: IAgentRuntime,
86
+ params: TextEmbeddingParams | string | null,
87
+ ) => Promise<number[]>;
88
+
89
+ type TextToSpeechHandler = (
90
+ runtime: IAgentRuntime,
91
+ params: TextToSpeechParams | string,
92
+ ) => Promise<Uint8Array>;
93
+
94
+ type TranscriptionHandler = (
95
+ runtime: IAgentRuntime,
96
+ params: TranscriptionParams | Buffer | string | LocalTranscriptionParams,
97
+ ) => Promise<string>;
98
+
99
+ type ImageDescriptionHandler = (
100
+ runtime: IAgentRuntime,
101
+ params: ImageDescriptionParams | string,
102
+ ) => Promise<ImageDescriptionResult>;
103
+
104
+ interface LocalTranscriptionParams {
105
+ pcm?: Float32Array;
106
+ audio?: Uint8Array | ArrayBuffer | Buffer;
107
+ sampleRateHz?: number;
108
+ sampleRate?: number;
109
+ signal?: AbortSignal;
110
+ }
111
+
112
+ type LocalModelHandler =
113
+ | GenerateTextHandler
114
+ | EmbeddingHandler
115
+ | TextToSpeechHandler
116
+ | TranscriptionHandler
117
+ | ImageDescriptionHandler;
118
+
119
+ type RuntimeWithModelRegistration = AgentRuntime & {
120
+ getModel: (modelType: string | number) => LocalModelHandler | undefined;
121
+ registerModel: (
122
+ modelType: string | number,
123
+ handler: LocalModelHandler,
124
+ provider: string,
125
+ priority?: number,
126
+ ) => void;
127
+ };
128
+
129
+ const LOCAL_INFERENCE_PROVIDER = "eliza-local-inference";
130
+ const DEVICE_BRIDGE_PROVIDER = "eliza-device-bridge";
131
+ const CAPACITOR_LLAMA_PROVIDER = "capacitor-llama";
132
+ const AOSP_LLAMA_PROVIDER = "eliza-aosp-llama";
133
+ const LOCAL_INFERENCE_HANDLER_INSTALLED = Symbol.for(
134
+ "elizaos.local-inference.handlers-installed",
135
+ );
136
+ type RuntimeWithLocalInferenceFlag = RuntimeWithModelRegistration & {
137
+ [LOCAL_INFERENCE_HANDLER_INSTALLED]?: boolean;
138
+ };
139
+ /**
140
+ * Same band as cloud / direct provider plugins. Tie-breaks between
141
+ * candidates live in `routing-policy.ts`, not in this number — the
142
+ * router (registered at MAX_SAFE_INTEGER) consults the user's
143
+ * per-slot policy on every dispatch.
144
+ *
145
+ * Was -1 historically, which made `runtime.getModel(TEXT_SMALL)` return
146
+ * undefined when the AOSP local-inference loader was the only registered
147
+ * provider. The smoke run failed with "No handler found for delegate
148
+ * type: TEXT_SMALL"; bumping to 0 unblocks AOSP without changing
149
+ * cloud-only deployments (cloud providers still register at 0 and the
150
+ * routing-policy layer picks between them).
151
+ */
152
+ const LOCAL_INFERENCE_PRIORITY = 0;
153
+
154
/**
 * Predicate over a runtime-mode string.
 *
 * @param mode - Mode string to test; compared verbatim (no trimming or
 *   case folding happens here).
 * @returns `true` only when `mode` is exactly `"local"` or `"local-only"`.
 */
export function shouldRegisterLocalInferenceHandlers(mode: string): boolean {
  switch (mode) {
    case "local":
    case "local-only":
      return true;
    default:
      return false;
  }
}
157
+
158
/**
 * Canonicalizes a raw runtime-mode value.
 *
 * The input is trimmed and lower-cased before matching. The aliases
 * `"local-safe"` and `"local-yolo"` collapse to `"local"`; the values
 * `"local"`, `"local-only"`, `"cloud"` and `"remote"` are returned as-is.
 *
 * @param value - Candidate mode of any type.
 * @returns The canonical mode string, or `null` when `value` is not a string
 *   or is not one of the recognized modes.
 */
function normalizeRuntimeMode(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }

  const mode = value.trim().toLowerCase();
  switch (mode) {
    case "local-safe":
    case "local-yolo":
      return "local";
    case "local":
    case "local-only":
    case "cloud":
    case "remote":
      return mode;
    default:
      return null;
  }
}
173
+
174
/**
 * Resolves the effective runtime mode for a runtime instance.
 *
 * Lookup order: for each of `ELIZA_DEPLOYMENT_RUNTIME`, `ELIZA_RUNTIME_MODE`
 * and `RUNTIME_MODE` (in that order) the runtime setting is consulted first
 * and the process environment second; the first value that
 * `normalizeRuntimeMode` recognizes wins. When none matches, the mode is
 * `"cloud"` if either `ELIZA_CLOUD_PROVISIONED` or `ELIZAOS_CLOUD_ENABLED`
 * is set to `"1"` in the environment, and `"local"` otherwise.
 *
 * @param runtime - Runtime whose `getSetting` is queried for each key.
 * @returns The normalized runtime mode string.
 */
function getRuntimeMode(runtime: IAgentRuntime): string {
  const modeKeys = [
    "ELIZA_DEPLOYMENT_RUNTIME",
    "ELIZA_RUNTIME_MODE",
    "RUNTIME_MODE",
  ] as const;

  for (const key of modeKeys) {
    // Settings take precedence over the environment for the same key; the
    // environment is only read when the setting yields no recognized mode.
    const resolved =
      normalizeRuntimeMode(runtime.getSetting(key)) ??
      normalizeRuntimeMode(process.env[key]);
    if (resolved) {
      return resolved;
    }
  }

  const cloudFlags = [
    process.env.ELIZA_CLOUD_PROVISIONED,
    process.env.ELIZAOS_CLOUD_ENABLED,
  ];
  return cloudFlags.some((flag) => flag === "1") ? "cloud" : "local";
}
193
+
194
/**
 * Fetch the `"localInferenceLoader"` service from the runtime, if one is
 * registered and looks like a loader.
 *
 * @param runtime - Runtime that may expose `getService`.
 * @returns The service when it is an object with callable `loadModel` and
 *   `unloadModel`; otherwise `null` (including when `getService` is absent).
 */
function getLoader(runtime: IAgentRuntime): LocalInferenceLoader | null {
  const service = (
    runtime as { getService?: (name: string) => unknown }
  ).getService?.("localInferenceLoader");
  if (service === null || typeof service !== "object") {
    return null;
  }
  const shape = service as Partial<LocalInferenceLoader>;
  if (typeof shape.loadModel !== "function") {
    return null;
  }
  if (typeof shape.unloadModel !== "function") {
    return null;
  }
  return service as LocalInferenceLoader;
}
208
+
209
/**
 * Make sure the model assigned to `slot` is the one loaded before generation
 * runs. Loads lazily on first use and swaps when the slot's assignment names
 * a model other than the one currently loaded.
 *
 * With no assignment for the slot, whatever is loaded is left in place —
 * UNLESS this is a chat slot (`TEXT_SMALL` / `TEXT_LARGE`) and the loaded
 * model is an embedding model. That pairing produces `[unused{N}]` garbage
 * (a BERT model forced to autoregress), so it fails loudly with an actionable
 * message instead. See elizaOS/eliza#7687.
 *
 * @param loader - Runtime-registered loader, or `null` to use the standalone
 *   `localInferenceEngine`.
 * @param slot - Agent model slot whose assignment should be honoured.
 * @throws Error when an unassigned chat slot would be served by an embedding
 *   model, or when the assigned model is not installed.
 */
async function ensureAssignedModelLoaded(
  loader: LocalInferenceLoader | null,
  slot: AgentModelSlot,
): Promise<void> {
  const assignments = await readEffectiveAssignments();
  const assignedId = assignments[slot];
  if (!assignedId) {
    // Loud-failure guard: an unassigned chat slot must not silently dispatch
    // to whatever happens to be loaded — an embedding model would emit
    // reserved-token garbage.
    if (slot === "TEXT_SMALL" || slot === "TEXT_LARGE") {
      const installed = await listInstalledModels();
      const currentPath =
        loader?.currentModelPath() ?? localInferenceEngine.currentModelPath();
      const current = currentPath
        ? installed.find((m) => m.path === currentPath)
        : undefined;
      if (current && isEmbeddingModelId(current.id)) {
        throw new Error(
          `[local-inference] No chat model assigned for slot ${slot} — open Settings → Local models. The currently-loaded model (${current.id}) is an embedding model and cannot serve text generation.`,
        );
      }
    }
    return;
  }

  // The loader's reported path is authoritative when a loader exists;
  // otherwise the standalone engine's state is used.
  const currentPath = loader
    ? loader.currentModelPath()
    : localInferenceEngine.currentModelPath();

  // Read the installed catalogue once and reuse it for both the "already
  // loaded?" check and the target lookup.
  const installed = await listInstalledModels();
  if (currentPath) {
    const current = installed.find((m) => m.path === currentPath);
    if (current?.id === assignedId) return;
  }

  const target = installed.find((m) => m.id === assignedId);
  if (!target) {
    throw new Error(
      `[local-inference] Slot ${slot} assigned to ${assignedId}, but that model is not installed.`,
    );
  }

  const hardware = await probeHardware();
  const resolved = await resolveLocalInferenceLoadArgs(target, undefined, {
    hardware,
  });
  if (loader) {
    await loader.unloadModel();
    await loader.loadModel(resolved);
    return;
  }
  await localInferenceEngine.load(target.path, resolved);
}
288
+
289
/**
 * Report whether this generation opted into *guided structured decode* — the
 * deterministic-token prefill-plan short-circuit layered on the GBNF
 * constrained decode.
 *
 * Off by default. It turns on when `providerOptions.eliza.guidedDecode` is
 * exactly `true` (set by the planner / message service when it built a forced
 * skeleton), or when the process-wide `ELIZA_LOCAL_GUIDED_DECODE` env var is
 * `"1"` or `"true"`.
 *
 * @param params - Generation params; only `providerOptions` is inspected.
 * @returns `true` when either opt-in is present.
 */
function guidedDecodeRequested(params: GenerateTextParams): boolean {
  const { providerOptions } = params as { providerOptions?: unknown };
  if (providerOptions && typeof providerOptions === "object") {
    const eliza = (providerOptions as { eliza?: { guidedDecode?: unknown } })
      .eliza;
    if (eliza && eliza.guidedDecode === true) {
      return true;
    }
  }
  const flag = process.env.ELIZA_LOCAL_GUIDED_DECODE;
  return flag === "1" || flag === "true";
}
308
+
309
/**
 * Build the {@link ElizaHarnessSchema} for this call via
 * `elizaHarnessSchemaFromSkeleton`, passing the forced skeleton and, when the
 * producer supplied one as a string, the pre-built grammar.
 *
 * A schema is produced only when guided decode is requested AND
 * `params.responseSkeleton` is present; a `grammar` alone is not enough.
 *
 * @param params - Generation params carrying `responseSkeleton` / `grammar`.
 * @returns The harness schema, or `undefined` when either precondition fails.
 */
function elizaHarnessSchemaFromParams(
  params: GenerateTextParams,
): ElizaHarnessSchema | undefined {
  const skeleton = guidedDecodeRequested(params)
    ? params.responseSkeleton
    : undefined;
  if (!skeleton) {
    return undefined;
  }
  // Non-string grammars are dropped rather than forwarded.
  const grammar =
    typeof params.grammar === "string" ? params.grammar : undefined;
  return elizaHarnessSchemaFromSkeleton({ skeleton, grammar });
}
327
+
328
/**
 * Read the thinking-mode control from `providerOptions.eliza.thinking`.
 *
 * @param providerOptions - Opaque provider options bag.
 * @returns `"auto"`, `"on"` or `"off"` when exactly one of those is set;
 *   `undefined` for anything else (including a missing or non-object bag).
 */
function extractThinkingControl(
  providerOptions: unknown,
): "auto" | "on" | "off" | undefined {
  if (!providerOptions || typeof providerOptions !== "object") {
    return undefined;
  }
  const requested = (providerOptions as { eliza?: { thinking?: unknown } })
    .eliza?.thinking;
  if (requested === "auto" || requested === "on" || requested === "off") {
    return requested;
  }
  return undefined;
}
340
+
341
/**
 * NOTE: this comment describes `engineGenerateArgsFromParams`, which is
 * defined below, after the chat stream-step constant and its resolver.
 *
 * That function projects a `GenerateTextParams` onto the engine's
 * `GenerateArgs`, threading the structure-forcing extensions (`prefill`,
 * `responseSkeleton`, `grammar`, `streamStructured`, `elizaSchema`) and wiring
 * `onStreamChunk` to the engine's per-token `onTextChunk`. Cloud adapters
 * ignore these fields; the local engine honours them (the forced-span /
 * prefill / grammar / prefill-plan path is local-model-only).
 */
349
/**
 * Default per-step token cap for USER-VISIBLE local streaming (chat replies).
 *
 * Benchmarked on the fused eliza-1 model (#9174): each `llmStreamNext` step
 * carries a large fixed FFI overhead, so the throughput↔smoothness curve has
 * a knee around 8. A cap of `8` gives ~10 UI updates per 80 tokens (clearly
 * streaming) at a modest decode-throughput cost; 1–4 fall off a throughput
 * cliff and 16–32 look jumpy. Internal / planner / voice calls do NOT set
 * this and keep the coarse, throughput-tuned runner default (32).
 */
const DEFAULT_CHAT_STREAM_TOKENS_PER_STEP = 8;

/**
 * Resolve the chat stream step size, honouring the shared
 * `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP` env knob (the runner clamps it).
 *
 * @returns The env value parsed as a base-10 integer when it is finite and
 *   positive; otherwise `DEFAULT_CHAT_STREAM_TOKENS_PER_STEP`.
 */
function resolveChatStreamTokensPerStep(): number {
  const override = process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP?.trim();
  if (!override) {
    return DEFAULT_CHAT_STREAM_TOKENS_PER_STEP;
  }
  const steps = Number.parseInt(override, 10);
  const usable = Number.isFinite(steps) && steps > 0;
  return usable ? steps : DEFAULT_CHAT_STREAM_TOKENS_PER_STEP;
}
368
+
369
/**
 * Translate runtime `GenerateTextParams` into the argument bag handed to the
 * local engine / loader `generate` call.
 *
 * Prompt selection: `params.prompt` when it is not null/undefined; otherwise
 * the concatenated `promptSegments`; otherwise the messages rendered as
 * `role:\ncontent` blocks separated by blank lines.
 *
 * Streaming: `onTextChunk` is wired to `params.onStreamChunk` only when the
 * caller asked for streaming (`stream` or `streamStructured`), and
 * `maxTokensPerStep` is set only alongside it.
 *
 * @param params - Generation params from the runtime.
 * @param cacheKey - Cache key resolved by the caller, if any.
 * @returns The engine argument object.
 */
function engineGenerateArgsFromParams(
  params: GenerateTextParams,
  cacheKey: string | undefined,
): {
  prompt: string;
  stopSequences?: string[];
  cacheKey?: string;
  signal?: AbortSignal;
  maxTokens?: number;
  temperature?: number;
  topP?: number;
  prefill?: string;
  responseSkeleton?: GenerateTextParams["responseSkeleton"];
  grammar?: string;
  streamStructured?: boolean;
  elizaSchema?: ElizaHarnessSchema;
  spanSamplerPlan?: GenerateTextParams["spanSamplerPlan"];
  thinking?: "auto" | "on" | "off";
  onTextChunk?: (chunk: string) => void | Promise<void>;
  maxTokensPerStep?: number;
  voiceOutput?: "user-visible" | "internal";
} {
  // Flatten message content: a string as-is, an array of strings /
  // `{ text }` parts joined by newlines, anything else as empty.
  const renderContent = (content: unknown): string => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    const pieces: string[] = [];
    for (const part of content) {
      let text = "";
      if (typeof part === "string") {
        text = part;
      } else if (
        part &&
        typeof part === "object" &&
        typeof (part as { text?: unknown }).text === "string"
      ) {
        text = (part as { text: string }).text;
      }
      if (text) pieces.push(text);
    }
    return pieces.join("\n");
  };

  const segments = params.promptSegments;
  const segmentPrompt =
    segments && segments.length > 0
      ? segments.map((segment) => segment.content).join("")
      : "";

  // Messages are only rendered when the segments produced nothing.
  let messagePrompt = "";
  if (!segmentPrompt && params.messages && params.messages.length > 0) {
    const turns: string[] = [];
    for (const message of params.messages) {
      const body = renderContent(message.content);
      if (body) turns.push(`${message.role}:\n${body}`);
    }
    messagePrompt = turns.join("\n\n");
  }

  const streamStructured = params.streamStructured === true;
  // Forward per-token chunks to the caller's `onStreamChunk` (the reply
  // path's LLM→TTS handoff), but only when streaming was requested so
  // non-streaming callers don't pay the chunk-callback overhead.
  const wantsChunks = params.stream === true || streamStructured;
  const onTextChunk =
    wantsChunks && typeof params.onStreamChunk === "function"
      ? (chunk: string) => params.onStreamChunk?.(chunk)
      : undefined;

  return {
    prompt: params.prompt ?? (segmentPrompt || messagePrompt),
    stopSequences: params.stopSequences,
    cacheKey,
    signal: params.signal,
    maxTokens: params.maxTokens,
    temperature: params.temperature,
    topP: params.topP,
    prefill: params.prefill,
    responseSkeleton: params.responseSkeleton,
    grammar: params.grammar,
    streamStructured: streamStructured || undefined,
    elizaSchema: elizaHarnessSchemaFromParams(params),
    spanSamplerPlan: params.spanSamplerPlan,
    thinking: extractThinkingControl(params.providerOptions),
    onTextChunk,
    // Fine-grained steps for user-visible replies so the dashboard renders
    // token-by-token instead of in ~32-token jumps. Internal/planner calls
    // (no onTextChunk) keep the coarse, throughput-tuned default (#9174).
    maxTokensPerStep: onTextChunk
      ? resolveChatStreamTokensPerStep()
      : undefined,
    voiceOutput:
      params.voiceOutput ??
      (typeof params.onStreamChunk === "function" ? "user-visible" : undefined),
  };
}
464
+
465
/**
 * Build the text-generation handler for one agent model slot.
 *
 * Per call the handler: ensures the slot's assigned model is loaded, resolves
 * a cache key, projects the params onto engine args, then dispatches to
 * (1) the runtime loader's `generate` when it has one, else (2) the
 * standalone engine — through a conversation handle when a conversation id is
 * available, otherwise through plain `generate`.
 *
 * @param slot - Slot the handler serves.
 * @returns The handler.
 * @throws LocalInferenceUnavailableError (from the handler) when the engine
 *   is unavailable or has no model loaded.
 */
function makeHandler(slot: AgentModelSlot): GenerateTextHandler {
  return async (runtime, params) => {
    const loader = getLoader(runtime);

    // Lazy-load the slot's assigned model, if any. Swaps are expensive; the
    // user is expected to assign only a small number of models.
    await ensureAssignedModelLoaded(loader, slot);

    // Strongest cache key the runtime can give us. Precedence (see
    // `resolveLocalCacheKey`):
    //   1. Conversation id — survives any prompt drift
    //   2. Stable-prefix hash — survives unstable-tail timestamps
    //   3. Provider plan hashes — back-compat
    const { providerOptions } = params as { providerOptions?: unknown };
    const conversationId = extractConversationId(providerOptions);
    const cacheKey =
      resolveLocalCacheKey(providerOptions) ??
      extractPromptCacheKey(providerOptions) ??
      undefined;
    const engineArgs = engineGenerateArgsFromParams(params, cacheKey);

    // Mobile / device-bridge path: a runtime-registered loader that can
    // generate wins. Desktop falls through to the standalone engine.
    if (loader?.generate) {
      return loader.generate(engineArgs);
    }

    // The two checks below raise the typed UNAVAILABLE error so the
    // cross-provider router can fall back to a registered cloud/API provider
    // instead of hard-failing the whole turn.
    const engineAvailable = await localInferenceEngine.available();
    if (!engineAvailable) {
      throw new LocalInferenceUnavailableError(
        slot,
        "backend_unavailable",
        `[local-inference] No llama.cpp binding available for ${slot} request`,
      );
    }
    if (!localInferenceEngine.hasLoadedModel()) {
      throw new LocalInferenceUnavailableError(
        slot,
        "backend_unavailable",
        `[local-inference] No local model is active. Assign a model to ${slot} or activate one in Settings → Local models.`,
      );
    }

    // No conversation context: use the existing hash-based slot allocation,
    // so callers unaware of conversation handles keep working.
    if (!conversationId) {
      return localInferenceEngine.generate(engineArgs);
    }

    // Long-lived conversation: open / reuse a registry handle so this turn
    // lands on the same slot every time, regardless of prompt-hash drift.
    const modelId =
      localInferenceEngine.currentModelPath() ?? "default-local-model";
    const handle =
      localInferenceEngine.conversation(conversationId, modelId) ??
      localInferenceEngine.openConversation({ conversationId, modelId });
    // The conversation API takes no cache key; strip it from the args.
    const { cacheKey: _unusedCacheKey, ...conversationArgs } = engineArgs;
    const result = await localInferenceEngine.generateInConversation(
      handle,
      conversationArgs,
    );

    // Per-generation usage line, matching the Anthropic plugin's
    // observability surface so cloud and local share one mental model.
    const usage = result.usage;
    const hitRate =
      usage.cache_hit_rate !== undefined
        ? `${Math.round(usage.cache_hit_rate * 100)}%`
        : "n/a";
    const mtpRate =
      typeof usage.mtp_acceptance_rate === "number"
        ? ` mtp=${Math.round(usage.mtp_acceptance_rate * 100)}%`
        : "";
    logger.info(
      `[local-inference] usage conv=${conversationId} slot=${result.slotId} in=${usage.input_tokens} out=${usage.output_tokens} cache_read=${usage.cache_read_input_tokens} cache_create=${usage.cache_creation_input_tokens} hit=${hitRate}${mtpRate}`,
    );
    // Auto-tune signal: a one-line warn if the high-water mark outgrew the
    // configured slot count this turn.
    localInferenceEngine.warnIfParallelTooLow({ warn: logger.warn });
    return result.text;
  };
}
560
+
561
/**
 * Reduce the runtime's TEXT_EMBEDDING input to the text to embed.
 *
 * @param params - Structured `TextEmbeddingParams` (typed plugins), a raw
 *   string (action runners), or `null` (the internal warmup probe used to
 *   size the shipped embedding vector).
 * @returns The string itself, `params.text` for the structured form, or `""`
 *   for `null`.
 */
function extractEmbeddingText(
  params: TextEmbeddingParams | string | null,
): string {
  if (typeof params === "string") {
    return params;
  }
  return params === null ? "" : params.text;
}
574
+
575
/**
 * Build the loader-backed TEXT_EMBEDDING handler (the embedding counterpart
 * of `makeHandler`).
 *
 * The handler routes through the runtime loader's `embed`. When the loader is
 * missing or has no `embed`, it throws so the runtime can fall back to a
 * non-local provider rather than serving a silent zero-vector
 * (Commandment 8: don't hide broken pipelines).
 *
 * @returns The handler, which resolves to the loader's `embedding` result.
 */
function makeEmbeddingHandler(): EmbeddingHandler {
  return async (runtime, params) => {
    const loader = getLoader(runtime);
    if (loader === null || !loader.embed) {
      throw new Error(
        "[local-inference] Active loader does not implement embed; falling through to next provider",
      );
    }
    // Embeddings are not slot-aware here — there is a single active model.
    // Load the user's TEXT_EMBEDDING assignment, if any, before embedding.
    await ensureAssignedModelLoaded(loader, "TEXT_EMBEDDING");
    const input = extractEmbeddingText(params);
    const { embedding } = await loader.embed({ input });
    return embedding;
  };
}
598
+
599
/** Desktop embedding settings produced by `resolveDesktopEmbeddingConfig`. */
interface DesktopEmbeddingConfig {
  /** Directory the embedding model path is resolved against. */
  modelsDir: string;
  /** Embedding model file name or path, resolved relative to `modelsDir`. */
  model: string;
  /** Context size for the embedding model. */
  contextSize: number;
  /** Number of layers to offload to the GPU. */
  gpuLayers: number;
}
605
+
606
/**
 * Resolve the desktop embedding model and load parameters from the
 * environment, defaulting to `EMBEDDING_PRESETS.performance`.
 *
 * - `MODELS_DIR` → `modelsDir` (default `DEFAULT_MODELS_DIR`).
 * - `LOCAL_EMBEDDING_MODEL` → `model` (default: preset model).
 * - `LOCAL_EMBEDDING_CONTEXT_SIZE` → `contextSize` when finite and positive
 *   (default: preset context size).
 * - `LOCAL_EMBEDDING_GPU_LAYERS` → `gpuLayers`: `"auto"` / `"max"` map to 999
 *   ("all layers on GPU" per llama.cpp; the desktop adapter clamps to the
 *   model's layer count), a finite number is used as-is, anything else is 0.
 *
 * @returns The resolved configuration.
 */
function resolveDesktopEmbeddingConfig(): DesktopEmbeddingConfig {
  const { performance: preset } = EMBEDDING_PRESETS;
  const env = process.env;

  const requestedContext = Number(env.LOCAL_EMBEDDING_CONTEXT_SIZE);
  const hasValidContext =
    Number.isFinite(requestedContext) && requestedContext > 0;

  const gpuSetting = env.LOCAL_EMBEDDING_GPU_LAYERS?.trim();
  let gpuLayers = 0;
  if (gpuSetting === "auto" || gpuSetting === "max") {
    gpuLayers = 999;
  } else {
    const requestedLayers = Number(gpuSetting);
    if (Number.isFinite(requestedLayers)) {
      gpuLayers = requestedLayers;
    }
  }

  return {
    modelsDir: env.MODELS_DIR?.trim() || DEFAULT_MODELS_DIR,
    model: env.LOCAL_EMBEDDING_MODEL?.trim() || preset.model,
    contextSize: hasValidContext ? requestedContext : preset.contextSize,
    gpuLayers,
  };
}
630
+
631
/**
 * Find (or stage) the bundle root that the fused `eliza_inference_embed`
 * should anchor at for the dedicated embedding model.
 *
 * The fused C side embeds over the single GGUF under `<root>/text/`, so the
 * root must be an isolated bundle holding ONLY the embedding model — never
 * the chat bundle's text model, whose decoder-as-embedder output has a
 * different dimension. Resolution order:
 *   1. `ELIZA_EMBED_BUNDLE_ROOT`, when it contains a `text` entry.
 *   2. The model already sits in a `text/` directory → that directory's
 *      parent.
 *   3. `<modelsDir>/text/<model>` exists → `<modelsDir>`.
 *   4. Otherwise the model file is staged under
 *      `<modelsDir>/.eliza-embed-bundle/text/` (hardlink, with a symlink
 *      fallback) and that bundle directory is returned.
 *
 * @param cfg - Desktop embedding configuration.
 * @returns The bundle root, or `null` when the embedding GGUF is not present
 *   (boot warmup may still be downloading) or staging failed (logged as a
 *   warning).
 */
function resolveFusedEmbedBundleRoot(
  cfg: DesktopEmbeddingConfig,
): string | null {
  const explicitRoot = process.env.ELIZA_EMBED_BUNDLE_ROOT?.trim();
  if (explicitRoot && existsSync(path.join(explicitRoot, "text"))) {
    return explicitRoot;
  }

  const modelPath = path.resolve(cfg.modelsDir, cfg.model);
  const containingDir = path.dirname(modelPath);
  const alreadyUnderText = path.basename(containingDir) === "text";
  if (alreadyUnderText && existsSync(modelPath)) {
    return path.dirname(containingDir);
  }

  if (existsSync(path.join(cfg.modelsDir, "text", cfg.model))) {
    return cfg.modelsDir;
  }

  if (!existsSync(modelPath)) {
    return null;
  }

  // Stage the model into its own bundle.
  const bundleRoot = path.join(cfg.modelsDir, ".eliza-embed-bundle");
  const bundleTextDir = path.join(bundleRoot, "text");
  const stagedPath = path.join(bundleTextDir, path.basename(cfg.model));
  try {
    mkdirSync(bundleTextDir, { recursive: true });
    if (!existsSync(stagedPath)) {
      try {
        linkSync(modelPath, stagedPath);
      } catch {
        // Hardlink refused; try a symlink instead.
        symlinkSync(modelPath, stagedPath);
      }
    }
  } catch (err) {
    logger.warn(
      `[local-inference] could not stage the fused embed bundle for "${cfg.model}": ${String(err)}`,
    );
    return null;
  }
  return bundleRoot;
}
682
+
683
/**
 * Lazily-resolved fused embedding handle, shared by every call to
 * `getFusedEmbeddingHandle`.
 *
 * When the fused `libelizainference` (ABI v9) is present, reports
 * `embedSupported()`, and a `<root>/text/` bundle root resolves for the
 * embedding model, the desktop TEXT_EMBEDDING handler computes embeddings
 * through `eliza_inference_embed` over the fused handle's resident text vocab
 * — retiring the node-llama-cpp / libllama embedding path.
 *
 * States: `null` means "not resolved yet"; a promise resolving to `null`
 * means resolution was attempted and the fused path is not usable (the
 * handler then falls back).
 *
 * Explicitly initialized to `null`: without an initializer the binding starts
 * as `undefined`, which a strict `=== null` "not resolved yet" check does not
 * match, so the lazy initialization would never run.
 */
let fusedEmbedHandlePromise: Promise<{
  ffi: import("../services/voice/ffi-bindings").ElizaInferenceFfi;
  ctx: import("../services/voice/ffi-bindings").ElizaInferenceContextHandle;
  embed: NonNullable<
    import("../services/voice/ffi-bindings").ElizaInferenceFfi["embed"]
  >;
} | null> | null = null;
698
+
699
/**
 * Resolve the fused embedding handle, creating it on first use, and return a
 * text → vector embedder over it.
 *
 * Resolution requires `bun:ffi` to resolve, a bundle root from
 * `resolveFusedEmbedBundleRoot`, a fused library path for that root, and a
 * loaded library whose `embedSupported()` is `true` and which exposes
 * `embed`. A clean "not usable" outcome is cached; a thrown error clears the
 * cache so a later call retries.
 *
 * @param cfg - Desktop embedding configuration (only consulted by the call
 *   that performs the resolution).
 * @returns An object with `embed(text)`, or `null` when the fused path is
 *   unavailable.
 */
async function getFusedEmbeddingHandle(cfg: DesktopEmbeddingConfig): Promise<{
  embed: (text: string) => Float32Array;
} | null> {
  // Falsy check rather than `=== null`: the module-level binding is
  // `undefined` until first assigned, and that state must also trigger
  // resolution. A pending/settled promise is always truthy.
  if (!fusedEmbedHandlePromise) {
    fusedEmbedHandlePromise = (async () => {
      try {
        require.resolve("bun:ffi");
      } catch {
        // Not running where bun:ffi resolves — the fused path cannot work.
        return null;
      }
      const { resolveFusedLibraryPath } = await import(
        "../services/desktop-fused-ffi-backend-runtime"
      );
      const bundleRoot = resolveFusedEmbedBundleRoot(cfg);
      if (!bundleRoot) return null;
      const libPath = resolveFusedLibraryPath(bundleRoot);
      if (!libPath) return null;
      const { loadElizaInferenceFfi } = await import(
        "../services/voice/ffi-bindings"
      );
      const ffi = loadElizaInferenceFfi(libPath);
      if (
        typeof ffi.embedSupported !== "function" ||
        ffi.embedSupported() !== true ||
        typeof ffi.embed !== "function"
      ) {
        ffi.close();
        return null;
      }
      let ctx: import("../services/voice/ffi-bindings").ElizaInferenceContextHandle;
      try {
        ctx = ffi.create(bundleRoot);
      } catch (err) {
        // Release the loaded library before propagating; a retry loads it
        // again, so leaving it open would leak one handle per attempt.
        ffi.close();
        throw err;
      }
      logger.info(
        `[local-inference] Desktop embeddings via fused libelizainference (eliza_inference_embed) anchored at ${bundleRoot} — node-llama-cpp embedding path retired`,
      );
      return { ffi, ctx, embed: ffi.embed };
    })().catch((err: unknown) => {
      // Surface the cause instead of swallowing it, then clear the cache so
      // the next call retries.
      logger.warn(
        `[local-inference] fused embedding handle resolution failed: ${String(err)}`,
      );
      fusedEmbedHandlePromise = null;
      return null;
    });
  }
  const handle = await fusedEmbedHandlePromise;
  if (!handle) return null;
  // gte-small / BERT bi-encoders use MEAN pooling; a decoder-as-embedder
  // (`--pooling last`) is selected via ELIZA_EMBED_POOLING=last.
  const pooling =
    process.env.ELIZA_EMBED_POOLING?.trim().toLowerCase() === "last" ? 3 : 1;
  return {
    embed: (text: string) => handle.embed({ ctx: handle.ctx, text, pooling }),
  };
}
748
+
749
/**
 * Build the desktop TEXT_EMBEDDING handler backed by the FUSED
 * `libelizainference` (`eliza_inference_embed`, ABI v9).
 *
 * The dedicated embedding GGUF (gte-small, 384-dim — an exact match for
 * plugin-sql's dim384 column) is served from an isolated fused embed bundle
 * (see `resolveFusedEmbedBundleRoot`). libllama is retired: there is no
 * capacitor/libllama fallback. When the fused embed cannot resolve (no
 * bun:ffi, no fused lib, or the embedding GGUF is still downloading) the
 * handler throws so the runtime falls through to the operator-configured
 * provider — never a silent zero-vector (Commandment 8).
 *
 * @returns The handler, which resolves to the embedding as a plain
 *   `number[]`.
 */
function makeFusedEmbeddingHandler(): EmbeddingHandler {
  return async (_runtime, params) => {
    const input = extractEmbeddingText(params);
    const cfg = resolveDesktopEmbeddingConfig();
    const fused = await getFusedEmbeddingHandle(cfg);
    if (fused) {
      // The fused call returns a Float32Array; callers get a plain array.
      return Array.from(fused.embed(input));
    }
    const reason = [
      `[local-inference] TEXT_EMBEDDING unavailable: the fused libelizainference `,
      `embed path could not resolve for "${cfg.model}" (needs bun:ffi, the fused `,
      `lib, and the embedding GGUF present). libllama is retired — falling through `,
      `to the next embedding provider.`,
    ].join("");
    throw new LocalInferenceUnavailableError(
      ModelType.TEXT_EMBEDDING,
      "backend_unavailable",
      reason,
    );
  };
}
778
+
779
/**
 * Pull the text to synthesize out of a TEXT_TO_SPEECH input.
 *
 * @param params - A raw string or a params object with a string `text`.
 * @returns The text to speak (may be empty; emptiness is checked by callers).
 * @throws Error when the input is neither a string nor an object with a
 *   string `text`.
 */
function extractSpeechText(params: TextToSpeechParams | string): string {
  const text = typeof params === "string" ? params : params?.text;
  if (typeof text === "string") {
    return text;
  }
  throw new Error(
    "[local-inference] TEXT_TO_SPEECH requires a string or { text } input",
  );
}
786
+
787
/**
 * Read the optional abort signal from a TEXT_TO_SPEECH input.
 *
 * @param params - A raw string or a params object.
 * @returns `params.signal` for a non-null object; `undefined` otherwise.
 */
function extractSpeechSignal(
  params: TextToSpeechParams | string,
): AbortSignal | undefined {
  if (params === null || typeof params !== "object") {
    return undefined;
  }
  return params.signal;
}
794
+
795
/**
 * Build the TEXT_TO_SPEECH handler over the local engine's voice bundle.
 *
 * @returns The handler. It rejects empty text, waits for
 *   `ensureActiveBundleVoiceReady()`, then returns the result of
 *   `synthesizeSpeech(text, signal)`.
 */
function makeTextToSpeechHandler(): TextToSpeechHandler {
  return async (_runtime, params) => {
    const text = extractSpeechText(params);
    if (!text) {
      throw new Error(
        "[local-inference] TEXT_TO_SPEECH text must be non-empty",
      );
    }
    // Singing, emotion tags and lyrical phrasing are deliberately not
    // filtered here: the local voice bundle advertises its expressive
    // capability in the manifest, and runtime safety policy lives above this
    // model adapter.
    await localInferenceEngine.ensureActiveBundleVoiceReady();
    const signal = extractSpeechSignal(params);
    return localInferenceEngine.synthesizeSpeech(text, signal);
  };
}
813
+
814
/**
 * View binary input as a plain `Uint8Array`.
 *
 * @param value - A `Uint8Array` (including subclasses such as `Buffer`) or
 *   an `ArrayBuffer`.
 * @returns A new `Uint8Array` over the same underlying memory; typed-array
 *   input keeps its byte offset and length.
 */
function toUint8Array(value: Uint8Array | ArrayBuffer | Buffer): Uint8Array {
  if (!(value instanceof Uint8Array)) {
    return new Uint8Array(value);
  }
  const { buffer, byteOffset, byteLength } = value;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
820
+
821
/**
 * Convert a TRANSCRIPTION input into the PCM audio the local voice runtime
 * consumes.
 *
 * Accepted shapes:
 * - raw bytes (`Uint8Array` / `Buffer` / `ArrayBuffer`), decoded with
 *   `decodeMonoPcm16Wav`;
 * - `{ pcm: Float32Array, sampleRateHz }` (or the `sampleRate` alias), passed
 *   through;
 * - `{ audio: <bytes> }`, decoded with `decodeMonoPcm16Wav`.
 *
 * URL/path strings and `{ audioUrl }` are rejected: this provider does not
 * fetch audio.
 *
 * @param params - Transcription input in any supported shape.
 * @returns The PCM samples and their sample rate.
 * @throws Error for unsupported input, or for a `{ pcm }` input whose sample
 *   rate is not a finite positive number.
 */
function extractTranscriptionAudio(
  params: TranscriptionParams | Buffer | string | LocalTranscriptionParams,
): TranscriptionAudio {
  if (typeof params === "string") {
    throw new Error(
      "[local-inference] TRANSCRIPTION via the local voice runtime requires PCM/WAV bytes; URL/path strings are not fetched by this provider",
    );
  }
  if (params instanceof Uint8Array || params instanceof ArrayBuffer) {
    return decodeMonoPcm16Wav(toUint8Array(params));
  }
  if (!params || typeof params !== "object") {
    throw new Error(
      "[local-inference] TRANSCRIPTION requires PCM/WAV bytes or { pcm, sampleRateHz }",
    );
  }
  if ("audioUrl" in params && typeof params.audioUrl === "string") {
    throw new Error(
      "[local-inference] TRANSCRIPTION audioUrl is not fetched by the local voice runtime; pass mono PCM16 WAV bytes or { pcm, sampleRateHz }",
    );
  }
  if ("pcm" in params && params.pcm instanceof Float32Array) {
    const sampleRate =
      ("sampleRateHz" in params ? params.sampleRateHz : undefined) ??
      ("sampleRate" in params ? params.sampleRate : undefined);
    // `Number.isFinite` also rejects NaN and ±Infinity, which pass a plain
    // `typeof === "number" && > 0` test and are not usable sample rates.
    if (
      typeof sampleRate !== "number" ||
      !Number.isFinite(sampleRate) ||
      sampleRate <= 0
    ) {
      throw new Error(
        "[local-inference] TRANSCRIPTION { pcm } requires a positive sampleRateHz",
      );
    }
    return { pcm: params.pcm, sampleRate };
  }
  if (
    "audio" in params &&
    (params.audio instanceof Uint8Array || params.audio instanceof ArrayBuffer)
  ) {
    return decodeMonoPcm16Wav(toUint8Array(params.audio));
  }
  throw new Error(
    "[local-inference] TRANSCRIPTION requires mono PCM16 WAV bytes or { pcm, sampleRateHz } for the local voice runtime",
  );
}
863
+
864
/**
 * Read the optional abort signal from a TRANSCRIPTION input.
 *
 * @param params - Transcription input in any supported shape.
 * @returns The `signal` property for a non-null object; `undefined`
 *   otherwise (e.g. for string input).
 */
function extractTranscriptionSignal(
  params: TranscriptionParams | Buffer | string | LocalTranscriptionParams,
): AbortSignal | undefined {
  if (params === null || typeof params !== "object") {
    return undefined;
  }
  return (params as { signal?: AbortSignal }).signal;
}
871
+
872
/**
 * Throw when the given signal has already been aborted.
 *
 * @param signal - Optional abort signal; a missing signal is a no-op.
 * @throws The signal's `reason` when it is an `Error`; otherwise a
 *   `DOMException` named `"AbortError"`.
 */
function throwIfAborted(signal: AbortSignal | undefined): void {
  if (signal?.aborted) {
    const { reason } = signal;
    if (reason instanceof Error) {
      throw reason;
    }
    throw new DOMException("Aborted", "AbortError");
  }
}
878
+
879
/**
 * Build the TRANSCRIPTION handler over the local engine's ASR runtime.
 *
 * The handler checks for cancellation before decoding the input, after the
 * ASR bundle is ready, and after transcription. When the params carry an
 * `onStreamChunk` function, each partial-transcript delta is forwarded to it.
 *
 * @returns The handler, which resolves to the result of `transcribePcm`.
 */
function makeTranscriptionHandler(): TranscriptionHandler {
  return async (_runtime, params) => {
    const signal = extractTranscriptionSignal(params);
    throwIfAborted(signal);
    const audio = extractTranscriptionAudio(params);
    // The fused libelizainference ASR runtime is the sole on-device
    // transcriber. A startup/availability failure propagates (AGENTS.md §3) —
    // there is no whisper.cpp second attempt and no silent empty transcript.
    await localInferenceEngine.ensureActiveBundleAsrReady();
    throwIfAborted(signal);
    // Stream partial transcripts through the same pipe as chat text when the
    // runtime wired a chunk sink (useModel injects onStreamChunk into local
    // model params inside a streaming reply turn). The sink is read
    // structurally so this stays robust to the core param type surface.
    const streamSink =
      params && typeof params === "object"
        ? (
            params as {
              onStreamChunk?: (chunk: string) => void | Promise<void>;
            }
          ).onStreamChunk
        : undefined;
    const onPartial =
      typeof streamSink === "function"
        ? (delta: string) => {
            // Fire-and-forget: transcription does not wait on the sink. An
            // async sink that rejects would otherwise surface as an unhandled
            // rejection, so the rejection is logged here. A synchronous throw
            // from the sink still propagates to the engine.
            Promise.resolve(streamSink(delta)).catch((err: unknown) => {
              logger.warn(
                `[local-inference] TRANSCRIPTION stream sink rejected a partial: ${String(err)}`,
              );
            });
          }
        : undefined;
    const transcript = await localInferenceEngine.transcribePcm(
      audio,
      signal,
      onPartial,
    );
    throwIfAborted(signal);
    return transcript;
  };
}
917
+
918
/**
 * Convert an IMAGE_DESCRIPTION input into the vision request shape.
 *
 * @param params - An image URL string, or a params object carrying
 *   `imageUrl` plus optional `prompt`, `signal`, `stream` and
 *   `onStreamChunk`.
 * @returns The request. `image` is tagged `"dataUrl"` for `data:` URLs and
 *   `"url"` otherwise. `signal` is present only when supplied, and
 *   `onTextChunk` only when `stream === true` and `onStreamChunk` is a
 *   function.
 * @throws Error when the image URL is missing or empty.
 */
function paramsToVisionRequest(params: ImageDescriptionParams | string): {
  image: { kind: "dataUrl"; dataUrl: string } | { kind: "url"; url: string };
  prompt?: string;
  signal?: AbortSignal;
  onTextChunk?: (chunk: string) => void | Promise<void>;
} {
  const url = typeof params === "string" ? params : params.imageUrl;
  if (typeof url !== "string" || url.length === 0) {
    throw new Error(
      "[local-inference] IMAGE_DESCRIPTION requires a non-empty imageUrl",
    );
  }

  // Everything besides the URL only exists on the object form.
  const options =
    typeof params === "object"
      ? (params as ImageDescriptionParams & {
          signal?: AbortSignal;
          stream?: boolean;
          onStreamChunk?: (chunk: string) => void | Promise<void>;
        })
      : undefined;
  const prompt = options?.prompt;
  const signal = options?.signal;
  const streamSink =
    options?.stream === true ? options.onStreamChunk : undefined;
  const onTextChunk =
    typeof streamSink === "function"
      ? (chunk: string) => streamSink(chunk)
      : undefined;

  const image = url.startsWith("data:")
    ? { kind: "dataUrl" as const, dataUrl: url }
    : { kind: "url" as const, url };
  return {
    image,
    prompt,
    ...(signal ? { signal } : {}),
    ...(onTextChunk ? { onTextChunk } : {}),
  };
}
962
+
963
/**
 * Coerce a vision backend result into a trimmed `{ title, description }`.
 *
 * A plain string becomes the description; its title is the text before the
 * first `.`, `!` or `?`, falling back to "Image" when that text is empty.
 * An object result must carry non-blank string `title` and `description`
 * fields; only those two fields are kept.
 *
 * @throws Error when the string is blank or the object is malformed.
 */
function normalizeImageDescription(
	result: ImageDescriptionResult | string,
): ImageDescriptionResult {
	if (typeof result === "string") {
		const text = result.trim();
		if (text.length === 0) {
			throw new Error(
				"[local-inference] IMAGE_DESCRIPTION backend returned an empty description",
			);
		}
		const [leadingSentence] = text.split(/[.!?]/, 1);
		return {
			title: leadingSentence?.trim() || "Image",
			description: text,
		};
	}

	// Only genuine (non-null) objects with string fields qualify.
	const candidate =
		result !== null && typeof result === "object" ? result : undefined;
	const title =
		typeof candidate?.title === "string" ? candidate.title.trim() : "";
	const description =
		typeof candidate?.description === "string"
			? candidate.description.trim()
			: "";
	if (title.length === 0 || description.length === 0) {
		throw new Error(
			"[local-inference] IMAGE_DESCRIPTION backend returned an invalid description",
		);
	}
	return { title, description };
}
995
+
996
+ /**
997
+ * Runtime setting marker that plugin-vision polls before preferring the
998
+ * Eliza-1 vision path over its legacy Florence path. We set it only when
999
+ * the process-wide arbiter advertises the `vision-describe` capability.
1000
+ */
1001
+ const ELIZA1_VISION_MARKER = "ELIZA1_VISION_HANDLER_PRESENT";
1002
+
1003
/**
 * Set the `ELIZA1_VISION_MARKER` runtime setting to "1" unless it is
 * already "1" / `true`. Does nothing on runtimes without `setSetting`;
 * a throwing `setSetting` is tolerated.
 */
function markEliza1VisionHandlerPresent(runtime: IAgentRuntime): void {
	type SettingsAccess = {
		setSetting?: (key: string, value: unknown) => void;
		getSetting?: (key: string) => unknown;
	};
	const settings = runtime as IAgentRuntime & SettingsAccess;
	if (typeof settings.setSetting !== "function") return;

	// Skip the write when the marker is already in place.
	if (typeof settings.getSetting === "function") {
		const current = settings.getSetting(ELIZA1_VISION_MARKER);
		const alreadyMarked = current === "1" || current === true;
		if (alreadyMarked) return;
	}

	try {
		settings.setSetting(ELIZA1_VISION_MARKER, "1");
	} catch {
		// Some test runtimes reject setSetting at runtime — non-fatal.
	}
}
1019
+
1020
/**
 * Build the IMAGE_DESCRIPTION handler that routes through the memory
 * arbiter's `vision-describe` capability.
 *
 * The handler throws when no arbiter is available, the capability is not
 * advertised, or `requestVisionDescribe` is missing. Otherwise it marks the
 * runtime, picks the model key (`params.modelKey` when it is a non-empty
 * string, else "gemma-vl"), sends the request and normalizes the result.
 */
function makeImageDescriptionHandler(): ImageDescriptionHandler {
	const DEFAULT_MODEL_KEY = "gemma-vl";

	return async (runtime, params) => {
		const arbiter = tryGetMemoryArbiter();
		if (
			!(
				arbiter?.hasCapability("vision-describe") &&
				typeof arbiter.requestVisionDescribe === "function"
			)
		) {
			throw new Error(
				"[local-inference] IMAGE_DESCRIPTION requires an active Eliza-1 vision-capable bundle with the vision-describe capability registered",
			);
		}
		markEliza1VisionHandlerPresent(runtime);

		// `modelKey` is not part of the declared params type; read it structurally.
		const requestedKey =
			typeof params === "object"
				? (params as ImageDescriptionParams & { modelKey?: unknown }).modelKey
				: undefined;
		const modelKey =
			typeof requestedKey === "string" && requestedKey
				? requestedKey
				: DEFAULT_MODEL_KEY;

		const payload = paramsToVisionRequest(params);
		const described = await arbiter.requestVisionDescribe<
			typeof payload,
			ImageDescriptionResult | string
		>({ modelKey, payload });
		return normalizeImageDescription(described);
	};
}
1048
+
1049
+ // ── Bionic-host TRANSCRIPTION / IMAGE_DESCRIPTION (Android GPU delegation) ──
1050
+ //
1051
+ // On the bionic-delegated path the musl agent can't load the fused
1052
+ // libelizainference, so the engine-driven transcriber / memory-arbiter vision
1053
+ // paths above can't run here. Instead the audio / image bytes are forwarded to
1054
+ // the in-process bionic host over the UDS (op="asr" / op="image"), which runs
1055
+ // the fused Gemma ASR + mmproj vision path on the Mali GPU and returns text.
1056
+ // This is the same delegation `BionicHostLoader` already does for text
1057
+ // generation.
1058
+
1059
/**
 * Look up the `localInferenceLoader` service and return it only when it
 * exposes both `transcribe` and `describeImage` as functions, which is how
 * the bionic-host loader is recognised here. Returns null otherwise.
 */
function getBionicHostLoader(runtime: IAgentRuntime): BionicHostLoader | null {
	const serviceHost = runtime as { getService?: (name: string) => unknown };
	const candidate = serviceHost.getService?.("localInferenceLoader") as
		| Partial<BionicHostLoader>
		| null
		| undefined;
	if (!candidate) return null;

	const looksLikeBionicHost =
		typeof candidate.transcribe === "function" &&
		typeof candidate.describeImage === "function";
	return looksLikeBionicHost ? (candidate as BionicHostLoader) : null;
}
1073
+
1074
/**
 * Serialize fp32 PCM samples as consecutive little-endian 32-bit floats and
 * return the bytes base64-encoded (the UDS frame payload format).
 */
export function float32ToBase64LE(pcm: Float32Array): string {
	const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
	// allocUnsafe is fine: every byte is overwritten by the loop below.
	const packed = Buffer.allocUnsafe(pcm.length * bytesPerSample);
	pcm.forEach((sample, index) => {
		packed.writeFloatLE(sample, index * bytesPerSample);
	});
	return packed.toString("base64");
}
1082
+
1083
/**
 * Percent-decode a non-base64 `data:` URL payload into raw bytes.
 *
 * Decoding is byte-wise, so escaped bytes that are not valid UTF-8 survive,
 * and a `%` that is not followed by two hex digits is kept literally.
 */
function percentDecodeDataUrlPayload(payload: string): Buffer {
	const input = Buffer.from(payload, "utf8");
	const output = Buffer.allocUnsafe(input.length);
	let written = 0;
	for (let i = 0; i < input.length; i++) {
		const byte = input[i] as number;
		if (byte === 0x25 && i + 2 < input.length) {
			const hex = input.toString("latin1", i + 1, i + 3);
			if (/^[0-9a-fA-F]{2}$/.test(hex)) {
				output[written++] = Number.parseInt(hex, 16);
				i += 2;
				continue;
			}
		}
		output[written++] = byte;
	}
	return output.subarray(0, written);
}

/**
 * Resolve a vision request image to base64-encoded bytes for the bionic host.
 *
 * - `dataUrl` with a `;base64` header: the payload after the first comma is
 *   returned as-is.
 * - `dataUrl` without `;base64`: the payload is percent-encoded text, so it
 *   is decoded to bytes and re-encoded as base64. Previously it was returned
 *   verbatim and the host received bytes that were not base64 at all.
 * - `dataUrl` with no comma: returned unchanged (kept for compatibility with
 *   callers that pass a bare payload).
 * - `url`: fetched, and the response body is base64-encoded.
 *
 * @throws Error when the fetch response is not OK, or when the image carries
 *   neither a usable `dataUrl` nor a usable `url`.
 */
export async function imageRequestToBase64(image: {
	kind: "dataUrl" | "url";
	dataUrl?: string;
	url?: string;
}): Promise<string> {
	if (image.kind === "dataUrl" && image.dataUrl) {
		const comma = image.dataUrl.indexOf(",");
		if (comma < 0) return image.dataUrl;
		const header = image.dataUrl.slice(0, comma);
		const payload = image.dataUrl.slice(comma + 1);
		if (/;\s*base64\s*$/i.test(header)) return payload;
		return percentDecodeDataUrlPayload(payload).toString("base64");
	}
	if (image.kind === "url" && image.url) {
		const resp = await fetch(image.url);
		if (!resp.ok) {
			throw new Error(
				`[local-inference] IMAGE_DESCRIPTION failed to fetch ${image.url}: ${resp.status}`,
			);
		}
		return Buffer.from(await resp.arrayBuffer()).toString("base64");
	}
	throw new Error(
		"[local-inference] IMAGE_DESCRIPTION could not resolve image bytes",
	);
}
1106
+
1107
/**
 * Build the TRANSCRIPTION handler for the bionic-delegated path.
 *
 * The handler packs the extracted PCM as base64 little-endian fp32 and hands
 * it, with the sample rate, to the bionic-host loader's `transcribe`. The
 * caller's abort signal is checked before the loader lookup, after audio
 * extraction, and once more after the transcript comes back.
 *
 * @throws Error when the bionic-host loader service is not registered.
 */
function makeBionicTranscriptionHandler(): TranscriptionHandler {
	return async (runtime, params) => {
		const abortSignal = extractTranscriptionSignal(params);
		throwIfAborted(abortSignal);

		const bionicHost = getBionicHostLoader(runtime);
		if (bionicHost === null) {
			throw new Error(
				"[local-inference] bionic-host TRANSCRIPTION requires the bionic-host loader (localInferenceLoader service)",
			);
		}

		const { pcm, sampleRate } = extractTranscriptionAudio(params);
		throwIfAborted(abortSignal);

		const text = await bionicHost.transcribe({
			pcmBase64: float32ToBase64LE(pcm),
			sampleRate,
		});
		throwIfAborted(abortSignal);
		return text;
	};
}
1127
+
1128
/**
 * Build the IMAGE_DESCRIPTION handler for the bionic-delegated path.
 *
 * The handler resolves the image to base64 bytes, forwards them with the
 * optional prompt to the bionic-host loader's `describeImage`, and
 * normalizes the returned description.
 *
 * The caller's abort signal (as extracted by `paramsToVisionRequest`) is
 * honoured between steps, mirroring the bionic transcription handler. It
 * was previously extracted and then ignored, so an aborted request still
 * fetched the image and ran the description.
 *
 * @throws Error when the bionic-host loader service is not registered.
 */
function makeBionicImageDescriptionHandler(): ImageDescriptionHandler {
	return async (runtime, params) => {
		const loader = getBionicHostLoader(runtime);
		if (!loader) {
			throw new Error(
				"[local-inference] bionic-host IMAGE_DESCRIPTION requires the bionic-host loader (localInferenceLoader service)",
			);
		}
		const request = paramsToVisionRequest(params);
		throwIfAborted(request.signal);

		// Resolving a remote URL involves a network fetch; re-check afterwards.
		const imageBase64 = await imageRequestToBase64(request.image);
		throwIfAborted(request.signal);

		const description = await loader.describeImage({
			imageBase64,
			prompt: request.prompt,
		});
		throwIfAborted(request.signal);
		return normalizeImageDescription(description);
	};
}
1144
+
1145
/**
 * Register a `localInferenceLoader` service whose operations forward to
 * `deviceBridge`. Does nothing when the runtime has no `registerService`.
 *
 * Per the original design note, registration does not wait for a mobile
 * device to connect: calls made while disconnected are parked inside the
 * device bridge and resolve on reconnect (up to a timeout).
 *
 * The registered object also exposes `getMemoryArbiter()`, returning the
 * process-wide arbiter. provider.ts relies on that accessor to route
 * IMAGE_DESCRIPTION (WS2) and IMAGE (WS3) requests; without it the IMAGE
 * handler reports `capability_unavailable`, because the singleton
 * `localInferenceService` is not the object registered with the runtime.
 */
function registerDeviceBridgeLoader(runtime: AgentRuntime): void {
	const registrar = runtime as AgentRuntime & {
		registerService?: (name: string, impl: unknown) => unknown;
	};
	if (typeof registrar.registerService !== "function") return;

	const bridgeLoader: LocalInferenceLoader & {
		getMemoryArbiter: () => ReturnType<typeof tryGetMemoryArbiter>;
	} = {
		loadModel: (loadArgs) => deviceBridge.loadModel(loadArgs),
		unloadModel: () => deviceBridge.unloadModel(),
		currentModelPath: () => deviceBridge.currentModelPath(),
		generate: (generateArgs) => deviceBridge.generate(generateArgs),
		embed: (embedArgs) => deviceBridge.embed(embedArgs),
		getMemoryArbiter: () => tryGetMemoryArbiter(),
	};
	registrar.registerService("localInferenceLoader", bridgeLoader);
}
1176
+
1177
+ /**
1178
+ * AOSP / generic-FFI path: load the fused `libelizainference.so` into the bun
1179
+ * process via `bun:ffi` (the AOSP plugin's loader; libllama is retired). The
1180
+ * loader stays inactive at runtime when neither `ELIZA_LOCAL_LLAMA === "1"`
1181
+ * (kept as the legacy opt-in env name) nor `process.arch === "riscv64"` is
1182
+ * true (see `isAospEnabled` in `@elizaos/plugin-aosp-local-inference`), so the
1183
+ * dynamic import below is safe on every platform; we only attempt registration
1184
+ * when one of the triggers fires.
1185
+ *
1186
+ * riscv64 rationale: `capacitor-llama` ships prebuilts only for
1187
+ * linux-{x64,arm64}, darwin-arm64, win-x64. Riscv64 hosts have no native NAPI
1188
+ * binding option; the cross-built fused `libelizainference.so` is the only
1189
+ * in-process llama.cpp path. The FFI loader satisfies the same
1190
+ * `localInferenceLoader` service contract, so the rest of the engine —
1191
+ * model handlers, embedding routing, response handler — works unchanged.
1192
+ *
1193
+ * The `try`/`catch` is justified because the AOSP build can ship the .so on
1194
+ * one ABI but be invoked on another (e.g. cuttlefish_x86_64 reporting both
1195
+ * x86_64 and arm64-v8a). When `ELIZA_LOCAL_LLAMA=1` is set but registration
1196
+ * fails, the loader logs at `error` level — we must NOT silently fall
1197
+ * through to the device-bridge or stock engine: the operator opted in and
1198
+ * deserves the failure surfaced clearly. The riscv64 auto-trigger uses the
1199
+ * same path; if the bundled `libelizainference.so` is missing the failure is
1200
+ * logged but inference falls through to Cloud routing (per CLAUDE.md deployment
1201
+ * topologies — local-only is supported but Cloud is an acceptable fallback
1202
+ * when the on-device backend is unavailable).
1203
+ */
1204
export function shouldAttemptAospLlamaLoader(
	env: NodeJS.ProcessEnv = process.env,
	arch: NodeJS.Architecture = process.arch,
): boolean {
	// A flag counts as set only when its trimmed value is exactly "1".
	const isFlagSet = (value: string | undefined): boolean =>
		value?.trim() === "1";

	// The kill switch beats both triggers.
	if (isFlagSet(env.ELIZA_DISABLE_FFI_LLAMA)) return false;

	// Explicit opt-in, or the riscv64 auto-trigger.
	return isFlagSet(env.ELIZA_LOCAL_LLAMA) || arch === "riscv64";
}
1213
+
1214
+ /**
1215
+ * Bionic-host delegation gate. On Android the app shell sets
1216
+ * `ELIZA_BIONIC_HOST_DELEGATED=1` + `ELIZA_BIONIC_INFERENCE_SOCK=<name>` when a
1217
+ * dynamic-Vulkan `libelizainference.so` is staged — meaning the GPU is reachable
1218
+ * only from the bionic app process, never this musl agent. When set, the agent
1219
+ * delegates inference to that in-process host over the abstract UDS instead of
1220
+ * dlopen'ing the native lib itself (which would hit the Vulkan/HIDL wall).
1221
+ */
1222
export function bionicInferenceSocketName(
	env: NodeJS.ProcessEnv = process.env,
): string | null {
	const delegated = env.ELIZA_BIONIC_HOST_DELEGATED?.trim() === "1";
	if (!delegated) return null;

	// A missing or blank socket name means delegation is unusable.
	const socketName = env.ELIZA_BIONIC_INFERENCE_SOCK?.trim();
	return socketName || null;
}
1229
+
1230
/**
 * Register a `BionicHostLoader` as the `localInferenceLoader` service when
 * bionic-host delegation is enabled (see `bionicInferenceSocketName`).
 *
 * The original design note says this loader takes precedence over the AOSP,
 * Capacitor and device-bridge loaders because the GPU is out of reach for
 * the in-process FFI path on this (musl) process.
 *
 * @returns true when the loader was registered; false when delegation is
 *   off or the runtime has no `registerService`.
 */
function tryRegisterBionicHostLoader(runtime: AgentRuntime): boolean {
	const udsName = bionicInferenceSocketName();
	if (udsName === null) return false;

	const registrar = runtime as AgentRuntime & {
		registerService?: (name: string, impl: unknown) => unknown;
	};
	if (typeof registrar.registerService !== "function") return false;

	// Attach the arbiter accessor to the loader instance itself so the
	// registered service exposes it alongside the loader operations.
	const bionicLoader: LocalInferenceLoader = new BionicHostLoader(udsName);
	const service = Object.assign(bionicLoader, {
		getMemoryArbiter: () => tryGetMemoryArbiter(),
	});
	registrar.registerService("localInferenceLoader", service);

	logger.info(
		`[local-inference] Registered bionic-host loader; text generation delegates to the in-process GPU host over UDS "${udsName}"`,
	);
	return true;
}
1252
+
1253
/**
 * Attempt to register the AOSP / generic-FFI loader from
 * `@elizaos/plugin-aosp-local-inference`.
 *
 * Does nothing (returns false) unless `shouldAttemptAospLlamaLoader()` says
 * one of the triggers fired. Import or registration failures are logged at
 * `error` level and reported as false rather than thrown.
 *
 * The failure log names the trigger that actually caused the attempt. It
 * used to claim `ELIZA_LOCAL_LLAMA=1` even when the riscv64 auto-trigger
 * was responsible. The wording for the opt-in case is unchanged.
 *
 * @returns true when the adapter reported a successful registration.
 */
async function tryRegisterAospLlamaLoader(
	runtime: AgentRuntime,
): Promise<boolean> {
	if (!shouldAttemptAospLlamaLoader()) return false;
	const trigger =
		process.env.ELIZA_LOCAL_LLAMA?.trim() === "1"
			? "ELIZA_LOCAL_LLAMA=1"
			: `process.arch=${process.arch}`;
	try {
		// NOTE(review): `new Function` is used to build the import call —
		// presumably to keep bundlers from resolving the optional package.
		// The specifier is a constant, never external input. Confirm the
		// intent before replacing it with a plain `import()`.
		const dynamicImport = new Function("id", "return import(id)") as (
			id: string,
		) => Promise<{
			registerAospLlamaLoader?: (r: AgentRuntime) => Promise<boolean> | boolean;
		}>;
		const mod = await dynamicImport("@elizaos/plugin-aosp-local-inference");
		if (typeof mod.registerAospLlamaLoader !== "function") {
			logger.error(
				"[local-inference] AOSP llama adapter import resolved but missing registerAospLlamaLoader export",
			);
			return false;
		}
		const result = await mod.registerAospLlamaLoader(runtime);
		return Boolean(result);
	} catch (err) {
		logger.error(
			`[local-inference] AOSP llama adapter unavailable while ${trigger}:`,
			err instanceof Error ? err.message : String(err),
		);
		return false;
	}
}
1280
+
1281
/**
 * Register the capacitor-llama loader when running on a native Capacitor
 * platform.
 *
 * Returns false without importing anything when `globalThis.Capacitor` is
 * absent or reports a non-native platform. The adapter is imported
 * dynamically so web / desktop bundlers don't choke on the native plugin
 * metadata. Import or registration failures are logged at debug level and
 * reported as false.
 */
async function tryRegisterCapacitorLoader(
	runtime: AgentRuntime,
): Promise<boolean> {
	const capacitor = (globalThis as Record<string, unknown>).Capacitor as
		| { isNativePlatform?: () => boolean }
		| undefined;
	const onNativePlatform = capacitor?.isNativePlatform?.();
	if (!onNativePlatform) return false;

	try {
		const { registerCapacitorLlamaLoader } = await import(
			"@elizaos/capacitor-llama"
		);
		// The adapter receives an object that inherits from the runtime
		// rather than the runtime itself.
		const derivedRuntime: Parameters<typeof registerCapacitorLlamaLoader>[0] =
			Object.create(runtime);
		registerCapacitorLlamaLoader(derivedRuntime);
		logger.info(
			"[local-inference] Registered capacitor-llama loader for mobile on-device inference",
		);
		return true;
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		logger.debug("[local-inference] capacitor-llama not available:", reason);
		return false;
	}
}
1309
+
1310
+ /**
1311
+ * Synthetic conversation id used to keep the Stage-1 stable prefix
1312
+ * (system prompt + tool/action schema block + stable provider blocks)
1313
+ * resident on a deterministic slot before any real conversation lands.
1314
+ * `deriveSlotId("conv:__system_prefix__", parallel)` is stable, so this
1315
+ * always warms the same slot; per-room conversations get their own slot
1316
+ * via `conv:<roomId>` and inherit the radix-shared prefix tokens.
1317
+ */
1318
+ const SYSTEM_PREFIX_CONVERSATION_ID = "__system_prefix__";
1319
+
1320
/**
 * Render the Stage-1 stable prefix for `roomId` and pre-warm the
 * local-inference conversation keyed by that room id.
 *
 * Intended to be wired from the voice turn controller (W9) on
 * `speech-start` / voice-session-open so the response-handler prompt is hot
 * before STT finishes — items I1/C1.
 *
 * Best-effort: resolves to false instead of throwing when no local model is
 * loaded, the active backend id is not "llama-cpp", the rendered prefix is
 * empty, or rendering / pre-warming fails (logged at debug level). A miss
 * just means the real request cold-prefills.
 */
export async function prewarmResponseHandler(
	runtime: IAgentRuntime,
	roomId: UUID,
): Promise<boolean> {
	const canPrewarm =
		localInferenceEngine.hasLoadedModel() &&
		localInferenceEngine.activeBackendId() === "llama-cpp";
	if (!canPrewarm) return false;

	try {
		const stablePrefix = await renderMessageHandlerStablePrefix(
			runtime,
			roomId,
		);
		if (!stablePrefix) return false;
		const warmed = await localInferenceEngine.prewarmConversation(
			String(roomId),
			stablePrefix,
		);
		return warmed;
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		logger.debug(
			"[local-inference] prewarmResponseHandler failed (best-effort):",
			reason,
		);
		return false;
	}
}
1352
+
1353
/**
 * Warm the Stage-1 stable prefix under the fixed
 * `SYSTEM_PREFIX_CONVERSATION_ID` conversation at model-load / boot time,
 * before any user message — item I3 (warm-on-load).
 *
 * The prefix is rendered with the agent id standing in for a room id; per
 * the original design note the stable prefix carries no per-room state, so
 * a fixed synthetic id is fine.
 *
 * Best-effort: resolves to false when no local model is loaded, the active
 * backend id is not "llama-cpp", or the prefix is empty. Failures are
 * logged at debug level and swallowed.
 */
export async function prewarmSystemPrefix(
	runtime: IAgentRuntime,
): Promise<boolean> {
	const canPrewarm =
		localInferenceEngine.hasLoadedModel() &&
		localInferenceEngine.activeBackendId() === "llama-cpp";
	if (!canPrewarm) return false;

	try {
		const stablePrefix = await renderMessageHandlerStablePrefix(
			runtime,
			runtime.agentId as UUID,
		);
		if (!stablePrefix) return false;
		const warmed = await localInferenceEngine.prewarmConversation(
			SYSTEM_PREFIX_CONVERSATION_ID,
			stablePrefix,
		);
		return warmed;
	} catch (err) {
		const reason = err instanceof Error ? err.message : String(err);
		logger.debug(
			"[local-inference] prewarmSystemPrefix failed (best-effort):",
			reason,
		);
		return false;
	}
}
1382
+
1383
+ export async function ensureLocalInferenceHandler(
1384
+ runtime: AgentRuntime,
1385
+ ): Promise<void> {
1386
+ const runtimeMode = getRuntimeMode(runtime);
1387
+ if (!shouldRegisterLocalInferenceHandlers(runtimeMode)) {
1388
+ logger.info(
1389
+ `[local-inference] Runtime mode is ${runtimeMode}; skipping local model handler registration`,
1390
+ );
1391
+ return;
1392
+ }
1393
+
1394
+ const runtimeWithRegistration = runtime as RuntimeWithLocalInferenceFlag;
1395
+ if (
1396
+ typeof runtimeWithRegistration.getModel !== "function" ||
1397
+ typeof runtimeWithRegistration.registerModel !== "function"
1398
+ ) {
1399
+ return;
1400
+ }
1401
+ if (runtimeWithRegistration[LOCAL_INFERENCE_HANDLER_INSTALLED]) {
1402
+ logger.debug(
1403
+ "[local-inference] Local model handlers already registered on this runtime; skipping duplicate registration",
1404
+ );
1405
+ return;
1406
+ }
1407
+
1408
+ // Install the side-registry interception as early as possible so it
1409
+ // captures every subsequent `registerModel` call — including our own
1410
+ // handlers below, plus anything else that registers during the rest of
1411
+ // boot. Idempotent per-runtime.
1412
+ handlerRegistry.installOn(runtime);
1413
+
1414
+ // Loader precedence (highest priority first; at most one registers):
1415
+ // 0. Bionic-host delegation when the app shell enabled it — see the
1416
+ // note at the end of this comment.
1417
+ // 1. AOSP native FFI loader (ELIZA_LOCAL_LLAMA=1 or a riscv64 host);
1418
+ // the native inference library is loaded in-process, no IPC.
1419
+ // 2. Capacitor native adapter when running on a mobile device with the
1420
+ // Capacitor APK shell.
1421
+ // 3. Device-bridge (WebSocket to a paired phone) when explicitly
1422
+ // opted in via ELIZA_DEVICE_BRIDGE_ENABLED=1.
1423
+ // 4. Standalone local engine for desktop / server (no loader service).
1424
+ //
1425
+ // All of them satisfy the same `localInferenceLoader` service contract,
1426
+ // so each attempt below is skipped once a higher-priority loader has
1427
+ // registered. Each `try*Loader` is gated on its own env / platform
1428
+ // signal and returns false when it did not register anything.
1429
+ // Bionic-host delegation wins over every other loader: when set, the GPU is
1430
+ // only reachable from the in-process app host, so the musl agent must NOT try
1431
+ // the in-process FFI / device-bridge paths (the app shell already suppressed
1432
+ // ELIZA_LOCAL_LLAMA in this case).
1433
+ const bionicHostRegistered = tryRegisterBionicHostLoader(runtime);
1434
+ const aospRegistered =
1435
+ !bionicHostRegistered && (await tryRegisterAospLlamaLoader(runtime));
1436
+ const capacitorRegistered =
1437
+ !bionicHostRegistered &&
1438
+ !aospRegistered &&
1439
+ (await tryRegisterCapacitorLoader(runtime));
1440
+ const deviceBridgeEnabled =
1441
+ !bionicHostRegistered &&
1442
+ process.env.ELIZA_DEVICE_BRIDGE_ENABLED?.trim() === "1";
1443
+ if (!aospRegistered && !capacitorRegistered && deviceBridgeEnabled) {
1444
+ registerDeviceBridgeLoader(runtime);
1445
+ logger.info(
1446
+ "[local-inference] Registered device-bridge loader; inference routes to paired mobile device when connected",
1447
+ );
1448
+ }
1449
+
1450
+ // Pre-flight: if no backend is available, skip handler registration
1451
+ // entirely so we don't advertise a handler that will throw. The device
1452
+ // bridge is always "available" in the sense that it parks calls until a
1453
+ // device connects, so if it is enabled we always register handlers.
1454
+ if (
1455
+ !bionicHostRegistered &&
1456
+ !aospRegistered &&
1457
+ !capacitorRegistered &&
1458
+ !deviceBridgeEnabled &&
1459
+ !(await localInferenceEngine.available())
1460
+ ) {
1461
+ logger.debug(
1462
+ "[local-inference] No local inference backend available; skipping model registration",
1463
+ );
1464
+ return;
1465
+ }
1466
+
1467
+ // First-light convenience: when exactly one model is installed and no
1468
+ // slot assignments exist, auto-fill TEXT_SMALL/TEXT_LARGE so the user
1469
+ // lands in chat without opening Settings. The downloader handles the
1470
+ // post-install case; this catches the user who pre-staged a model
1471
+ // (external scan, prior install) and is now booting fresh.
1472
+ try {
1473
+ const installed = await listInstalledModels();
1474
+ const filled = await autoAssignAtBoot(installed);
1475
+ if (filled) {
1476
+ logger.info(
1477
+ `[local-inference] Auto-assigned single installed model to empty slots: ${JSON.stringify(filled)}`,
1478
+ );
1479
+ }
1480
+ } catch (err) {
1481
+ logger.warn(
1482
+ "[local-inference] autoAssignAtBoot failed:",
1483
+ err instanceof Error ? err.message : String(err),
1484
+ );
1485
+ }
1486
+
1487
+ const provider = aospRegistered
1488
+ ? AOSP_LLAMA_PROVIDER
1489
+ : capacitorRegistered
1490
+ ? CAPACITOR_LLAMA_PROVIDER
1491
+ : deviceBridgeEnabled
1492
+ ? DEVICE_BRIDGE_PROVIDER
1493
+ : LOCAL_INFERENCE_PROVIDER;
1494
+
1495
+ const textGenerationSlots: Array<
1496
+ [(typeof ModelType)[keyof typeof ModelType], AgentModelSlot]
1497
+ > = [
1498
+ [ModelType.TEXT_SMALL, "TEXT_SMALL"],
1499
+ [ModelType.TEXT_LARGE, "TEXT_LARGE"],
1500
+ // V5 chat calls semantic text model types directly. Register them as
1501
+ // first-class local handlers so structured streaming sees the concrete
1502
+ // local provider instead of falling through TEXT_SMALL via the router.
1503
+ [ModelType.RESPONSE_HANDLER, "TEXT_SMALL"],
1504
+ [ModelType.ACTION_PLANNER, "TEXT_SMALL"],
1505
+ [ModelType.TEXT_COMPLETION, "TEXT_SMALL"],
1506
+ ];
1507
+ for (const [modelType, slot] of textGenerationSlots) {
1508
+ try {
1509
+ runtimeWithRegistration.registerModel(
1510
+ modelType,
1511
+ makeHandler(slot),
1512
+ provider,
1513
+ LOCAL_INFERENCE_PRIORITY,
1514
+ );
1515
+ } catch (err) {
1516
+ logger.warn(
1517
+ "[local-inference] Could not register ModelType",
1518
+ modelType,
1519
+ err instanceof Error ? err.message : String(err),
1520
+ );
1521
+ }
1522
+ }
1523
+
1524
+ // Register TEXT_EMBEDDING separately — the runtime contract returns
1525
+ // `number[]` instead of `string`, so it can't share `makeHandler`.
1526
+ // - AOSP / device-bridge loaders expose `embed()` on the
1527
+ // `localInferenceLoader` service → route through that.
1528
+ // - Desktop has no `localInferenceLoader`; it serves embeddings through
1529
+ // the fused `libelizainference` (`eliza_inference_embed`) over the
1530
+ // dedicated gte-small GGUF staged as an isolated embed bundle. libllama
1531
+ // is retired — there is no capacitor/libllama embedding fallback.
1532
+ // Neither path registers a handler that would serve a silent zero-vector:
1533
+ // both throw when there's nothing real to call, so the runtime falls
1534
+ // through to the operator-configured provider (Commandment 8).
1535
+ const loaderForEmbed = (
1536
+ runtime as { getService?: (name: string) => unknown }
1537
+ ).getService?.("localInferenceLoader") as
1538
+ | { embed?: unknown }
1539
+ | null
1540
+ | undefined;
1541
+ const embeddingHandler = isLocalEmbeddingDisabledByEnv()
1542
+ ? null
1543
+ : loaderForEmbed && typeof loaderForEmbed.embed === "function"
1544
+ ? makeEmbeddingHandler()
1545
+ : provider === LOCAL_INFERENCE_PROVIDER
1546
+ ? makeFusedEmbeddingHandler()
1547
+ : null;
1548
+ if (embeddingHandler) {
1549
+ try {
1550
+ runtimeWithRegistration.registerModel(
1551
+ ModelType.TEXT_EMBEDDING,
1552
+ embeddingHandler,
1553
+ provider,
1554
+ LOCAL_INFERENCE_PRIORITY,
1555
+ );
1556
+ logger.info(
1557
+ `[local-inference] Registered ${provider} embedding handler for TEXT_EMBEDDING at priority ${LOCAL_INFERENCE_PRIORITY}`,
1558
+ );
1559
+ } catch (err) {
1560
+ logger.warn(
1561
+ "[local-inference] Could not register TEXT_EMBEDDING handler",
1562
+ err instanceof Error ? err.message : String(err),
1563
+ );
1564
+ }
1565
+ } else if (isLocalEmbeddingDisabledByEnv()) {
1566
+ logger.info(
1567
+ "[local-inference] Local TEXT_EMBEDDING handler disabled by ELIZA_DISABLE_LOCAL_EMBEDDINGS",
1568
+ );
1569
+ }
1570
+
1571
+ try {
1572
+ runtimeWithRegistration.registerModel(
1573
+ ModelType.TEXT_TO_SPEECH,
1574
+ makeTextToSpeechHandler(),
1575
+ provider,
1576
+ LOCAL_INFERENCE_PRIORITY,
1577
+ );
1578
+ // TRANSCRIPTION is registered default-on at the local-inference floor
1579
+ // priority (0). It is the last-resort handler: any cloud / other-plugin
1580
+ // TRANSCRIPTION handler registers above 0 and wins. When the handler
1581
+ // does run, it drives the fused libelizainference ASR runtime — the sole
1582
+ // on-device transcriber (Gemma ASR streaming → fused batch interim →
1583
+ // AsrUnavailableError) via the engine's armed voice bridge — see
1584
+ // makeTranscriptionHandler / EngineVoiceBridge.createStreamingTranscriber.
1585
+ // (The old ELIZA_LOCAL_TRANSCRIPTION env gate is removed — voice is a
1586
+ // first-class Eliza-1 surface, not opt-in.)
1587
+ // On the bionic-delegated path the fused lib lives in the app process, not
1588
+ // this musl agent — so transcription + vision must forward audio/image
1589
+ // bytes to the bionic host (op="asr" / op="image") rather than the
1590
+ // in-process engine / memory-arbiter, which can't load the lib here.
1591
+ runtimeWithRegistration.registerModel(
1592
+ ModelType.TRANSCRIPTION,
1593
+ bionicHostRegistered
1594
+ ? makeBionicTranscriptionHandler()
1595
+ : makeTranscriptionHandler(),
1596
+ provider,
1597
+ LOCAL_INFERENCE_PRIORITY,
1598
+ );
1599
+ runtimeWithRegistration.registerModel(
1600
+ ModelType.IMAGE_DESCRIPTION,
1601
+ bionicHostRegistered
1602
+ ? makeBionicImageDescriptionHandler()
1603
+ : makeImageDescriptionHandler(),
1604
+ provider,
1605
+ LOCAL_INFERENCE_PRIORITY,
1606
+ );
1607
+ logger.info(
1608
+ `[local-inference] Registered ${provider} voice and vision handlers for TEXT_TO_SPEECH / TRANSCRIPTION / IMAGE_DESCRIPTION at priority ${LOCAL_INFERENCE_PRIORITY}${bionicHostRegistered ? " (bionic-host delegated)" : ""}`,
1609
+ );
1610
+ } catch (err) {
1611
+ logger.warn(
1612
+ "[local-inference] Could not register local voice/vision handlers",
1613
+ err instanceof Error ? err.message : String(err),
1614
+ );
1615
+ }
1616
+
1617
+ logger.info(
1618
+ `[local-inference] Registered ${provider} llama.cpp text handlers at priority ${LOCAL_INFERENCE_PRIORITY}`,
1619
+ );
1620
+
1621
+ // Install the top-priority router AFTER everything else has registered.
1622
+ // The router sits at Number.MAX_SAFE_INTEGER so the runtime dispatches
1623
+ // to it first; at dispatch time it picks a real provider via
1624
+ // `routing-policy` and calls that handler directly.
1625
+ installRouterHandler(runtime, {
1626
+ skipSlots: isLocalEmbeddingDisabledByEnv() ? ["TEXT_EMBEDDING"] : [],
1627
+ });
1628
+ logger.info(
1629
+ "[local-inference] Installed top-priority router for cross-provider routing",
1630
+ );
1631
+ runtimeWithRegistration[LOCAL_INFERENCE_HANDLER_INSTALLED] = true;
1632
+
1633
+ // Warm-on-load (item I3): if a local model is already resident, KV-prefill
1634
+ // the Stage-1 stable prefix onto the deterministic system-prefix slot so
1635
+ // the system prompt + tool schema is hot before the first user turn.
1636
+ // Fire-and-forget — pre-warm is best-effort and must never block boot.
1637
+ void prewarmSystemPrefix(runtime).catch(() => {
1638
+ // Logged inside prewarmSystemPrefix at debug; nothing more to do here.
1639
+ });
1640
+ }