@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,1171 @@
1
+ import {
2
+ type AudioStreamResult,
3
+ EventType,
4
+ type GenerateTextParams,
5
+ type IAgentRuntime,
6
+ type ImageDescriptionParams,
7
+ type ImageDescriptionResult,
8
+ type ImageGenerationParams,
9
+ type ImageGenerationResult,
10
+ logger,
11
+ ModelType,
12
+ type Plugin,
13
+ type TextEmbeddingParams,
14
+ type TextToSpeechParams,
15
+ type TranscriptionParams,
16
+ } from "@elizaos/core";
17
+
18
+ import { generateMediaAction } from "./actions/generate-media.js";
19
+ import { identifySpeakerAction } from "./actions/identify-speaker.js";
20
+ import { voiceProfilePluginRoutes } from "./routes/voice-profile-plugin-routes.js";
21
+ import { handleVoiceEntityBound } from "./runtime/voice-entity-binding.js";
22
+
23
+ export const LOCAL_INFERENCE_PROVIDER_ID = "eliza-local-inference";
24
+ export const LOCAL_INFERENCE_PRIORITY = -100;
25
+
26
+ export const LOCAL_INFERENCE_TEXT_MODEL_TYPES = [
27
+ ModelType.TEXT_SMALL,
28
+ ModelType.TEXT_LARGE,
29
+ ] as const;
30
+
31
+ export const LOCAL_INFERENCE_MODEL_TYPES = [
32
+ ...LOCAL_INFERENCE_TEXT_MODEL_TYPES,
33
+ ModelType.TEXT_EMBEDDING,
34
+ ModelType.IMAGE,
35
+ ModelType.IMAGE_DESCRIPTION,
36
+ ModelType.TEXT_TO_SPEECH,
37
+ ModelType.TRANSCRIPTION,
38
+ ] as const;
39
+
40
+ export type LocalInferenceUnavailableReason =
41
+ | "backend_unavailable"
42
+ | "capability_unavailable"
43
+ | "invalid_input"
44
+ | "invalid_output";
45
+
46
/**
 * Typed failure raised when the local-inference provider cannot serve a model
 * request. Carries the model type and a machine-readable reason alongside the
 * human-readable message.
 */
export class LocalInferenceUnavailableError extends Error {
  /** Stable discriminator; `isLocalInferenceUnavailableError` matches on it. */
  readonly code = "LOCAL_INFERENCE_UNAVAILABLE";
  /** Provider id of the plugin that raised the error. */
  readonly provider = LOCAL_INFERENCE_PROVIDER_ID;

  /**
   * @param modelType Model type the failing request targeted.
   * @param reason Machine-readable failure category.
   * @param message Human-readable description.
   * @param options Optional bag forwarded to `Error` (carries `cause`).
   */
  constructor(
    readonly modelType: string,
    readonly reason: LocalInferenceUnavailableReason,
    message: string,
    options?: { cause?: unknown },
  ) {
    super(message, options);
    this.name = "LocalInferenceUnavailableError";
  }

  /** Plain-object projection used when the error is serialized to JSON. */
  toJSON(): Record<string, string> {
    const { code, provider, modelType, reason, message } = this;
    return { code, provider, modelType, reason, message };
  }
}
70
+
71
/**
 * Type guard for {@link LocalInferenceUnavailableError}.
 *
 * Accepts real instances and also any non-null object whose `code` equals
 * `"LOCAL_INFERENCE_UNAVAILABLE"`, so look-alike objects that are not
 * instances of this module's class are still recognized.
 */
export function isLocalInferenceUnavailableError(
  error: unknown,
): error is LocalInferenceUnavailableError {
  if (error instanceof LocalInferenceUnavailableError) return true;
  if (typeof error !== "object" || error === null) return false;
  const { code } = error as { code?: unknown };
  return code === "LOCAL_INFERENCE_UNAVAILABLE";
}
81
+
82
+ interface LocalInferenceGenerateArgs {
83
+ prompt: string;
84
+ stopSequences?: string[];
85
+ maxTokens?: number;
86
+ temperature?: number;
87
+ topP?: number;
88
+ signal?: AbortSignal;
89
+ onTextChunk?: (chunk: string) => void | Promise<void>;
90
+ }
91
+
92
+ interface LocalInferenceEmbedResult {
93
+ embedding: number[];
94
+ }
95
+
96
+ interface LocalInferenceTextToSpeechService {
97
+ synthesizeSpeech?: (
98
+ text: string,
99
+ signal?: AbortSignal,
100
+ ) => Promise<Uint8Array | ArrayBuffer | Buffer>;
101
+ textToSpeech?: (args: {
102
+ text: string;
103
+ signal?: AbortSignal;
104
+ }) => Promise<Uint8Array | ArrayBuffer | Buffer>;
105
+ /**
106
+ * Optional streaming synth seam: yields audio (PCM/WAV) chunks as they are
107
+ * produced so playback can start before the whole clip is ready. When a
108
+ * backend implements it, the TEXT_TO_SPEECH handler returns an
109
+ * {@link AudioStreamResult} for `audioStream` callers; otherwise it falls
110
+ * back to a single-chunk result around the buffered synth.
111
+ */
112
+ synthesizeSpeechStream?: (
113
+ text: string,
114
+ signal?: AbortSignal,
115
+ ) => AsyncIterable<Uint8Array>;
116
+ }
117
+
118
+ interface LocalInferenceTranscriptionService {
119
+ transcribe?: (params: unknown) => Promise<string | { text?: string }>;
120
+ transcribePcm?: (
121
+ params: {
122
+ pcm: Float32Array;
123
+ sampleRate: number;
124
+ signal?: AbortSignal;
125
+ },
126
+ signal?: AbortSignal,
127
+ ) => Promise<string | { text?: string }>;
128
+ }
129
+
130
+ /**
131
+ * Optional arbiter accessor. When the local-inference plugin's runtime
132
+ * service registers a MemoryArbiter (WS1) on the IAgentRuntime, this
133
+ * field returns it. Cross-plugin consumers (plugin-vision, plugin-image-gen,
134
+ * plugin-aosp-local-inference) call `service.getMemoryArbiter()` to
135
+ * register their capability handlers and request model swaps without
136
+ * knowing which backend is loaded.
137
+ *
138
+ * The concrete return type is intentionally `unknown` here to keep this
139
+ * provider file free of a hard dependency on `./services/memory-arbiter`;
140
+ * consumers should import the `MemoryArbiter` type from
141
+ * `@elizaos/plugin-local-inference/services` and cast.
142
+ */
143
+ interface LocalInferenceArbiterAccessor {
144
+ getMemoryArbiter?: () => unknown;
145
+ }
146
+
147
+ interface LocalInferenceRuntimeService
148
+ extends LocalInferenceTextToSpeechService,
149
+ LocalInferenceTranscriptionService,
150
+ LocalInferenceArbiterAccessor {
151
+ generate?: (args: LocalInferenceGenerateArgs) => Promise<string>;
152
+ embed?: (args: {
153
+ input: string;
154
+ }) => Promise<number[] | LocalInferenceEmbedResult>;
155
+ describeImage?: (
156
+ params: ImageDescriptionParams | string,
157
+ ) => Promise<ImageDescriptionResult | string>;
158
+ imageDescription?: (
159
+ params: ImageDescriptionParams | string,
160
+ ) => Promise<ImageDescriptionResult | string>;
161
+ }
162
+
163
+ type RuntimeWithServices = IAgentRuntime & {
164
+ getService?: (name: string) => unknown;
165
+ };
166
+
167
/**
 * Look up the local-inference runtime service on the agent runtime.
 *
 * Service names are probed in a fixed order and the first lookup that yields
 * a truthy object wins; later names are not queried once a match is found.
 *
 * @returns The service, or `null` when the runtime has no `getService`
 *   function or none of the names resolves to an object.
 */
function serviceFromRuntime(
  runtime: IAgentRuntime,
): LocalInferenceRuntimeService | null {
  const host = runtime as RuntimeWithServices;
  if (typeof host.getService !== "function") return null;

  const serviceNames = ["localInferenceLoader", "localInference", "LOCAL_INFERENCE"];
  for (let index = 0; index < serviceNames.length; index += 1) {
    const found = host.getService(serviceNames[index]);
    if (!found || typeof found !== "object") continue;
    return found as LocalInferenceRuntimeService;
  }
  return null;
}
185
+
186
/**
 * Build (but do not throw) a {@link LocalInferenceUnavailableError}.
 *
 * @param modelType Model type the failing request targeted.
 * @param reason Machine-readable failure category.
 * @param message Human-readable description.
 * @param cause Optional underlying error, forwarded as the `cause` option.
 */
function unavailable(
  modelType: string,
  reason: LocalInferenceUnavailableReason,
  message: string,
  cause?: unknown,
): LocalInferenceUnavailableError {
  const options = { cause };
  return new LocalInferenceUnavailableError(modelType, reason, message, options);
}
196
+
197
/**
 * Resolve the local-inference service or fail with a typed error.
 *
 * @param runtime Agent runtime to look the service up on.
 * @param modelType Model type being served; used in the error metadata and message.
 * @throws LocalInferenceUnavailableError with reason `backend_unavailable`
 *   when no service object can be found on the runtime.
 */
function requireService(
  runtime: IAgentRuntime,
  modelType: string,
): LocalInferenceRuntimeService {
  const resolved = serviceFromRuntime(runtime);
  if (resolved) return resolved;
  throw unavailable(
    modelType,
    "backend_unavailable",
    `[local-inference] ${modelType} requires an active Eliza-1 local inference backend. Activate an Eliza-1 bundle or enable an AOSP/device local loader.`,
  );
}
211
+
212
+ type MessageLike = {
213
+ role?: unknown;
214
+ content?: unknown;
215
+ };
216
+
217
+ type PromptSegmentLike = {
218
+ content?: unknown;
219
+ };
220
+
221
/**
 * Flatten message/segment content into plain text.
 *
 * - A string is returned unchanged.
 * - An array contributes its string entries and the `text` of object entries
 *   that have a string `text`; empty pieces are dropped and the rest are
 *   joined with newlines.
 * - Anything else renders as the empty string.
 */
function renderPromptContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces: string[] = [];
  for (const part of content) {
    let piece = "";
    if (typeof part === "string") {
      piece = part;
    } else if (
      part &&
      typeof part === "object" &&
      typeof (part as { text?: unknown }).text === "string"
    ) {
      piece = (part as { text: string }).text;
    }
    // Empty pieces never reach the output, mirroring a truthiness filter.
    if (piece) pieces.push(piece);
  }
  return pieces.join("\n");
}
241
+
242
/**
 * Render a chat transcript as a single prompt string.
 *
 * Each message with non-empty rendered content becomes `"<role>:\n<content>"`;
 * a missing or blank role falls back to `"message"`. Messages without content
 * are skipped, and the remaining entries are separated by a blank line.
 */
function promptFromMessages(messages: readonly MessageLike[]): string {
  const rendered: string[] = [];
  for (const { role, content } of messages) {
    const body = renderPromptContent(content);
    if (!body) continue;
    const trimmedRole = typeof role === "string" ? role.trim() : "";
    const label = trimmedRole ? trimmedRole : "message";
    rendered.push(`${label}:\n${body}`);
  }
  return rendered.join("\n\n");
}
256
+
257
/**
 * Derive the prompt text for a text-generation request.
 *
 * Sources are tried in order and the first one present is used, even if it
 * later turns out to be blank:
 *   1. a non-empty `prompt` string,
 *   2. a non-empty `promptSegments` array (rendered and concatenated),
 *   3. a non-empty `messages` array (rendered as a transcript).
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_input` when the
 *   selected source is empty or whitespace-only. The error is always tagged
 *   with `ModelType.TEXT_SMALL`.
 */
function promptFromParams(params: GenerateTextParams): string {
  const extended = params as GenerateTextParams & {
    messages?: readonly MessageLike[];
    promptSegments?: readonly PromptSegmentLike[];
  };
  const { promptSegments, messages } = extended;

  let candidate = "";
  if (typeof params.prompt === "string" && params.prompt.length > 0) {
    candidate = params.prompt;
  } else if (Array.isArray(promptSegments) && promptSegments.length > 0) {
    let joined = "";
    for (const segment of promptSegments) {
      joined += renderPromptContent(segment.content);
    }
    candidate = joined;
  } else if (Array.isArray(messages) && messages.length > 0) {
    candidate = promptFromMessages(messages);
  }

  if (candidate.trim().length === 0) {
    throw unavailable(
      ModelType.TEXT_SMALL,
      "invalid_input",
      "[local-inference] TEXT generation requires a non-empty prompt",
    );
  }
  return candidate;
}
281
+
282
/**
 * Translate core `GenerateTextParams` into the argument bag expected by the
 * local backend's `generate`.
 *
 * `onTextChunk` is only wired up when the caller asked for streaming
 * (`stream` or `streamStructured` is `true`) and supplied an `onStreamChunk`
 * function; otherwise it is `undefined`.
 */
function textGenerationArgsFromParams(
  params: GenerateTextParams,
): LocalInferenceGenerateArgs {
  const prompt = promptFromParams(params);
  const streamingRequested =
    params.stream === true || params.streamStructured === true;

  let onTextChunk: LocalInferenceGenerateArgs["onTextChunk"];
  if (streamingRequested && typeof params.onStreamChunk === "function") {
    // Read the callback off `params` at call time rather than capturing it.
    onTextChunk = (chunk) => params.onStreamChunk?.(chunk);
  }

  return {
    prompt,
    stopSequences: params.stopSequences,
    maxTokens: params.maxTokens,
    temperature: params.temperature,
    topP: params.topP,
    signal: params.signal,
    onTextChunk,
  };
}
299
+
300
/**
 * Pull the text to embed out of the handler input.
 *
 * @returns The input itself when it is a string, or its `text` field when it
 *   is an object with a string `text`.
 * @throws LocalInferenceUnavailableError with reason `invalid_input` for any
 *   other input, including `null`.
 */
function extractEmbeddingText(
  params: TextEmbeddingParams | string | null,
): string {
  if (typeof params === "string") return params;

  const hasText =
    params !== null &&
    typeof params === "object" &&
    typeof params.text === "string";
  if (!hasText) {
    throw unavailable(
      ModelType.TEXT_EMBEDDING,
      "invalid_input",
      "[local-inference] TEXT_EMBEDDING requires { text } or a non-empty string; null warmup probes are not served with fake vectors",
    );
  }
  return (params as { text: string }).text;
}
313
+
314
/**
 * Pull the text to synthesize out of the TEXT_TO_SPEECH handler input.
 *
 * @returns The input itself when it is a string, or its `text` field when it
 *   is an object with a string `text`.
 * @throws LocalInferenceUnavailableError with reason `invalid_input` otherwise.
 */
function extractSpeechText(params: TextToSpeechParams | string): string {
  if (typeof params === "string") return params;

  const isTextCarrier =
    Boolean(params) &&
    typeof params === "object" &&
    typeof params.text === "string";
  if (!isTextCarrier) {
    throw unavailable(
      ModelType.TEXT_TO_SPEECH,
      "invalid_input",
      "[local-inference] TEXT_TO_SPEECH requires a string or { text } input",
    );
  }
  return params.text;
}
325
+
326
/**
 * Read the optional abort signal from a TEXT_TO_SPEECH input.
 *
 * @returns `params.signal` for non-null object inputs; `undefined` for string
 *   (or null) inputs.
 */
function extractSpeechSignal(
  params: TextToSpeechParams | string,
): AbortSignal | undefined {
  if (typeof params !== "object" || params === null) return undefined;
  return params.signal;
}
333
+
334
/**
 * Trim `text` and insist that something is left.
 *
 * @param modelType Model type used for the error metadata and message.
 * @param text Raw text to validate.
 * @returns The trimmed text.
 * @throws LocalInferenceUnavailableError with reason `invalid_input` when the
 *   text is empty or whitespace-only.
 */
function ensureNonEmptyText(modelType: string, text: string): string {
  const cleaned = text.trim();
  if (cleaned.length > 0) return cleaned;
  throw unavailable(
    modelType,
    "invalid_input",
    `[local-inference] ${modelType} requires non-empty text`,
  );
}
345
+
346
/**
 * Validate a backend embedding result and unwrap it to a plain number array.
 *
 * Accepts either a bare `number[]` or an object with an `embedding` array.
 * A missing result (`null`/`undefined`) and vectors containing non-numeric or
 * non-finite components (`NaN`, `±Infinity`) are reported as the typed
 * `invalid_output` error rather than a raw `TypeError` or a vector that would
 * silently poison similarity maths downstream.
 *
 * @param result Value returned by the backend's `embed`.
 * @returns The embedding vector (the same array instance the backend returned).
 * @throws LocalInferenceUnavailableError with reason `invalid_output` when the
 *   result is not a well-formed embedding.
 */
function normalizeEmbeddingResult(
  result: number[] | LocalInferenceEmbedResult,
): number[] {
  // Optional chaining guards against backends that resolve to null/undefined.
  const embedding: unknown = Array.isArray(result)
    ? result
    : (result as LocalInferenceEmbedResult | null | undefined)?.embedding;
  if (
    !Array.isArray(embedding) ||
    !embedding.every(
      (value) => typeof value === "number" && Number.isFinite(value),
    )
  ) {
    throw unavailable(
      ModelType.TEXT_EMBEDDING,
      "invalid_output",
      "[local-inference] TEXT_EMBEDDING backend returned an invalid embedding",
    );
  }
  return embedding as number[];
}
362
+
363
/**
 * Coerce a backend audio result into a plain `Uint8Array`.
 *
 * An `ArrayBuffer` is wrapped; any `Uint8Array` (including subclasses) is
 * re-viewed over the same underlying memory — no bytes are copied.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_output` for any
 *   other value.
 */
function normalizeAudioBytes(
  result: Uint8Array | ArrayBuffer | Buffer,
): Uint8Array {
  if (result instanceof ArrayBuffer) {
    return new Uint8Array(result);
  }
  if (result instanceof Uint8Array) {
    const { buffer, byteOffset, byteLength } = result;
    return new Uint8Array(buffer, byteOffset, byteLength);
  }
  throw unavailable(
    ModelType.TEXT_TO_SPEECH,
    "invalid_output",
    "[local-inference] TEXT_TO_SPEECH backend returned non-audio output",
  );
}
378
+
379
/**
 * Join audio chunks into one freshly allocated `Uint8Array`, preserving order.
 * An empty list yields an empty array.
 */
function concatAudioChunks(chunks: Uint8Array[]): Uint8Array {
  let totalBytes = 0;
  for (const piece of chunks) {
    totalBytes += piece.byteLength;
  }

  const merged = new Uint8Array(totalBytes);
  let cursor = 0;
  chunks.forEach((piece) => {
    merged.set(piece, cursor);
    cursor += piece.byteLength;
  });
  return merged;
}
389
+
390
/**
 * Present already-synthesized audio through the streaming result shape, for
 * backends that have no streaming synth. The stream yields the whole clip as
 * one chunk (or nothing when the clip is empty) and `bytes` is already
 * resolved with the same data.
 */
function bufferedAudioStreamResult(
  bytes: Uint8Array,
  mimeType: string,
): AudioStreamResult {
  const audioStream = (async function* (): AsyncGenerator<Uint8Array> {
    if (bytes.byteLength === 0) return;
    yield bytes;
  })();
  return { audioStream, bytes: Promise.resolve(bytes), mimeType };
}
401
+
402
/**
 * Wrap a backend streaming synth as an {@link AudioStreamResult}.
 *
 * Chunks are normalized, recorded, and re-yielded as the consumer iterates
 * `audioStream`. The companion `bytes` promise always settles once iteration
 * has started and then ends:
 *   - resolves with the concatenated clip when the stream is fully drained;
 *   - rejects with the synth error when the source (or normalization) fails;
 *   - rejects when the consumer stops iterating early, so awaiting `bytes`
 *     can never hang on an abandoned stream.
 *
 * A no-op rejection handler is attached to `bytes` so consumers that only
 * read `audioStream` do not trigger an unhandled-rejection on failure; callers
 * that do await `bytes` still observe the rejection.
 *
 * Note: the generator is lazy — nothing is pulled from `source` (and `bytes`
 * stays pending) until `audioStream` is iterated.
 *
 * @param source Backend chunk stream.
 * @param mimeType MIME type reported on the result.
 */
function streamingAudioStreamResult(
  source: AsyncIterable<Uint8Array>,
  mimeType: string,
): AudioStreamResult {
  const collected: Uint8Array[] = [];
  let settled = false;
  let resolveBytes!: (value: Uint8Array) => void;
  let rejectBytes!: (reason: unknown) => void;
  const bytes = new Promise<Uint8Array>((resolve, reject) => {
    resolveBytes = resolve;
    rejectBytes = reject;
  });
  // Mark the promise as handled; the stream error is already surfaced to the
  // iterating consumer via the rethrow below.
  bytes.catch(() => {});

  async function* generate(): AsyncGenerator<Uint8Array> {
    try {
      for await (const value of source) {
        const chunk = normalizeAudioBytes(value);
        collected.push(chunk);
        yield chunk;
      }
      settled = true;
      resolveBytes(concatAudioChunks(collected));
    } catch (err) {
      settled = true;
      rejectBytes(err);
      throw err;
    } finally {
      // Reached without settling only when the consumer closed the generator
      // early (break / return()), which skips both paths above.
      if (!settled) {
        rejectBytes(
          new Error(
            "[local-inference] TEXT_TO_SPEECH audio stream was closed before synthesis completed",
          ),
        );
      }
    }
  }
  return { audioStream: generate(), bytes, mimeType };
}
430
+
431
+ const LOCAL_TTS_MIME = "audio/wav";
432
+
433
/**
 * Validate and normalize transcription input for the `transcribePcm` path.
 *
 * Requires a non-null, non-`Uint8Array` object whose `pcm` is a
 * `Float32Array`. The sample rate is taken from `sampleRateHz` when that is a
 * number, otherwise from `sampleRate`, and must be finite and positive.
 * `signal` is included in the result only when one was supplied.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_input` when any
 *   of those requirements is not met.
 */
function extractPcmTranscriptionParams(
  params: TranscriptionParams | Buffer | string | unknown,
): { pcm: Float32Array; sampleRate: number; signal?: AbortSignal } {
  const isPlainRecord =
    Boolean(params) &&
    typeof params === "object" &&
    !(params instanceof Uint8Array);
  if (!isPlainRecord) {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_input",
      "[local-inference] TRANSCRIPTION requires { pcm, sampleRateHz } when only transcribePcm is available",
    );
  }

  const { pcm, sampleRateHz, sampleRate: legacyRate, signal } = params as {
    pcm?: unknown;
    sampleRateHz?: unknown;
    sampleRate?: unknown;
    signal?: AbortSignal;
  };
  if (!(pcm instanceof Float32Array)) {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_input",
      "[local-inference] TRANSCRIPTION requires Float32Array pcm when only transcribePcm is available",
    );
  }

  // `sampleRateHz` wins whenever it is a number, even an invalid one.
  let sampleRate = 0;
  if (typeof sampleRateHz === "number") {
    sampleRate = sampleRateHz;
  } else if (typeof legacyRate === "number") {
    sampleRate = legacyRate;
  }
  if (!Number.isFinite(sampleRate) || sampleRate <= 0) {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_input",
      "[local-inference] TRANSCRIPTION { pcm } requires a positive sampleRateHz",
    );
  }

  const normalized: { pcm: Float32Array; sampleRate: number; signal?: AbortSignal } = {
    pcm,
    sampleRate,
  };
  if (signal) normalized.signal = signal;
  return normalized;
}
473
+
474
/**
 * Read the optional abort signal from a transcription input.
 *
 * @returns The `signal` property of non-null object inputs; `undefined` for
 *   everything else.
 */
function extractTranscriptionSignal(params: unknown): AbortSignal | undefined {
  if (typeof params !== "object" || params === null) return undefined;
  const { signal } = params as { signal?: AbortSignal };
  return signal;
}
479
+
480
/**
 * Throw if `signal` has already been aborted; otherwise do nothing.
 *
 * The signal's own `reason` is rethrown when it is an `Error`; any other
 * reason is replaced by a `DOMException` named `"AbortError"`.
 */
function throwIfAborted(signal: AbortSignal | undefined): void {
  if (signal == null || !signal.aborted) return;
  const { reason } = signal;
  if (reason instanceof Error) throw reason;
  throw new DOMException("Aborted", "AbortError");
}
486
+
487
/**
 * Unwrap a backend transcription result to its transcript text.
 *
 * Accepts a bare string or an object with a string `text`. A missing result
 * (`null`/`undefined`) is reported as the typed `invalid_output` error rather
 * than surfacing a raw `TypeError` from the property access.
 *
 * @param result Value returned by the backend or arbiter.
 * @returns The transcript text (may be an empty string).
 * @throws LocalInferenceUnavailableError with reason `invalid_output` when no
 *   string transcript can be extracted.
 */
function normalizeTranscript(result: string | { text?: string }): string {
  const text: unknown =
    typeof result === "string"
      ? result
      : (result as { text?: string } | null | undefined)?.text;
  if (typeof text !== "string") {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_output",
      "[local-inference] TRANSCRIPTION backend returned an invalid transcript",
    );
  }
  return text;
}
498
+
499
/**
 * Validate a backend image-description result and return it in the
 * `{ title, description }` shape.
 *
 * - A string becomes the (trimmed) description; its title is the trimmed text
 *   before the first `.`, `!` or `?`, falling back to `"Image"` when that is
 *   empty.
 * - An object must carry string `title` and `description`; both are trimmed
 *   and must be non-empty.
 *
 * @throws LocalInferenceUnavailableError — `invalid_input` (from the
 *   non-empty check) for blank text, `invalid_output` for any other shape.
 */
function normalizeImageDescription(
  result: ImageDescriptionResult | string,
): ImageDescriptionResult {
  if (typeof result === "string") {
    const description = ensureNonEmptyText(ModelType.IMAGE_DESCRIPTION, result);
    const [leadingSentence] = description.split(/[.!?]/, 1);
    const title = leadingSentence?.trim() || "Image";
    return { title, description };
  }

  if (
    !result ||
    typeof result !== "object" ||
    typeof result.title !== "string" ||
    typeof result.description !== "string"
  ) {
    throw unavailable(
      ModelType.IMAGE_DESCRIPTION,
      "invalid_output",
      "[local-inference] IMAGE_DESCRIPTION backend returned an invalid description",
    );
  }

  // Title is validated before description, so a blank title is reported first.
  const title = ensureNonEmptyText(ModelType.IMAGE_DESCRIPTION, result.title);
  const description = ensureNonEmptyText(
    ModelType.IMAGE_DESCRIPTION,
    result.description,
  );
  return { title, description };
}
529
+
530
/**
 * Build the model handler for a text-generation model type.
 *
 * The returned handler resolves the local service, checks that it implements
 * `generate`, converts the core params, and returns the backend's text.
 *
 * @param modelType Model type the handler serves; used in error metadata.
 * @throws (from the handler) LocalInferenceUnavailableError —
 *   `backend_unavailable` when no service is registered,
 *   `capability_unavailable` when the service has no `generate`.
 */
function createTextHandler(modelType: string) {
  return async (
    runtime: IAgentRuntime,
    params: GenerateTextParams,
  ): Promise<string> => {
    const backend = requireService(runtime, modelType);
    if (typeof backend.generate === "function") {
      const generateArgs = textGenerationArgsFromParams(params);
      // Invoked as a method so the backend keeps its receiver.
      return backend.generate(generateArgs);
    }
    throw unavailable(
      modelType,
      "capability_unavailable",
      `[local-inference] Active local backend does not implement ${modelType} generation`,
    );
  };
}
546
+
547
/**
 * Build the TEXT_EMBEDDING model handler.
 *
 * Checks run in this order: a service must be registered, it must implement
 * `embed`, and only then is the input extracted and required to be non-empty.
 * The backend result is validated before being returned.
 *
 * @throws (from the handler) LocalInferenceUnavailableError —
 *   `backend_unavailable`, `capability_unavailable`, `invalid_input` or
 *   `invalid_output` depending on which step fails.
 */
function createEmbeddingHandler() {
  return async (
    runtime: IAgentRuntime,
    params: TextEmbeddingParams | string | null,
  ): Promise<number[]> => {
    const backend = serviceFromRuntime(runtime);
    if (backend === null) {
      throw unavailable(
        ModelType.TEXT_EMBEDDING,
        "backend_unavailable",
        "[local-inference] TEXT_EMBEDDING requires an active Eliza-1 backend or another embedding provider; refusing to synthesize zero-vectors.",
      );
    }
    if (typeof backend.embed !== "function") {
      throw unavailable(
        ModelType.TEXT_EMBEDDING,
        "capability_unavailable",
        "[local-inference] Active local backend does not implement TEXT_EMBEDDING",
      );
    }

    const rawText = extractEmbeddingText(params);
    const input = ensureNonEmptyText(ModelType.TEXT_EMBEDDING, rawText);
    const embedded = await backend.embed({ input });
    return normalizeEmbeddingResult(embedded);
  };
}
574
+
575
/**
 * Build the TEXT_TO_SPEECH model handler.
 *
 * Resolution order inside the handler:
 *   1. When the caller set `audioStream: true` and the service implements
 *      `synthesizeSpeechStream`, the backend stream is returned directly.
 *   2. Otherwise audio is synthesized in one buffer — via the arbiter when the
 *      service exposes one, else `service.synthesizeSpeech`, else
 *      `service.textToSpeech`.
 *   3. `audioStream` callers without a streaming backend get that buffer
 *      wrapped as a single-chunk stream; everyone else gets the raw bytes.
 *
 * Arbiter methods are invoked as methods on the arbiter (not through a
 * detached reference) so implementations that rely on `this` keep working —
 * the same way the transcription handler calls `arbiter.requestTranscribe`.
 *
 * @throws (from the handler) LocalInferenceUnavailableError —
 *   `backend_unavailable` when no service is registered, `invalid_input` for
 *   missing/blank text, `capability_unavailable` when neither the arbiter nor
 *   the service can synthesize, `invalid_output` for non-audio results.
 */
function createTextToSpeechHandler() {
  return async (
    runtime: IAgentRuntime,
    params: TextToSpeechParams | string,
  ): Promise<Uint8Array | AudioStreamResult> => {
    const service = requireService(runtime, ModelType.TEXT_TO_SPEECH);
    const text = ensureNonEmptyText(
      ModelType.TEXT_TO_SPEECH,
      extractSpeechText(params),
    );
    const signal = extractSpeechSignal(params);
    // Explicit opt-in (NOT the generic `stream` useModel injects from an
    // ambient text-streaming turn) so byte-expecting callers keep a buffer.
    const wantsStream =
      typeof params === "object" &&
      params !== null &&
      (params as { audioStream?: boolean }).audioStream === true;

    // Real chunked streaming when the backend implements the seam.
    if (wantsStream && typeof service.synthesizeSpeechStream === "function") {
      return streamingAudioStreamResult(
        service.synthesizeSpeechStream(text, signal),
        LOCAL_TTS_MIME,
      );
    }

    const synthesizeBuffered = async (): Promise<Uint8Array> => {
      const arbiter = _tryGetTtsArbiter(service);
      if (arbiter) {
        const request = { text, ...(signal ? { signal } : {}) };
        const modelKeyCandidate =
          typeof params === "object" && params !== null
            ? (params as unknown as { modelKey?: unknown }).modelKey
            : undefined;
        const modelKey =
          typeof modelKeyCandidate === "string" && modelKeyCandidate
            ? modelKeyCandidate
            : "eliza-1-voice";
        const speechRequest = { modelKey, payload: request };
        // Prefer requestTextToSpeech, fall back to requestSpeak; both are
        // called on `arbiter` so the receiver is preserved.
        if (arbiter.requestTextToSpeech) {
          return normalizeAudioBytes(
            await arbiter.requestTextToSpeech<typeof request, Uint8Array>(
              speechRequest,
            ),
          );
        }
        if (arbiter.requestSpeak) {
          return normalizeAudioBytes(
            await arbiter.requestSpeak<typeof request, Uint8Array>(
              speechRequest,
            ),
          );
        }
        throw unavailable(
          ModelType.TEXT_TO_SPEECH,
          "capability_unavailable",
          "[local-inference] Active local arbiter does not implement TEXT_TO_SPEECH",
        );
      }
      if (typeof service.synthesizeSpeech === "function") {
        return normalizeAudioBytes(
          await service.synthesizeSpeech(text, signal),
        );
      }
      if (typeof service.textToSpeech === "function") {
        return normalizeAudioBytes(
          await service.textToSpeech({ text, ...(signal ? { signal } : {}) }),
        );
      }
      throw unavailable(
        ModelType.TEXT_TO_SPEECH,
        "capability_unavailable",
        "[local-inference] Active local backend does not implement TEXT_TO_SPEECH",
      );
    };

    const bytes = await synthesizeBuffered();
    // Streaming asked but no streaming backend — satisfy the contract with a
    // single chunk so consumers use one code path for cloud + local.
    return wantsStream
      ? bufferedAudioStreamResult(bytes, LOCAL_TTS_MIME)
      : bytes;
  };
}
653
+
654
/**
 * Build the TRANSCRIPTION model handler.
 *
 * After an up-front abort check, the first available route is used:
 *   1. the arbiter's `requestTranscribe` (model key from `params.modelKey`
 *      when it is a non-empty string, otherwise `"eliza-1-transcribe"`),
 *   2. `service.transcribe(params)`,
 *   3. `service.transcribePcm(...)` with validated PCM input; the signal is
 *      passed as a second argument only when one was supplied.
 * Every route validates the transcript and re-checks the abort signal before
 * returning.
 *
 * @throws (from the handler) LocalInferenceUnavailableError —
 *   `backend_unavailable`, `invalid_input`, `invalid_output` or
 *   `capability_unavailable` depending on which step fails; the abort reason
 *   (or an `AbortError`) when the signal is aborted.
 */
function createTranscriptionHandler() {
  return async (
    runtime: IAgentRuntime,
    params: TranscriptionParams | Buffer | string | unknown,
  ): Promise<string> => {
    const service = requireService(runtime, ModelType.TRANSCRIPTION);
    const signal = extractTranscriptionSignal(params);
    throwIfAborted(signal);

    // Shared tail: validate the raw result, then honor a late abort.
    const finish = (raw: string | { text?: string }): string => {
      const transcript = normalizeTranscript(raw);
      throwIfAborted(signal);
      return transcript;
    };

    const arbiter = _tryGetTranscribeArbiter(service);
    if (arbiter?.requestTranscribe) {
      let modelKey = "eliza-1-transcribe";
      if (typeof params === "object" && params !== null) {
        const requestedKey = (params as { modelKey?: unknown }).modelKey;
        if (typeof requestedKey === "string" && requestedKey) {
          modelKey = requestedKey;
        }
      }
      return finish(
        await arbiter.requestTranscribe<
          TranscriptionParams | Buffer | string | unknown,
          string | { text?: string }
        >({ modelKey, payload: params }),
      );
    }

    if (typeof service.transcribe === "function") {
      return finish(await service.transcribe(params));
    }

    if (typeof service.transcribePcm === "function") {
      const pcmParams = extractPcmTranscriptionParams(params);
      const pending = signal
        ? service.transcribePcm(pcmParams, signal)
        : service.transcribePcm(pcmParams);
      return finish(await pending);
    }

    throw unavailable(
      ModelType.TRANSCRIPTION,
      "capability_unavailable",
      "[local-inference] Active local backend does not implement TRANSCRIPTION",
    );
  };
}
703
+
704
/**
 * Call signature shared by every arbiter request entry point: a model
 * key plus an opaque payload in, a promise of the capability's result
 * out.
 */
type ArbiterCapabilityRequest = <Req, Res>(req: {
  modelKey: string;
  payload: Req;
}) => Promise<Res>;

/**
 * Structural view of the memory arbiter as the handlers in this file
 * consume it. Every member is optional; the `tryGet*Arbiter` helpers
 * verify at runtime that the members they need are present and that
 * the matching capability is registered.
 *
 * For IMAGE_DESCRIPTION two call paths converge here:
 *
 *   (a) The WS2 arbiter path. When the loader service exposes
 *       `getMemoryArbiter()` AND that arbiter has the `vision-describe`
 *       capability registered, IMAGE_DESCRIPTION dispatches through
 *       `arbiter.requestVisionDescribe(...)`.
 *
 *   (b) Legacy `service.describeImage(...)` / `service.imageDescription`.
 *       Pre-WS2 callers (the AOSP bootstrap, Florence-2 LocalAIManager)
 *       still hit this fallback.
 */
interface ArbiterLike {
  /** Reports whether the named capability is registered. */
  hasCapability?: (capability: string) => boolean;
  requestVisionDescribe?: ArbiterCapabilityRequest;
  requestImageGen?: ArbiterCapabilityRequest;
  requestTranscribe?: ArbiterCapabilityRequest;
  requestTextToSpeech?: ArbiterCapabilityRequest;
  requestSpeak?: ArbiterCapabilityRequest;
}
740
+
741
/**
 * Resolve the memory arbiter for the vision-describe path.
 *
 * @param service - Active runtime service, or `null`.
 * @returns The arbiter when it is an object exposing `hasCapability`
 *   and `requestVisionDescribe` as functions and reporting the
 *   `"vision-describe"` capability; otherwise `null`.
 */
function tryGetArbiter(
  service: LocalInferenceRuntimeService | null,
): ArbiterLike | null {
  const resolved = service?.getMemoryArbiter
    ? service.getMemoryArbiter()
    : null;
  if (resolved === null || typeof resolved !== "object") return null;

  const candidate = resolved as ArbiterLike;
  if (typeof candidate.hasCapability !== "function") return null;
  if (typeof candidate.requestVisionDescribe !== "function") return null;
  return candidate.hasCapability("vision-describe") ? candidate : null;
}
757
+
758
/**
 * Resolve the memory arbiter for the image-generation path.
 *
 * @param service - Active runtime service, or `null`.
 * @returns The arbiter when it is an object exposing `hasCapability`
 *   and `requestImageGen` as functions and reporting the `"image-gen"`
 *   capability; otherwise `null`.
 */
function tryGetImageGenArbiter(
  service: LocalInferenceRuntimeService | null,
): ArbiterLike | null {
  const resolved = service?.getMemoryArbiter
    ? service.getMemoryArbiter()
    : null;
  if (resolved === null || typeof resolved !== "object") return null;

  const candidate = resolved as ArbiterLike;
  if (typeof candidate.hasCapability !== "function") return null;
  if (typeof candidate.requestImageGen !== "function") return null;
  return candidate.hasCapability("image-gen") ? candidate : null;
}
774
+
775
/**
 * Resolve the memory arbiter for the text-to-speech path, i.e. an
 * arbiter with the WS5 `"speak"` capability registered. Follows the
 * same shape as `tryGetArbiter` / `tryGetImageGenArbiter`. Having either
 * `requestTextToSpeech` or `requestSpeak` is enough — both route
 * through the same `"speak"` queue.
 *
 * @param service - Active runtime service, or `null`.
 * @returns The arbiter when it qualifies; otherwise `null`.
 */
function _tryGetTtsArbiter(
  service: LocalInferenceRuntimeService | null,
): ArbiterLike | null {
  const resolved = service?.getMemoryArbiter
    ? service.getMemoryArbiter()
    : null;
  if (resolved === null || typeof resolved !== "object") return null;

  const candidate = resolved as ArbiterLike;
  if (typeof candidate.hasCapability !== "function") return null;
  if (
    typeof candidate.requestTextToSpeech !== "function" &&
    typeof candidate.requestSpeak !== "function"
  ) {
    return null;
  }
  return candidate.hasCapability("speak") ? candidate : null;
}
798
+
799
/**
 * Resolve the memory arbiter for the transcription path.
 *
 * @param service - Active runtime service, or `null`.
 * @returns The arbiter when it is an object exposing `hasCapability`
 *   and `requestTranscribe` as functions and reporting the
 *   `"transcribe"` capability; otherwise `null`.
 */
function _tryGetTranscribeArbiter(
  service: LocalInferenceRuntimeService | null,
): ArbiterLike | null {
  const resolved = service?.getMemoryArbiter
    ? service.getMemoryArbiter()
    : null;
  if (resolved === null || typeof resolved !== "object") return null;

  const candidate = resolved as ArbiterLike;
  if (typeof candidate.hasCapability !== "function") return null;
  if (typeof candidate.requestTranscribe !== "function") return null;
  return candidate.hasCapability("transcribe") ? candidate : null;
}
815
+
816
/**
 * Convert IMAGE_DESCRIPTION params into the payload handed to
 * `arbiter.requestVisionDescribe`.
 *
 * @param params - Either the image URL itself or an object carrying
 *   `imageUrl` and an optional `prompt`.
 * @returns `{ image, prompt }`, where `image` is tagged `"dataUrl"` for
 *   `data:` URLs and `"url"` for everything else. `prompt` is only
 *   populated for object-shaped params.
 * @throws The `invalid_input` error built by `unavailable(...)` when no
 *   non-empty string URL can be extracted. This includes `null` /
 *   `undefined` params, which previously escaped as a raw TypeError.
 */
function paramsToVisionRequest(params: ImageDescriptionParams | string): {
  image: { kind: "dataUrl"; dataUrl: string } | { kind: "url"; url: string };
  prompt?: string;
} {
  // `typeof null === "object"`, so null must be excluded explicitly before
  // any property is read off the params.
  const details =
    typeof params === "object" && params !== null ? params : undefined;
  const url = typeof params === "string" ? params : details?.imageUrl;
  if (typeof url !== "string" || !url) {
    throw unavailable(
      ModelType.IMAGE_DESCRIPTION,
      "invalid_input",
      "[local-inference] IMAGE_DESCRIPTION requires a non-empty imageUrl",
    );
  }
  const prompt = details?.prompt;
  if (url.startsWith("data:")) {
    return {
      image: { kind: "dataUrl", dataUrl: url },
      prompt,
    };
  }
  return {
    image: { kind: "url", url },
    prompt,
  };
}
840
+
841
/**
 * Runtime setting key that plugin-vision's `hasEliza1VisionHandler`
 * polls. A value of `"1"` makes VisionService prefer the eliza-1
 * IMAGE_DESCRIPTION handler over local Florence-2. The key is written
 * the first time the handler runs against an arbiter that has the
 * `vision-describe` capability registered, so it reflects actual
 * capability rather than mere plugin presence.
 */
const ELIZA1_VISION_MARKER = "ELIZA1_VISION_HANDLER_PRESENT";

/**
 * Write the eliza-1 vision marker onto the runtime settings, once.
 *
 * Does nothing when the runtime has no `setSetting`. When `getSetting`
 * exists and already returns `"1"` or `true` for the marker, the write
 * is skipped. Errors thrown by `setSetting` are swallowed.
 *
 * @param runtime - Agent runtime whose settings receive the marker.
 */
function markEliza1VisionHandlerPresent(runtime: IAgentRuntime): void {
  const settingsHost = runtime as IAgentRuntime & {
    setSetting?: (key: string, value: unknown) => void;
    getSetting?: (key: string) => unknown;
  };
  if (typeof settingsHost.setSetting !== "function") return;

  const alreadyMarked = (): boolean => {
    if (typeof settingsHost.getSetting !== "function") return false;
    const current = settingsHost.getSetting(ELIZA1_VISION_MARKER);
    return current === "1" || current === true;
  };
  if (alreadyMarked()) return;

  try {
    settingsHost.setSetting(ELIZA1_VISION_MARKER, "1");
  } catch {
    // Some test runtimes reject setSetting at runtime — non-fatal.
  }
}
867
+
868
/**
 * Build the IMAGE_DESCRIPTION model handler.
 *
 * Dispatch order:
 *   1. `arbiter.requestVisionDescribe`, when `tryGetArbiter` returns an
 *      arbiter (this path also sets the eliza-1 vision marker);
 *   2. `service.describeImage`;
 *   3. `service.imageDescription`.
 *
 * @returns An async handler resolving to the normalized description.
 * @throws The `capability_unavailable` error built by `unavailable(...)`
 *   when none of the three paths is available.
 */
function createImageDescriptionHandler() {
  const fallbackVisionModelKey = "qwen3-vl";

  return async (
    runtime: IAgentRuntime,
    params: ImageDescriptionParams | string,
  ): Promise<ImageDescriptionResult> => {
    const service = requireService(runtime, ModelType.IMAGE_DESCRIPTION);
    const arbiter = tryGetArbiter(service);

    if (arbiter?.requestVisionDescribe) {
      // WS2 path: the arbiter owns the model handle and the projector
      // cache, so the request is simply forwarded for it to dispatch.
      markEliza1VisionHandlerPresent(runtime);

      let modelKey = fallbackVisionModelKey;
      if (typeof params === "object") {
        const requestedKey = (params as unknown as { modelKey?: unknown })
          .modelKey;
        if (typeof requestedKey === "string" && requestedKey) {
          modelKey = requestedKey;
        }
      }

      const payload = paramsToVisionRequest(params);
      const described = await arbiter.requestVisionDescribe<
        typeof payload,
        ImageDescriptionResult | string
      >({ modelKey, payload });
      return normalizeImageDescription(described);
    }

    if (typeof service.describeImage === "function") {
      const legacyOutput = await service.describeImage(params);
      return normalizeImageDescription(legacyOutput);
    }

    if (typeof service.imageDescription === "function") {
      const legacyOutput = await service.imageDescription(params);
      return normalizeImageDescription(legacyOutput);
    }

    throw unavailable(
      ModelType.IMAGE_DESCRIPTION,
      "capability_unavailable",
      "[local-inference] Active local backend does not implement IMAGE_DESCRIPTION",
    );
  };
}
907
+
908
/**
 * Request payload accepted by the WS3 arbiter image-gen capability.
 *
 * This is a local mirror of `ImageGenRequest` from
 * `./services/imagegen/types`. It is redeclared here rather than
 * imported so that this provider file carries no hard dependency on
 * the imagegen subpackage and the type surface does not reach across
 * plugins.
 */
interface ProviderImageGenRequest {
  /** Text prompt; the only required field. */
  prompt: string;
  negativePrompt?: string;

  // Output dimensions.
  width?: number;
  height?: number;

  // Sampler settings.
  steps?: number;
  guidanceScale?: number;
  scheduler?: string;
  seed?: number;

  /** Abort signal forwarded to the backend when the caller supplied one. */
  signal?: AbortSignal;
}
926
+
927
/** Generation details reported alongside the image bytes. */
interface ProviderImageGenMetadata {
  model: string;
  prompt: string;
  steps: number;
  guidanceScale: number;
  inferenceTimeMs: number;
}

/**
 * Result shape this provider expects back from
 * `arbiter.requestImageGen`: the encoded image bytes, their MIME type,
 * the seed, and a metadata record.
 */
interface ProviderImageGenResult {
  image: Uint8Array;
  mime: "image/png" | "image/jpeg";
  seed: number;
  metadata: ProviderImageGenMetadata;
}
939
+
940
/**
 * Translate core `ImageGenerationParams` into the arbiter's image-gen
 * request payload.
 *
 * - `prompt` is required and forwarded untrimmed.
 * - `size` in `"<width>x<height>"` form is split into `width` /
 *   `height`; each dimension is only set when finite and positive.
 * - Optional extension fields (`negativePrompt`, `steps`,
 *   `guidanceScale`, `seed`, `scheduler`, `signal`) are forwarded only
 *   when they pass validation; invalid values are dropped, leaving the
 *   field unset.
 *
 * Numeric validation is uniform: values must be finite. `steps` is
 * floored first and must then be at least 1, so a fractional value
 * below 1 can no longer yield `steps: 0`. `guidanceScale` must be
 * finite and non-negative, and `seed` is floored.
 *
 * @param params - Core image generation params, optionally carrying the
 *   extension fields listed above.
 * @returns The request payload for `arbiter.requestImageGen`.
 * @throws The `invalid_input` error built by `unavailable(...)` when
 *   `prompt` is not a non-blank string.
 */
function paramsToImageGenRequest(
  params: ImageGenerationParams,
): ProviderImageGenRequest {
  if (typeof params.prompt !== "string" || !params.prompt.trim()) {
    throw unavailable(
      ModelType.IMAGE,
      "invalid_input",
      "[local-inference] IMAGE requires a non-empty prompt",
    );
  }
  const out: ProviderImageGenRequest = { prompt: params.prompt };

  if (typeof params.size === "string") {
    const dimensions = /^(\d+)x(\d+)$/i.exec(params.size);
    if (dimensions) {
      const width = Number(dimensions[1]);
      const height = Number(dimensions[2]);
      // Huge digit strings overflow to Infinity, hence the finite check.
      if (Number.isFinite(width) && width > 0) out.width = width;
      if (Number.isFinite(height) && height > 0) out.height = height;
    }
  }

  // Forward optional extended knobs when callers pass them through
  // the `ImageGenerationParams` extension fields. `ImageGenerationParams`
  // in @elizaos/core is intentionally not enriched for this — see
  // "Hand-off" in the WS3 report.
  const extended = params as ImageGenerationParams & {
    negativePrompt?: unknown;
    steps?: unknown;
    guidanceScale?: unknown;
    seed?: unknown;
    scheduler?: unknown;
    signal?: unknown;
  };
  if (typeof extended.negativePrompt === "string") {
    out.negativePrompt = extended.negativePrompt;
  }
  if (typeof extended.steps === "number" && Number.isFinite(extended.steps)) {
    // Floor before range-checking so 0 < steps < 1 is rejected, not
    // forwarded as zero steps.
    const wholeSteps = Math.floor(extended.steps);
    if (wholeSteps >= 1) out.steps = wholeSteps;
  }
  if (
    typeof extended.guidanceScale === "number" &&
    Number.isFinite(extended.guidanceScale) &&
    extended.guidanceScale >= 0
  ) {
    out.guidanceScale = extended.guidanceScale;
  }
  if (typeof extended.seed === "number" && Number.isFinite(extended.seed)) {
    out.seed = Math.floor(extended.seed);
  }
  if (typeof extended.scheduler === "string") {
    out.scheduler = extended.scheduler;
  }
  if (extended.signal instanceof AbortSignal) {
    out.signal = extended.signal;
  }
  return out;
}
994
+
995
/**
 * Wrap an image-gen result as a one-element list of base64 `data:` URLs.
 *
 * @param result - Backend result carrying the encoded image bytes.
 * @returns A single-entry array whose `url` embeds the image. The MIME
 *   type is `image/jpeg` only when the result says so, and `image/png`
 *   otherwise.
 * @throws The `invalid_output` error built by `unavailable(...)` when
 *   `result.image` is not a non-empty `Uint8Array`.
 */
function imageGenResultToUrls(
  result: ProviderImageGenResult,
): ImageGenerationResult[] {
  const bytes = result.image;
  const hasPixels = bytes instanceof Uint8Array && bytes.length !== 0;
  if (!hasPixels) {
    throw unavailable(
      ModelType.IMAGE,
      "invalid_output",
      "[local-inference] IMAGE backend returned an empty image buffer",
    );
  }

  let mime = "image/png";
  if (result.mime === "image/jpeg") {
    mime = "image/jpeg";
  }
  const encoded = Buffer.from(bytes).toString("base64");
  const dataUrl = `data:${mime};base64,${encoded}`;
  return [{ url: dataUrl }];
}
1009
+
1010
/**
 * Build the IMAGE (generation) model handler.
 *
 * Generation only runs through the arbiter: when `tryGetImageGenArbiter`
 * returns nothing, the handler throws instead of falling back to a
 * legacy service method.
 *
 * @returns An async handler resolving to one `ImageGenerationResult`
 *   per generated image (between 1 and 8 of them).
 * @throws The `capability_unavailable` error built by `unavailable(...)`
 *   when no image-gen arbiter is available; also whatever
 *   `paramsToImageGenRequest` / `imageGenResultToUrls` throw.
 */
function createImageGenerationHandler() {
  return async (
    runtime: IAgentRuntime,
    params: ImageGenerationParams,
  ): Promise<ImageGenerationResult[]> => {
    const service = requireService(runtime, ModelType.IMAGE);
    const arbiter = tryGetImageGenArbiter(service);
    if (!arbiter?.requestImageGen) {
      throw unavailable(
        ModelType.IMAGE,
        "capability_unavailable",
        "[local-inference] IMAGE generation requires the WS3 arbiter image-gen capability. Register it via createImageGenCapabilityRegistration at plugin init.",
      );
    }
    const request = paramsToImageGenRequest(params);

    // The local-inference IMAGE handler only ever gets a single image
    // per backend call — local diffusion runtimes serialize batch-1 by
    // default, and an N>1 request would just be N back-to-back
    // generates. `params.count` is honoured by looping the request
    // rather than pretending the backend supports batched output.
    //
    // NaN propagates through Math.min/Math.max, which used to make the
    // loop below run zero times and return an empty array. An unparsable
    // count now falls back to one image. Fractional counts round up,
    // matching the number of iterations the loop performed before.
    const requestedCount = Number(params.count ?? 1);
    const count = Number.isNaN(requestedCount)
      ? 1
      : Math.max(1, Math.min(8, Math.ceil(requestedCount)));

    // Model key resolution: an explicit `modelKey` on the params
    // extension wins; otherwise `resolveImageGenModelKeyFromRuntime`
    // decides from the runtime settings.
    const modelKeyCandidate = (
      params as ImageGenerationParams & { modelKey?: unknown }
    ).modelKey;
    const modelKey =
      typeof modelKeyCandidate === "string" && modelKeyCandidate
        ? modelKeyCandidate
        : resolveImageGenModelKeyFromRuntime(runtime);

    const results: ImageGenerationResult[] = [];
    for (let i = 0; i < count; i += 1) {
      // With an explicit seed, images after the first use seed + i so
      // each iteration gets a distinct seed value.
      const seeded: ProviderImageGenRequest =
        typeof request.seed === "number" && i > 0
          ? { ...request, seed: request.seed + i }
          : request;
      const result = await arbiter.requestImageGen<
        ProviderImageGenRequest,
        ProviderImageGenResult
      >({ modelKey, payload: seeded });
      results.push(...imageGenResultToUrls(result));
    }
    return results;
  };
}
1060
+
1061
/**
 * Resolve the active tier-bound image-gen model id without importing
 * the imagegen subpackage. Resolution order:
 *
 *   1. `runtime.getSetting("LOCAL_INFERENCE_IMAGE_MODEL_KEY")` — explicit pin.
 *   2. `runtime.getSetting("LOCAL_INFERENCE_ACTIVE_TIER")` mapped through
 *      `TIER_TO_DEFAULT_IMAGE_MODEL_KEY`.
 *   3. Fall back to the small-tier default (`imagegen-sd-1_5-q5_0`).
 *
 * A runtime without `getSetting` goes straight to step 3. The tier
 * lookup only honours the map's own keys, so a tier value such as
 * `"constructor"` cannot resolve to a member inherited from
 * `Object.prototype`.
 *
 * @param runtime - Agent runtime whose settings are consulted.
 * @returns The model key to hand to `arbiter.requestImageGen`.
 */
function resolveImageGenModelKeyFromRuntime(runtime: IAgentRuntime): string {
  const fallbackModelKey = "imagegen-sd-1_5-q5_0";
  const r = runtime as IAgentRuntime & {
    getSetting?: (key: string) => unknown;
  };
  // Some runtimes (e.g. test doubles) may not expose getSetting at all;
  // markEliza1VisionHandlerPresent guards for the same situation.
  if (typeof r.getSetting !== "function") return fallbackModelKey;

  const pinned = r.getSetting("LOCAL_INFERENCE_IMAGE_MODEL_KEY");
  if (typeof pinned === "string" && pinned.trim()) return pinned.trim();

  const tier = r.getSetting("LOCAL_INFERENCE_ACTIVE_TIER");
  if (typeof tier === "string" && tier.trim()) {
    const tierKey = tier.trim();
    // Own-property check: plain object lookups would otherwise return
    // inherited members (functions/objects) as if they were model keys.
    if (
      Object.prototype.hasOwnProperty.call(
        TIER_TO_DEFAULT_IMAGE_MODEL_KEY,
        tierKey,
      )
    ) {
      const mapped = TIER_TO_DEFAULT_IMAGE_MODEL_KEY[tierKey];
      if (mapped) return mapped;
    }
  }
  return fallbackModelKey;
}

/**
 * Inlined tier → default image-gen model id map. Duplicates the
 * `TIER_TO_DEFAULT_IMAGE_MODEL` entries in `backend-selector.ts` —
 * provider.ts intentionally avoids importing the imagegen subpackage
 * so the provider stays loadable on runtimes that don't ship
 * the WS3 capability. The two maps are kept in sync by the WS3
 * routing test (`imagegen-routing.test.ts`).
 */
const TIER_TO_DEFAULT_IMAGE_MODEL_KEY: Readonly<Record<string, string>> = {
  "eliza-1-0_8b": "imagegen-sd-1_5-q5_0",
  "eliza-1-2b": "imagegen-sd-1_5-q5_0",
  "eliza-1-4b": "imagegen-sd-1_5-q5_0",
  "eliza-1-9b": "imagegen-z-image-turbo-q4_k_m",
  "eliza-1-27b": "imagegen-z-image-turbo-q4_k_m",
  "eliza-1-27b-256k": "imagegen-z-image-turbo-q4_k_m",
};
1100
+
1101
/**
 * Create a fresh handler for every model type this provider serves:
 * small and large text, embeddings, image generation, image
 * description, text-to-speech, and transcription.
 *
 * @returns A new handler map keyed by `ModelType`.
 */
export function createLocalInferenceModelHandlers(): NonNullable<
  Plugin["models"]
> {
  const smallText = createTextHandler(ModelType.TEXT_SMALL);
  const largeText = createTextHandler(ModelType.TEXT_LARGE);
  const embedding = createEmbeddingHandler();
  const imageGeneration = createImageGenerationHandler();
  const imageDescription = createImageDescriptionHandler();
  const textToSpeech = createTextToSpeechHandler();
  const transcription = createTranscriptionHandler();

  return {
    [ModelType.TEXT_SMALL]: smallText,
    [ModelType.TEXT_LARGE]: largeText,
    [ModelType.TEXT_EMBEDDING]: embedding,
    [ModelType.IMAGE]: imageGeneration,
    [ModelType.IMAGE_DESCRIPTION]: imageDescription,
    [ModelType.TEXT_TO_SPEECH]: textToSpeech,
    [ModelType.TRANSCRIPTION]: transcription,
  };
}
1114
+
1115
/**
 * Handler map for the static plugin object: everything
 * `createLocalInferenceModelHandlers` produces except the
 * TEXT_EMBEDDING handler.
 *
 * @returns A new handler map without a TEXT_EMBEDDING entry.
 */
function createStaticPluginModelHandlers(): NonNullable<Plugin["models"]> {
  const staticHandlers = { ...createLocalInferenceModelHandlers() };
  delete staticHandlers[ModelType.TEXT_EMBEDDING];
  return staticHandlers;
}
1120
+
1121
/**
 * The local-inference plugin definition: static model handlers (minus
 * embeddings), the media / speaker actions, the voice-entity binding
 * event handler, and the voice-profile routes. `init` only logs what
 * the currently exposed backend service can do.
 */
export const localInferencePlugin: Plugin = {
  name: LOCAL_INFERENCE_PROVIDER_ID,
  description:
    "Eliza-1 local provider for text, embeddings, text-to-speech, and transcription.",
  priority: LOCAL_INFERENCE_PRIORITY,
  actions: [generateMediaAction, identifySpeakerAction],
  events: {
    // Return leg of the voice→entity binding: once the merge engine
    // (plugin-lifeops) reports a binding, the entityId is persisted onto
    // the matching voice profile(s). See runtime/voice-entity-binding.ts.
    [EventType.VOICE_ENTITY_BOUND]: [handleVoiceEntityBound],
  },
  // Voice-profile HTTP surface (speaker→entity bind/unbind plus the
  // VoiceProfileSection management UI). These are registered as rawPath
  // plugin routes because no server forwards these namespaces to the
  // local-inference route dispatcher. See
  // routes/voice-profile-plugin-routes.ts.
  routes: voiceProfilePluginRoutes,
  // TEXT_EMBEDDING is wired by ensureLocalInferenceHandler() instead of
  // this static object. Runtime bootstrap probes embeddings before the
  // user has activated an Eliza-1 bundle; a statically registered handler
  // would claim a provider that cannot embed yet and abort startup rather
  // than letting the app come online.
  models: createStaticPluginModelHandlers(),
  async init(_config: unknown, runtime: IAgentRuntime) {
    const service = serviceFromRuntime(runtime);
    if (service) {
      const isCallable = (member: unknown): boolean =>
        typeof member === "function";
      // Flags reflect the methods found on the service object itself.
      const capabilities = {
        generate: isCallable(service.generate),
        embed: isCallable(service.embed),
        textToSpeech:
          isCallable(service.synthesizeSpeech) ||
          isCallable(service.textToSpeech),
        imageDescription:
          isCallable(service.describeImage) ||
          isCallable(service.imageDescription),
        transcription:
          isCallable(service.transcribe) || isCallable(service.transcribePcm),
      };
      logger.info(
        capabilities,
        "[local-inference] Provider connected to runtime backend service",
      );
      return;
    }
    logger.info(
      "[local-inference] Provider registered; no active backend service is exposed yet. Model calls will return LOCAL_INFERENCE_UNAVAILABLE until an Eliza-1 backend is activated.",
    );
  },
};

export default localInferencePlugin;