@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,1082 @@
1
+ import {
2
+ type AudioStreamResult,
3
+ EventType,
4
+ type GenerateTextParams,
5
+ type IAgentRuntime,
6
+ type ImageDescriptionParams,
7
+ type ImageDescriptionResult,
8
+ type ImageGenerationParams,
9
+ type ImageGenerationResult,
10
+ logger,
11
+ ModelType,
12
+ type Plugin,
13
+ type TextEmbeddingParams,
14
+ type TextToSpeechParams,
15
+ type TranscriptionParams,
16
+ } from "@elizaos/core";
17
+
18
+ import { generateMediaAction } from "./actions/generate-media.js";
19
+ import { identifySpeakerAction } from "./actions/identify-speaker.js";
20
+ import {
21
+ startTranscriptionAction,
22
+ stopTranscriptionAction,
23
+ } from "./actions/transcription-control.js";
24
+ import { transcriptsRoutes } from "./routes/transcripts-routes.js";
25
+ import { voiceProfilePluginRoutes } from "./routes/voice-profile-plugin-routes.js";
26
+ import { handleVoiceEntityBound } from "./runtime/voice-entity-binding.js";
27
+
28
/** Identifier stamped on errors raised by this provider (see the `provider` field of the unavailable error). */
export const LOCAL_INFERENCE_PROVIDER_ID = "eliza-local-inference";
/**
 * Priority value exported for this provider.
 * NOTE(review): presumably negative so that other providers are preferred — confirm against the registration site.
 */
export const LOCAL_INFERENCE_PRIORITY = -100;

/** The text-generation model types covered by the local provider. */
export const LOCAL_INFERENCE_TEXT_MODEL_TYPES = [
  ModelType.TEXT_SMALL,
  ModelType.TEXT_LARGE,
] as const;

/** Every model type covered by the local provider: the text types plus embedding, image, image description, TTS and transcription. */
export const LOCAL_INFERENCE_MODEL_TYPES = [
  ...LOCAL_INFERENCE_TEXT_MODEL_TYPES,
  ModelType.TEXT_EMBEDDING,
  ModelType.IMAGE,
  ModelType.IMAGE_DESCRIPTION,
  ModelType.TEXT_TO_SPEECH,
  ModelType.TRANSCRIPTION,
] as const;
44
+
45
/**
 * Why a local-inference call could not be served:
 * - `backend_unavailable`: no local inference service was found on the runtime.
 * - `capability_unavailable`: a service exists but lacks the required method.
 * - `invalid_input`: the caller's parameters were unusable.
 * - `invalid_output`: the backend returned a value of the wrong shape.
 */
export type LocalInferenceUnavailableReason =
  | "backend_unavailable"
  | "capability_unavailable"
  | "invalid_input"
  | "invalid_output";
50
+
51
/**
 * Typed failure raised whenever the local provider cannot serve a model call.
 *
 * Carries a stable `code`, the provider id, the model type that was requested
 * and a machine-readable `reason`, so callers can branch without parsing the
 * message.
 */
export class LocalInferenceUnavailableError extends Error {
  /** Stable discriminator; also matched structurally by the type guard. */
  readonly code = "LOCAL_INFERENCE_UNAVAILABLE";
  readonly provider = LOCAL_INFERENCE_PROVIDER_ID;

  /**
   * @param modelType Model type of the failed call.
   * @param reason Category of the failure.
   * @param message Human-readable description.
   * @param options Optional `cause` forwarded to the `Error` constructor.
   */
  constructor(
    readonly modelType: string,
    readonly reason: LocalInferenceUnavailableReason,
    message: string,
    options?: { cause?: unknown },
  ) {
    super(message, options);
    this.name = "LocalInferenceUnavailableError";
  }

  /** Plain-object form containing only the string fields (no stack, no cause). */
  toJSON(): Record<string, string> {
    const { code, provider, modelType, reason, message } = this;
    return { code, provider, modelType, reason, message };
  }
}
75
+
76
/**
 * Type guard for {@link LocalInferenceUnavailableError}.
 *
 * Accepts real instances and, additionally, any non-null object whose `code`
 * equals `"LOCAL_INFERENCE_UNAVAILABLE"`, so values that are not instances of
 * this class are still recognised by their code.
 */
export function isLocalInferenceUnavailableError(
  error: unknown,
): error is LocalInferenceUnavailableError {
  if (error instanceof LocalInferenceUnavailableError) return true;
  if (typeof error !== "object" || error === null) return false;
  return (error as { code?: unknown }).code === "LOCAL_INFERENCE_UNAVAILABLE";
}
86
+
87
/** Arguments passed to the backend's `generate` method for a text request. */
interface LocalInferenceGenerateArgs {
  /** Fully rendered prompt text. */
  prompt: string;
  stopSequences?: string[];
  maxTokens?: number;
  temperature?: number;
  topP?: number;
  /** Abort signal taken from the caller's params. */
  signal?: AbortSignal;
  /** Callback for streamed text chunks; may be synchronous or return a promise. */
  onTextChunk?: (chunk: string) => void | Promise<void>;
}
96
+
97
/** Object form of an `embed` result; backends may also return the bare `number[]`. */
interface LocalInferenceEmbedResult {
  embedding: number[];
}
100
+
101
/**
 * Speech-synthesis methods a local backend may expose. All are optional; the
 * TEXT_TO_SPEECH handler probes for them at call time.
 */
interface LocalInferenceTextToSpeechService {
  /** Buffered synth taking positional arguments; tried before `textToSpeech`. */
  synthesizeSpeech?: (
    text: string,
    signal?: AbortSignal,
  ) => Promise<Uint8Array | ArrayBuffer | Buffer>;
  /** Buffered synth taking a single options object. */
  textToSpeech?: (args: {
    text: string;
    signal?: AbortSignal;
  }) => Promise<Uint8Array | ArrayBuffer | Buffer>;
  /**
   * Optional streaming synth seam: yields audio (PCM/WAV) chunks as they are
   * produced so playback can start before the whole clip is ready. When a
   * backend implements it, the TEXT_TO_SPEECH handler returns an
   * {@link AudioStreamResult} for `audioStream` callers; otherwise it falls
   * back to a single-chunk result around the buffered synth.
   */
  synthesizeSpeechStream?: (
    text: string,
    signal?: AbortSignal,
  ) => AsyncIterable<Uint8Array>;
}
122
+
123
/**
 * Transcription methods a local backend may expose. Both are optional; the
 * TRANSCRIPTION handler prefers `transcribe` and falls back to `transcribePcm`.
 */
interface LocalInferenceTranscriptionService {
  /** Receives the caller's params unchanged; returns the text or `{ text }`. */
  transcribe?: (params: unknown) => Promise<string | { text?: string }>;
  /**
   * Transcribes raw PCM samples. The abort signal may be supplied inside
   * `params` and/or as the second argument.
   */
  transcribePcm?: (
    params: {
      pcm: Float32Array;
      sampleRate: number;
      signal?: AbortSignal;
    },
    signal?: AbortSignal,
  ) => Promise<string | { text?: string }>;
}
134
+
135
/**
 * Optional arbiter accessor. When the local-inference plugin's runtime
 * service registers a MemoryArbiter (WS1) on the IAgentRuntime, this
 * method returns it. Cross-plugin consumers (plugin-vision, plugin-image-gen,
 * plugin-aosp-local-inference) call `service.getMemoryArbiter()` to
 * register their capability handlers and request model swaps without
 * knowing which backend is loaded.
 *
 * The concrete return type is intentionally `unknown` here to keep this
 * provider file free of a hard dependency on `./services/memory-arbiter`;
 * consumers should import the `MemoryArbiter` type from
 * `@elizaos/plugin-local-inference/services` and cast.
 */
interface LocalInferenceArbiterAccessor {
  /** Returns the arbiter when one is available; the member itself is optional. */
  getMemoryArbiter?: () => unknown;
}
151
+
152
/**
 * Duck-typed view of the local inference service looked up on the runtime.
 * Every capability is optional, so handlers must check for a method before
 * calling it.
 */
interface LocalInferenceRuntimeService
  extends LocalInferenceTextToSpeechService,
    LocalInferenceTranscriptionService,
    LocalInferenceArbiterAccessor {
  /** Text generation; resolves to the generated string. */
  generate?: (args: LocalInferenceGenerateArgs) => Promise<string>;
  /** Text embedding; resolves to the vector or an `{ embedding }` wrapper. */
  embed?: (args: {
    input: string;
  }) => Promise<number[] | LocalInferenceEmbedResult>;
  /** Image description entry point. */
  describeImage?: (
    params: ImageDescriptionParams | string,
  ) => Promise<ImageDescriptionResult | string>;
  /** Alternative image description entry point with the same signature as `describeImage`. */
  imageDescription?: (
    params: ImageDescriptionParams | string,
  ) => Promise<ImageDescriptionResult | string>;
}
167
+
168
/** Runtime view with an optional, loosely typed `getService` used for service lookup by name. */
type RuntimeWithServices = IAgentRuntime & {
  getService?: (name: string) => unknown;
};
171
+
172
/**
 * Looks up the local inference service on the runtime.
 *
 * Tries the service names `localInferenceLoader`, `localInference` and
 * `LOCAL_INFERENCE`, in that order, and returns the first non-null object.
 *
 * @returns The service, or `null` when the runtime has no `getService`
 *   function or none of the names resolves to an object.
 */
function serviceFromRuntime(
  runtime: IAgentRuntime,
): LocalInferenceRuntimeService | null {
  const host = runtime as RuntimeWithServices;
  if (typeof host.getService !== "function") return null;

  const lookupOrder = ["localInferenceLoader", "localInference", "LOCAL_INFERENCE"];
  for (const serviceName of lookupOrder) {
    const found = host.getService(serviceName);
    if (typeof found === "object" && found !== null) {
      return found as LocalInferenceRuntimeService;
    }
  }
  return null;
}
190
+
191
/**
 * Builds (does not throw) a {@link LocalInferenceUnavailableError}.
 *
 * @param modelType Model type of the failed call.
 * @param reason Failure category.
 * @param message Human-readable description.
 * @param cause Optional underlying error, attached as the error's `cause`.
 */
function unavailable(
  modelType: string,
  reason: LocalInferenceUnavailableReason,
  message: string,
  cause?: unknown,
): LocalInferenceUnavailableError {
  const options = { cause };
  return new LocalInferenceUnavailableError(modelType, reason, message, options);
}
201
+
202
/**
 * Returns the local inference service or fails with a typed error.
 *
 * @param runtime Runtime to look the service up on.
 * @param modelType Model type being served; used in the error.
 * @throws LocalInferenceUnavailableError with reason `backend_unavailable`
 *   when no service is found.
 */
function requireService(
  runtime: IAgentRuntime,
  modelType: string,
): LocalInferenceRuntimeService {
  const service = serviceFromRuntime(runtime);
  if (service) return service;

  throw unavailable(
    modelType,
    "backend_unavailable",
    `[local-inference] ${modelType} requires an active Eliza-1 local inference backend. Activate an Eliza-1 bundle or enable an AOSP/device local loader.`,
  );
}
216
+
217
/** Loosely typed chat message; both fields are validated at render time. */
type MessageLike = {
  role?: unknown;
  content?: unknown;
};

/** Loosely typed prompt segment; `content` is validated at render time. */
type PromptSegmentLike = {
  content?: unknown;
};
225
+
226
/**
 * Flattens message/segment content into plain text.
 *
 * - A string is returned unchanged.
 * - An array contributes each string element and each object element's string
 *   `text` property; empty pieces are dropped and the rest joined with `\n`.
 * - Anything else renders as the empty string.
 */
function renderPromptContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces: string[] = [];
  for (const part of content as unknown[]) {
    let text = "";
    if (typeof part === "string") {
      text = part;
    } else if (part && typeof part === "object") {
      const candidate = (part as { text?: unknown }).text;
      if (typeof candidate === "string") text = candidate;
    }
    if (text) pieces.push(text);
  }
  return pieces.join("\n");
}
246
+
247
/**
 * Renders a message list as a single prompt.
 *
 * Each message with non-empty rendered content becomes `<role>:\n<content>`,
 * where the role is the trimmed `role` string or `message` when it is missing
 * or blank. Blocks are separated by a blank line; messages without content are
 * omitted.
 */
function promptFromMessages(messages: readonly MessageLike[]): string {
  const blocks = messages.flatMap((message) => {
    const body = renderPromptContent(message.content);
    if (!body) return [];
    const rawRole = message.role;
    const label = typeof rawRole === "string" ? rawRole.trim() : "";
    return [`${label || "message"}:\n${body}`];
  });
  return blocks.join("\n\n");
}
261
+
262
/**
 * Resolves the prompt text for a text-generation request.
 *
 * Sources are tried in priority order: `params.prompt`, then the concatenated
 * `promptSegments`, then `messages` rendered as role-labelled blocks. The
 * first source that yields non-blank text wins. A source that is present but
 * renders blank (for example segments whose content is not text) no longer
 * hides a later usable source.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_input` when no
 *   source yields non-blank text.
 */
function promptFromParams(params: GenerateTextParams): string {
  const record = params as GenerateTextParams & {
    messages?: readonly MessageLike[];
    promptSegments?: readonly PromptSegmentLike[];
  };

  // Lazily evaluated so lower-priority sources are only rendered when needed.
  const sources: Array<() => string> = [
    () => (typeof params.prompt === "string" ? params.prompt : ""),
    () =>
      Array.isArray(record.promptSegments)
        ? record.promptSegments
            .map((segment) => renderPromptContent(segment.content))
            .join("")
        : "",
    () =>
      Array.isArray(record.messages) ? promptFromMessages(record.messages) : "",
  ];

  for (const render of sources) {
    const prompt = render();
    if (prompt.trim().length > 0) return prompt;
  }

  // NOTE(review): the error is always tagged TEXT_SMALL because the actual
  // model type is not available in this function.
  throw unavailable(
    ModelType.TEXT_SMALL,
    "invalid_input",
    "[local-inference] TEXT generation requires a non-empty prompt",
  );
}
286
+
287
/**
 * Maps core text-generation params onto the backend's `generate` arguments.
 *
 * Sampling options and the abort signal are copied through as-is. The chunk
 * callback is wired only when the caller set `stream` or `streamStructured`
 * to `true` and supplied an `onStreamChunk` function.
 *
 * @throws LocalInferenceUnavailableError when no prompt can be resolved.
 */
function textGenerationArgsFromParams(
  params: GenerateTextParams,
): LocalInferenceGenerateArgs {
  const prompt = promptFromParams(params);
  const { stopSequences, maxTokens, temperature, topP, signal } = params;

  const streamingRequested =
    params.stream === true || params.streamStructured === true;
  const forwardChunks =
    streamingRequested && typeof params.onStreamChunk === "function";

  return {
    prompt,
    stopSequences,
    maxTokens,
    temperature,
    topP,
    signal,
    onTextChunk: forwardChunks
      ? (chunk) => params.onStreamChunk?.(chunk)
      : undefined,
  };
}
304
+
305
/**
 * Pulls the text to embed out of the handler params.
 *
 * @returns The string itself, or the `text` property of an object param.
 * @throws LocalInferenceUnavailableError with reason `invalid_input` for
 *   `null` or any other shape.
 */
function extractEmbeddingText(
  params: TextEmbeddingParams | string | null,
): string {
  if (typeof params === "string") return params;

  const text =
    typeof params === "object" && params !== null ? params.text : undefined;
  if (typeof text === "string") return text;

  throw unavailable(
    ModelType.TEXT_EMBEDDING,
    "invalid_input",
    "[local-inference] TEXT_EMBEDDING requires { text } or a non-empty string; null warmup probes are not served with fake vectors",
  );
}
318
+
319
/**
 * Pulls the text to synthesize out of the handler params.
 *
 * @returns The string itself, or the `text` property of an object param.
 * @throws LocalInferenceUnavailableError with reason `invalid_input` for any
 *   other shape.
 */
function extractSpeechText(params: TextToSpeechParams | string): string {
  if (typeof params === "string") return params;

  const text =
    typeof params === "object" && params !== null ? params.text : undefined;
  if (typeof text === "string") return text;

  throw unavailable(
    ModelType.TEXT_TO_SPEECH,
    "invalid_input",
    "[local-inference] TEXT_TO_SPEECH requires a string or { text } input",
  );
}
330
+
331
/**
 * Reads the abort signal from object-shaped speech params.
 *
 * @returns `params.signal` for a non-null object, otherwise `undefined`.
 */
function extractSpeechSignal(
  params: TextToSpeechParams | string,
): AbortSignal | undefined {
  if (typeof params !== "object" || params === null) return undefined;
  return params.signal;
}
338
+
339
/**
 * Trims `text` and rejects blank input.
 *
 * @param modelType Model type used in the error.
 * @param text Raw text.
 * @returns The trimmed text.
 * @throws LocalInferenceUnavailableError with reason `invalid_input` when the
 *   trimmed text is empty.
 */
function ensureNonEmptyText(modelType: string, text: string): string {
  const trimmed = text.trim();
  if (trimmed.length > 0) return trimmed;

  throw unavailable(
    modelType,
    "invalid_input",
    `[local-inference] ${modelType} requires non-empty text`,
  );
}
350
+
351
/**
 * Validates a backend `embed` result and returns the bare vector.
 *
 * Accepts either the vector itself or an `{ embedding }` wrapper. The result
 * is rejected when it is `null`/`undefined`, when the vector is missing or
 * empty, or when any component is not a finite number. A NaN or Infinity
 * component would otherwise be returned as a valid embedding.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_output`.
 */
function normalizeEmbeddingResult(
  result: number[] | LocalInferenceEmbedResult,
): number[] {
  // Optional chaining: a backend that resolves to null/undefined must produce
  // the typed invalid_output error, not a raw TypeError on `.embedding`.
  const embedding: unknown = Array.isArray(result)
    ? result
    : (result as LocalInferenceEmbedResult | null | undefined)?.embedding;

  const isValid =
    Array.isArray(embedding) &&
    embedding.length > 0 &&
    embedding.every(
      (value) => typeof value === "number" && Number.isFinite(value),
    );
  if (!isValid) {
    throw unavailable(
      ModelType.TEXT_EMBEDDING,
      "invalid_output",
      "[local-inference] TEXT_EMBEDDING backend returned an invalid embedding",
    );
  }
  return embedding as number[];
}
367
+
368
/**
 * Coerces backend audio output to a plain `Uint8Array`.
 *
 * A `Uint8Array` (including subclasses) becomes a new view over the same
 * memory; an `ArrayBuffer` is wrapped in a view. No bytes are copied.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_output` for any
 *   other value.
 */
function normalizeAudioBytes(
  result: Uint8Array | ArrayBuffer | Buffer,
): Uint8Array {
  if (result instanceof ArrayBuffer) {
    return new Uint8Array(result);
  }
  if (result instanceof Uint8Array) {
    const { buffer, byteOffset, byteLength } = result;
    return new Uint8Array(buffer, byteOffset, byteLength);
  }
  throw unavailable(
    ModelType.TEXT_TO_SPEECH,
    "invalid_output",
    "[local-inference] TEXT_TO_SPEECH backend returned non-audio output",
  );
}
383
+
384
/**
 * Concatenates audio chunks into one freshly allocated `Uint8Array`, in order.
 * An empty list yields an empty array.
 */
function concatAudioChunks(chunks: Uint8Array[]): Uint8Array {
  let total = 0;
  for (const chunk of chunks) total += chunk.byteLength;

  const merged = new Uint8Array(total);
  chunks.reduce((offset, chunk) => {
    merged.set(chunk, offset);
    return offset + chunk.byteLength;
  }, 0);
  return merged;
}
394
+
395
/**
 * Wraps already-synthesized audio in an {@link AudioStreamResult} so callers
 * that asked for a stream get one even when the backend cannot stream.
 *
 * The stream yields `bytes` once (or nothing when it is empty), and the
 * `bytes` promise is already resolved.
 */
function bufferedAudioStreamResult(
  bytes: Uint8Array,
  mimeType: string,
): AudioStreamResult {
  const audioStream = (async function* (): AsyncGenerator<Uint8Array> {
    if (bytes.byteLength > 0) yield bytes;
  })();
  return { audioStream, bytes: Promise.resolve(bytes), mimeType };
}
406
+
407
/**
 * Wraps a backend streaming synth as an {@link AudioStreamResult}.
 *
 * Chunks are normalized, recorded and re-yielded as the consumer iterates
 * `audioStream`. The `bytes` promise settles as follows:
 * - resolves with the concatenated clip once the stream is fully drained;
 * - rejects with the source's error if synthesis fails;
 * - rejects if the consumer stops iterating before the end, instead of
 *   staying pending forever.
 *
 * `bytes` is driven by iteration: it stays pending until `audioStream` is
 * consumed.
 *
 * @param source Backend chunk stream.
 * @param mimeType MIME type reported on the result.
 */
function streamingAudioStreamResult(
  source: AsyncIterable<Uint8Array>,
  mimeType: string,
): AudioStreamResult {
  const collected: Uint8Array[] = [];
  let resolveBytes!: (value: Uint8Array) => void;
  let rejectBytes!: (reason: unknown) => void;
  const bytes = new Promise<Uint8Array>((resolve, reject) => {
    resolveBytes = resolve;
    rejectBytes = reject;
  });
  // Consumers that only iterate `audioStream` never attach a handler to
  // `bytes`. Without this no-op handler a synthesis failure would also surface
  // as an unhandled rejection. Callers awaiting `bytes` still see the error.
  void bytes.catch(() => undefined);

  async function* generate(): AsyncGenerator<Uint8Array> {
    let settled = false;
    try {
      for await (const value of source) {
        const chunk = normalizeAudioBytes(value);
        collected.push(chunk);
        yield chunk;
      }
      resolveBytes(concatAudioChunks(collected));
      settled = true;
    } catch (err) {
      rejectBytes(err);
      settled = true;
      throw err;
    } finally {
      // Reached without settling only when the consumer ended iteration early
      // (break / return()). Reject rather than resolve so a truncated clip is
      // never mistaken for the full one.
      if (!settled) {
        rejectBytes(
          new Error(
            "[local-inference] TEXT_TO_SPEECH audio stream was closed before it was fully drained",
          ),
        );
      }
    }
  }
  return { audioStream: generate(), bytes, mimeType };
}
435
+
436
/**
 * MIME type reported on audio stream results from the local TTS handler.
 * NOTE(review): the streaming seam is documented as yielding "PCM/WAV" chunks,
 * yet results are always labelled `audio/wav` — confirm backends emit WAV.
 */
const LOCAL_TTS_MIME = "audio/wav";
437
+
438
/**
 * Validates transcription params for the `transcribePcm` fallback path.
 *
 * Requires a non-null object (not a `Uint8Array`) holding a `Float32Array`
 * `pcm` and a positive, finite sample rate. The rate is read from
 * `sampleRateHz` first, then `sampleRate`. `signal` is included in the result
 * only when present.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_input` when any
 *   requirement is not met.
 */
function extractPcmTranscriptionParams(
  params: TranscriptionParams | Buffer | string | unknown,
): { pcm: Float32Array; sampleRate: number; signal?: AbortSignal } {
  const isUsableObject =
    typeof params === "object" &&
    params !== null &&
    !(params instanceof Uint8Array);
  if (!isUsableObject) {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_input",
      "[local-inference] TRANSCRIPTION requires { pcm, sampleRateHz } when only transcribePcm is available",
    );
  }

  const record = params as {
    pcm?: unknown;
    sampleRateHz?: unknown;
    sampleRate?: unknown;
    signal?: AbortSignal;
  };

  const { pcm } = record;
  if (!(pcm instanceof Float32Array)) {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_input",
      "[local-inference] TRANSCRIPTION requires Float32Array pcm when only transcribePcm is available",
    );
  }

  let sampleRate = 0;
  if (typeof record.sampleRateHz === "number") {
    sampleRate = record.sampleRateHz;
  } else if (typeof record.sampleRate === "number") {
    sampleRate = record.sampleRate;
  }
  if (!(Number.isFinite(sampleRate) && sampleRate > 0)) {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_input",
      "[local-inference] TRANSCRIPTION { pcm } requires a positive sampleRateHz",
    );
  }

  const extracted: {
    pcm: Float32Array;
    sampleRate: number;
    signal?: AbortSignal;
  } = { pcm, sampleRate };
  if (record.signal) extracted.signal = record.signal;
  return extracted;
}
478
+
479
/**
 * Reads the `signal` property from object-shaped transcription params.
 *
 * @returns The property value for a non-null object, otherwise `undefined`.
 *   The value is not checked to be an actual `AbortSignal`.
 */
function extractTranscriptionSignal(params: unknown): AbortSignal | undefined {
  if (typeof params !== "object" || params === null) return undefined;
  return (params as { signal?: AbortSignal }).signal;
}
484
+
485
/**
 * Throws when `signal` has been aborted; otherwise does nothing.
 *
 * The signal's `reason` is thrown as-is when it is an `Error`. Any other
 * reason is replaced by a `DOMException` named `AbortError`.
 */
function throwIfAborted(signal: AbortSignal | undefined): void {
  if (!signal || !signal.aborted) return;

  const { reason } = signal;
  if (reason instanceof Error) throw reason;
  throw new DOMException("Aborted", "AbortError");
}
491
+
492
/**
 * Validates a backend transcription result and returns the transcript text.
 *
 * Accepts either the text itself or a `{ text }` wrapper. A `null` or
 * `undefined` result is reported as the typed `invalid_output` error rather
 * than a raw `TypeError`. An empty string is a valid transcript.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_output` when no
 *   string can be extracted.
 */
function normalizeTranscript(result: string | { text?: string }): string {
  // Optional chaining guards against backends that resolve to null/undefined.
  const text: unknown =
    typeof result === "string"
      ? result
      : (result as { text?: unknown } | null | undefined)?.text;
  if (typeof text !== "string") {
    throw unavailable(
      ModelType.TRANSCRIPTION,
      "invalid_output",
      "[local-inference] TRANSCRIPTION backend returned an invalid transcript",
    );
  }
  return text;
}
503
+
504
/**
 * Validates a backend image description and returns `{ title, description }`.
 *
 * - A string result becomes the (trimmed) description; the title is its first
 *   sentence, or `Image` when that is blank.
 * - An object result must carry string `title` and `description`; both are
 *   trimmed.
 *
 * Blank or malformed backend output is reported with reason `invalid_output`,
 * because the fault lies with what the backend produced, not with the
 * caller's input.
 *
 * @throws LocalInferenceUnavailableError with reason `invalid_output`.
 */
function normalizeImageDescription(
  result: ImageDescriptionResult | string,
): ImageDescriptionResult {
  const invalidOutput = () =>
    unavailable(
      ModelType.IMAGE_DESCRIPTION,
      "invalid_output",
      "[local-inference] IMAGE_DESCRIPTION backend returned an invalid description",
    );

  if (typeof result === "string") {
    const description = result.trim();
    if (!description) throw invalidOutput();
    return {
      title: description.split(/[.!?]/, 1)[0]?.trim() || "Image",
      description,
    };
  }

  if (
    result &&
    typeof result === "object" &&
    typeof result.title === "string" &&
    typeof result.description === "string"
  ) {
    const title = result.title.trim();
    const description = result.description.trim();
    if (!title || !description) throw invalidOutput();
    return { title, description };
  }

  throw invalidOutput();
}
534
+
535
/**
 * Builds the model handler for a text-generation model type.
 *
 * The returned handler resolves the local service, checks that it implements
 * `generate`, and forwards the mapped arguments to it.
 *
 * @param modelType Model type the handler is for; used in errors.
 * @returns Handler that throws LocalInferenceUnavailableError when the backend
 *   is missing (`backend_unavailable`), lacks `generate`
 *   (`capability_unavailable`) or the prompt is unusable (`invalid_input`).
 */
function createTextHandler(modelType: string) {
  return async (
    runtime: IAgentRuntime,
    params: GenerateTextParams,
  ): Promise<string> => {
    const service = requireService(runtime, modelType);
    if (typeof service.generate === "function") {
      return service.generate(textGenerationArgsFromParams(params));
    }
    throw unavailable(
      modelType,
      "capability_unavailable",
      `[local-inference] Active local backend does not implement ${modelType} generation`,
    );
  };
}
551
+
552
/**
 * Builds the TEXT_EMBEDDING model handler.
 *
 * Checks run in this order: backend present, backend implements `embed`,
 * input is non-blank text. The backend result is validated before being
 * returned; no placeholder vector is ever produced.
 *
 * @returns Handler that throws LocalInferenceUnavailableError on any failed
 *   check.
 */
function createEmbeddingHandler() {
  return async (
    runtime: IAgentRuntime,
    params: TextEmbeddingParams | string | null,
  ): Promise<number[]> => {
    const service = serviceFromRuntime(runtime);
    if (service === null) {
      throw unavailable(
        ModelType.TEXT_EMBEDDING,
        "backend_unavailable",
        "[local-inference] TEXT_EMBEDDING requires an active Eliza-1 backend or another embedding provider; refusing to synthesize zero-vectors.",
      );
    }
    if (typeof service.embed !== "function") {
      throw unavailable(
        ModelType.TEXT_EMBEDDING,
        "capability_unavailable",
        "[local-inference] Active local backend does not implement TEXT_EMBEDDING",
      );
    }

    const rawText = extractEmbeddingText(params);
    const input = ensureNonEmptyText(ModelType.TEXT_EMBEDDING, rawText);
    const result = await service.embed({ input });
    return normalizeEmbeddingResult(result);
  };
}
579
+
580
/**
 * Builds the TEXT_TO_SPEECH model handler.
 *
 * Result shape:
 * - Without `audioStream: true` in the params: the synthesized bytes.
 * - With `audioStream: true` and a streaming backend: a chunked
 *   {@link AudioStreamResult}.
 * - With `audioStream: true` but no streaming backend: a single-chunk
 *   {@link AudioStreamResult} around the buffered synth.
 *
 * Buffered synthesis prefers `synthesizeSpeech` and falls back to
 * `textToSpeech`.
 *
 * @returns Handler that throws LocalInferenceUnavailableError when the backend
 *   is missing, the text is blank, or no synth method is implemented.
 */
function createTextToSpeechHandler() {
  return async (
    runtime: IAgentRuntime,
    params: TextToSpeechParams | string,
  ): Promise<Uint8Array | AudioStreamResult> => {
    const service = requireService(runtime, ModelType.TEXT_TO_SPEECH);
    const text = ensureNonEmptyText(
      ModelType.TEXT_TO_SPEECH,
      extractSpeechText(params),
    );
    const signal = extractSpeechSignal(params);

    // Streaming is an explicit opt-in via `audioStream`, not the generic
    // `stream` flag that useModel injects from an ambient text-streaming
    // turn, so callers expecting bytes keep getting a buffer.
    const wantsStream =
      typeof params === "object" &&
      params !== null &&
      (params as { audioStream?: boolean }).audioStream === true;

    // True chunked streaming when the backend implements the seam.
    if (wantsStream && typeof service.synthesizeSpeechStream === "function") {
      const chunks = service.synthesizeSpeechStream(text, signal);
      return streamingAudioStreamResult(chunks, LOCAL_TTS_MIME);
    }

    let raw: Uint8Array | ArrayBuffer | Buffer;
    if (typeof service.synthesizeSpeech === "function") {
      raw = await service.synthesizeSpeech(text, signal);
    } else if (typeof service.textToSpeech === "function") {
      const request = signal ? { text, signal } : { text };
      raw = await service.textToSpeech(request);
    } else {
      throw unavailable(
        ModelType.TEXT_TO_SPEECH,
        "capability_unavailable",
        "[local-inference] Active local backend does not implement TEXT_TO_SPEECH",
      );
    }
    const bytes = normalizeAudioBytes(raw);

    if (!wantsStream) return bytes;
    // A stream was requested but the backend cannot stream: return one chunk
    // so consumers use the same code path for cloud and local.
    return bufferedAudioStreamResult(bytes, LOCAL_TTS_MIME);
  };
}
632
+
633
/**
 * Builds the TRANSCRIPTION model handler.
 *
 * The handler checks the caller's abort signal before and after the backend
 * call. It prefers `service.transcribe(params)` and falls back to
 * `service.transcribePcm(...)` with PCM parameters extracted from `params`.
 *
 * @returns An async handler resolving to the normalized transcript text.
 * @throws The `unavailable(...)` error when the backend implements neither
 *   transcription entry point, and whatever `throwIfAborted` raises when the
 *   signal is aborted.
 */
function createTranscriptionHandler() {
  return async (
    runtime: IAgentRuntime,
    params: TranscriptionParams | Buffer | string | unknown,
  ): Promise<string> => {
    const service = requireService(runtime, ModelType.TRANSCRIPTION);
    const signal = extractTranscriptionSignal(params);
    throwIfAborted(signal);

    if (typeof service.transcribe === "function") {
      const raw = await service.transcribe(params);
      const text = normalizeTranscript(raw);
      throwIfAborted(signal);
      return text;
    }

    if (typeof service.transcribePcm === "function") {
      const pcm = extractPcmTranscriptionParams(params);
      // Only pass the signal argument when the caller actually supplied one.
      const pending = signal
        ? service.transcribePcm(pcm, signal)
        : service.transcribePcm(pcm);
      const text = normalizeTranscript(await pending);
      throwIfAborted(signal);
      return text;
    }

    throw unavailable(
      ModelType.TRANSCRIPTION,
      "capability_unavailable",
      "[local-inference] Active local backend does not implement TRANSCRIPTION",
    );
  };
}
663
+
664
/**
 * Structural view of the object returned by the loader service's
 * `getMemoryArbiter()`. Every member is optional because the arbiter is
 * probed at runtime (see `tryGetArbiter` / `tryGetImageGenArbiter`).
 *
 * IMAGE_DESCRIPTION can be served through two paths:
 *
 *   (a) The WS2 arbiter path. When the loader service exposes
 *       `getMemoryArbiter()` AND that arbiter has the `vision-describe`
 *       capability registered, the request is dispatched through
 *       `arbiter.requestVisionDescribe(...)`.
 *
 *   (b) Legacy `service.describeImage(...)` / `service.imageDescription`.
 *       Pre-WS2 callers (the AOSP bootstrap, Florence-2 LocalAIManager)
 *       still hit this fallback.
 *
 * The IMAGE handler uses `requestImageGen` in the same way, gated on the
 * `image-gen` capability.
 */
interface ArbiterLike {
  /** Reports whether the named capability is registered on the arbiter. */
  hasCapability?: (capability: string) => boolean;
  /** Dispatches a vision-describe request for the given model key. */
  requestVisionDescribe?: <Req, Res>(req: {
    modelKey: string;
    payload: Req;
  }) => Promise<Res>;
  /** Dispatches an image-generation request for the given model key. */
  requestImageGen?: <Req, Res>(req: {
    modelKey: string;
    payload: Req;
  }) => Promise<Res>;
}
688
+
689
/**
 * Returns the service's memory arbiter when it can serve vision-describe
 * requests, otherwise `null`.
 *
 * The arbiter qualifies only if it is an object exposing both
 * `hasCapability` and `requestVisionDescribe` as functions and
 * `hasCapability("vision-describe")` is truthy.
 */
function tryGetArbiter(
  service: LocalInferenceRuntimeService | null,
): ArbiterLike | null {
  const raw = service?.getMemoryArbiter ? service.getMemoryArbiter() : null;
  if (typeof raw !== "object" || raw === null) {
    return null;
  }
  const candidate = raw as ArbiterLike;
  const usable =
    typeof candidate.hasCapability === "function" &&
    typeof candidate.requestVisionDescribe === "function" &&
    candidate.hasCapability("vision-describe");
  return usable ? candidate : null;
}
705
+
706
/**
 * Returns the service's memory arbiter when it can serve image-generation
 * requests, otherwise `null`.
 *
 * The arbiter qualifies only if it is an object exposing both
 * `hasCapability` and `requestImageGen` as functions and
 * `hasCapability("image-gen")` is truthy.
 */
function tryGetImageGenArbiter(
  service: LocalInferenceRuntimeService | null,
): ArbiterLike | null {
  if (!service?.getMemoryArbiter) {
    return null;
  }
  const resolved = service.getMemoryArbiter();
  if (!resolved || typeof resolved !== "object") {
    return null;
  }
  const candidate = resolved as ArbiterLike;
  if (typeof candidate.hasCapability !== "function") {
    return null;
  }
  if (typeof candidate.requestImageGen !== "function") {
    return null;
  }
  return candidate.hasCapability("image-gen") ? candidate : null;
}
722
+
723
/**
 * Converts IMAGE_DESCRIPTION params into the payload forwarded to the
 * arbiter's vision-describe capability.
 *
 * @param params Either the image URL itself or an object carrying `imageUrl`
 *   and an optional `prompt`.
 * @returns `{ image, prompt }` where `image` is tagged `dataUrl` when the URL
 *   starts with `data:` and `url` otherwise. `prompt` is `undefined` for
 *   string params.
 * @throws The `unavailable(...)` `invalid_input` error when no usable URL is
 *   present. This includes `null`/`undefined` params and blank or
 *   whitespace-only URLs.
 */
function paramsToVisionRequest(params: ImageDescriptionParams | string): {
  image: { kind: "dataUrl"; dataUrl: string } | { kind: "url"; url: string };
  prompt?: string;
} {
  let url: unknown;
  let prompt: string | undefined;
  if (typeof params === "string") {
    url = params;
  } else if (typeof params === "object" && params !== null) {
    // `typeof null === "object"`, so null is excluded explicitly; otherwise a
    // missing params value would surface as a raw TypeError.
    url = params.imageUrl;
    prompt = params.prompt;
  }

  if (typeof url !== "string" || !url.trim()) {
    throw unavailable(
      ModelType.IMAGE_DESCRIPTION,
      "invalid_input",
      "[local-inference] IMAGE_DESCRIPTION requires a non-empty imageUrl",
    );
  }

  if (url.startsWith("data:")) {
    return {
      image: { kind: "dataUrl", dataUrl: url },
      prompt,
    };
  }
  return {
    image: { kind: "url", url },
    prompt,
  };
}
747
+
748
/**
 * Name of the runtime setting that plugin-vision's `hasEliza1VisionHandler`
 * polls. A value of `"1"` makes VisionService prefer the eliza-1
 * IMAGE_DESCRIPTION handler over local Florence-2. It is written the first
 * time the handler runs against an arbiter with the `vision-describe`
 * capability registered, so it reflects actual capability rather than mere
 * plugin presence.
 */
const ELIZA1_VISION_MARKER = "ELIZA1_VISION_HANDLER_PRESENT";

/**
 * Sets the `ELIZA1_VISION_MARKER` setting to `"1"` on the runtime, best-effort.
 *
 * Does nothing when the runtime has no `setSetting`, or when `getSetting`
 * already reports `"1"` or `true`. Errors thrown by `setSetting` are swallowed
 * on purpose.
 */
function markEliza1VisionHandlerPresent(runtime: IAgentRuntime): void {
  const settings = runtime as IAgentRuntime & {
    setSetting?: (key: string, value: unknown) => void;
    getSetting?: (key: string) => unknown;
  };
  if (typeof settings.setSetting !== "function") {
    return;
  }

  const current =
    typeof settings.getSetting === "function"
      ? settings.getSetting(ELIZA1_VISION_MARKER)
      : undefined;
  const alreadyMarked = current === "1" || current === true;
  if (alreadyMarked) {
    return;
  }

  try {
    settings.setSetting(ELIZA1_VISION_MARKER, "1");
  } catch {
    // Some test runtimes reject setSetting at runtime; that is non-fatal.
  }
}
774
+
775
/**
 * Builds the IMAGE_DESCRIPTION model handler.
 *
 * Dispatch order:
 *   1. The arbiter path, when `tryGetArbiter` finds an arbiter with the
 *      `vision-describe` capability. The model key comes from the optional
 *      `modelKey` params extension and defaults to `"qwen3-vl"`.
 *   2. Legacy `service.describeImage(params)`.
 *   3. Legacy `service.imageDescription(params)`.
 *
 * @returns An async handler resolving to the normalized description result.
 * @throws The `unavailable(...)` error when no path is available, and the
 *   `invalid_input` error from `paramsToVisionRequest` for a missing URL.
 */
function createImageDescriptionHandler() {
  return async (
    runtime: IAgentRuntime,
    params: ImageDescriptionParams | string,
  ): Promise<ImageDescriptionResult> => {
    const service = requireService(runtime, ModelType.IMAGE_DESCRIPTION);
    const arbiter = tryGetArbiter(service);
    if (arbiter?.requestVisionDescribe) {
      // The arbiter owns the model handle and the projector cache; we only
      // forward the request and let it dispatch.
      markEliza1VisionHandlerPresent(runtime);
      // `typeof null === "object"`: guard null explicitly so malformed params
      // reach paramsToVisionRequest and fail with a structured invalid_input
      // error instead of a TypeError on `.modelKey`.
      const modelKeyCandidate =
        typeof params === "object" && params !== null
          ? (params as unknown as { modelKey?: unknown }).modelKey
          : undefined;
      const modelKey =
        typeof modelKeyCandidate === "string" && modelKeyCandidate
          ? modelKeyCandidate
          : "qwen3-vl";
      const request = paramsToVisionRequest(params);
      const result = await arbiter.requestVisionDescribe<
        typeof request,
        ImageDescriptionResult | string
      >({ modelKey, payload: request });
      return normalizeImageDescription(result);
    }
    if (typeof service.describeImage === "function") {
      return normalizeImageDescription(await service.describeImage(params));
    }
    if (typeof service.imageDescription === "function") {
      return normalizeImageDescription(await service.imageDescription(params));
    }
    throw unavailable(
      ModelType.IMAGE_DESCRIPTION,
      "capability_unavailable",
      "[local-inference] Active local backend does not implement IMAGE_DESCRIPTION",
    );
  };
}
814
+
815
/**
 * Request payload sent to the arbiter's image-gen capability. It mirrors
 * `ImageGenRequest` from `./services/imagegen/types` but is declared locally
 * so this provider file carries no hard dependency on the imagegen
 * subpackage and the type surface does not reach across plugins.
 */
interface ProviderImageGenRequest {
  /** Text prompt; the only required field. */
  prompt: string;
  negativePrompt?: string;
  /** Output width, parsed from a `"<width>x<height>"` size string. */
  width?: number;
  /** Output height, parsed from a `"<width>x<height>"` size string. */
  height?: number;
  steps?: number;
  guidanceScale?: number;
  seed?: number;
  scheduler?: string;
  /** Caller-supplied abort signal, forwarded unchanged. */
  signal?: AbortSignal;
}
833
+
834
/**
 * Result shape this provider expects back from the arbiter's image-gen
 * capability for a single generated image.
 */
interface ProviderImageGenResult {
  /** Encoded image bytes; must be a non-empty `Uint8Array`. */
  image: Uint8Array;
  /** MIME type used when building the returned data URL. */
  mime: "image/png" | "image/jpeg";
  seed: number;
  metadata: {
    model: string;
    prompt: string;
    steps: number;
    guidanceScale: number;
    inferenceTimeMs: number;
  };
}
846
+
847
/**
 * Converts core `ImageGenerationParams` into the provider's image-gen request.
 *
 * @param params Generation params. `size` is honoured only in `"WxH"` form.
 *   Extension fields (`negativePrompt`, `steps`, `guidanceScale`, `seed`,
 *   `scheduler`, `signal`) are read from the same object and forwarded only
 *   when they have the expected type and a usable value.
 * @returns A request with `prompt` always set and optional knobs set only when
 *   valid. `steps` and `seed` are floored to integers.
 * @throws The `unavailable(...)` `invalid_input` error when `prompt` is not a
 *   non-blank string.
 */
function paramsToImageGenRequest(
  params: ImageGenerationParams,
): ProviderImageGenRequest {
  if (typeof params.prompt !== "string" || !params.prompt.trim()) {
    throw unavailable(
      ModelType.IMAGE,
      "invalid_input",
      "[local-inference] IMAGE requires a non-empty prompt",
    );
  }
  const out: ProviderImageGenRequest = { prompt: params.prompt };

  const sizeMatch =
    typeof params.size === "string"
      ? /^(\d+)x(\d+)$/i.exec(params.size)
      : null;
  if (sizeMatch) {
    const width = Number(sizeMatch[1]);
    const height = Number(sizeMatch[2]);
    // Digit runs long enough to overflow to Infinity are dropped, as are zeros.
    if (Number.isFinite(width) && width > 0) out.width = width;
    if (Number.isFinite(height) && height > 0) out.height = height;
  }

  // Forward optional extended knobs when callers pass them through
  // the `ImageGenerationParams` extension fields. `ImageGenerationParams`
  // in @elizaos/core is intentionally not enriched for this.
  const extended = params as ImageGenerationParams & {
    negativePrompt?: unknown;
    steps?: unknown;
    guidanceScale?: unknown;
    seed?: unknown;
    scheduler?: unknown;
    signal?: unknown;
  };
  if (typeof extended.negativePrompt === "string") {
    out.negativePrompt = extended.negativePrompt;
  }
  if (typeof extended.steps === "number" && Number.isFinite(extended.steps)) {
    // Floor first, then validate: 0.5 would otherwise pass a `> 0` check and
    // be forwarded as 0 steps.
    const steps = Math.floor(extended.steps);
    if (steps >= 1) out.steps = steps;
  }
  if (
    typeof extended.guidanceScale === "number" &&
    Number.isFinite(extended.guidanceScale) &&
    extended.guidanceScale >= 0
  ) {
    out.guidanceScale = extended.guidanceScale;
  }
  if (typeof extended.seed === "number" && Number.isFinite(extended.seed)) {
    out.seed = Math.floor(extended.seed);
  }
  if (typeof extended.scheduler === "string") {
    out.scheduler = extended.scheduler;
  }
  if (extended.signal instanceof AbortSignal) {
    out.signal = extended.signal;
  }
  return out;
}
901
+
902
/**
 * Wraps one generated image as a single-element `ImageGenerationResult[]`
 * whose `url` is a base64 `data:` URL.
 *
 * Any `mime` other than `"image/jpeg"` is reported as `"image/png"`.
 *
 * @throws The `unavailable(...)` `invalid_output` error when `result.image`
 *   is not a non-empty `Uint8Array`.
 */
function imageGenResultToUrls(
  result: ProviderImageGenResult,
): ImageGenerationResult[] {
  const hasPixels =
    result.image instanceof Uint8Array && result.image.length !== 0;
  if (!hasPixels) {
    throw unavailable(
      ModelType.IMAGE,
      "invalid_output",
      "[local-inference] IMAGE backend returned an empty image buffer",
    );
  }
  const mime = result.mime === "image/jpeg" ? result.mime : "image/png";
  const encoded = Buffer.from(result.image).toString("base64");
  const url = `data:${mime};base64,${encoded}`;
  return [{ url }];
}
916
+
917
/**
 * Builds the IMAGE (generation) model handler.
 *
 * The handler requires an arbiter with the `image-gen` capability and issues
 * one arbiter request per requested image, sequentially. `params.count` is
 * normalized to an integer in the range 1-8; missing or non-finite counts are
 * treated as 1.
 *
 * @returns An async handler resolving to one data-URL result per image.
 * @throws The `unavailable(...)` error when the image-gen capability is not
 *   registered, plus any validation error from `paramsToImageGenRequest` or
 *   `imageGenResultToUrls`.
 */
function createImageGenerationHandler() {
  return async (
    runtime: IAgentRuntime,
    params: ImageGenerationParams,
  ): Promise<ImageGenerationResult[]> => {
    const service = requireService(runtime, ModelType.IMAGE);
    const arbiter = tryGetImageGenArbiter(service);
    if (!arbiter?.requestImageGen) {
      throw unavailable(
        ModelType.IMAGE,
        "capability_unavailable",
        "[local-inference] IMAGE generation requires the WS3 arbiter image-gen capability. Register it via createImageGenCapabilityRegistration at plugin init.",
      );
    }
    const request = paramsToImageGenRequest(params);

    // The local-inference IMAGE handler only ever returns a single image per
    // backend call: local diffusion runtimes serialize batch-1 by default,
    // and an N>1 request would just be N back-to-back generates. We honour
    // `params.count` by looping the request rather than pretending the
    // backend supports batched output.
    //
    // The count is normalized before clamping. `Math.max`/`Math.min`
    // propagate NaN, which would make the loop below run zero times, and a
    // fractional count such as 2.5 would yield three images.
    const requestedCount =
      typeof params.count === "number" && Number.isFinite(params.count)
        ? Math.floor(params.count)
        : 1;
    const count = Math.min(8, Math.max(1, requestedCount));

    // Callers that want to pin a specific diffusion model pass `modelKey`
    // through the params extension; otherwise the key is resolved from the
    // runtime settings by resolveImageGenModelKeyFromRuntime.
    const modelKeyCandidate = (
      params as ImageGenerationParams & { modelKey?: unknown }
    ).modelKey;
    const modelKey =
      typeof modelKeyCandidate === "string" && modelKeyCandidate
        ? modelKeyCandidate
        : resolveImageGenModelKeyFromRuntime(runtime);

    const results: ImageGenerationResult[] = [];
    for (let i = 0; i < count; i += 1) {
      // With an explicit seed, offset it per image so that repeated
      // generations do not all use the same seed.
      const seeded: ProviderImageGenRequest =
        typeof request.seed === "number" && i > 0
          ? { ...request, seed: request.seed + i }
          : request;
      const result = await arbiter.requestImageGen<
        ProviderImageGenRequest,
        ProviderImageGenResult
      >({ modelKey, payload: seeded });
      results.push(...imageGenResultToUrls(result));
    }
    return results;
  };
}
967
+
968
/**
 * Inlined tier → default image-gen model id map. Duplicates the
 * `TIER_TO_DEFAULT_IMAGE_MODEL` entries in `backend-selector.ts`;
 * provider.ts intentionally avoids importing the imagegen subpackage
 * so the provider stays loadable on runtimes that don't ship
 * the WS3 capability. The two maps are kept in sync by the WS3
 * routing test (`imagegen-routing.test.ts`).
 */
const TIER_TO_DEFAULT_IMAGE_MODEL_KEY: Readonly<Record<string, string>> = {
  "eliza-1-2b": "imagegen-sd-1_5-q5_0",
  "eliza-1-4b": "imagegen-sd-1_5-q5_0",
  "eliza-1-9b": "imagegen-z-image-turbo-q4_k_m",
  "eliza-1-27b": "imagegen-z-image-turbo-q4_k_m",
  "eliza-1-27b-256k": "imagegen-z-image-turbo-q4_k_m",
};

/**
 * Resolve the active tier-bound image-gen model id without importing
 * the imagegen subpackage. Resolution order:
 *
 *   1. `runtime.getSetting("LOCAL_INFERENCE_IMAGE_MODEL_KEY")`, an explicit pin.
 *   2. `runtime.getSetting("LOCAL_INFERENCE_ACTIVE_TIER")` mapped through
 *      `TIER_TO_DEFAULT_IMAGE_MODEL_KEY`.
 *   3. The small-tier default (`imagegen-sd-1_5-q5_0`).
 *
 * Settings that are not non-blank strings are ignored, and a runtime without
 * a `getSetting` function resolves to the default.
 *
 * @param runtime Agent runtime whose settings are consulted.
 * @returns The model key to pass to the image-gen arbiter capability.
 */
function resolveImageGenModelKeyFromRuntime(runtime: IAgentRuntime): string {
  const r = runtime as IAgentRuntime & {
    getSetting?: (key: string) => unknown;
  };
  // Reads a setting as a trimmed, non-empty string, or `undefined`.
  const readSetting = (key: string): string | undefined => {
    if (typeof r.getSetting !== "function") return undefined;
    const value = r.getSetting(key);
    if (typeof value !== "string") return undefined;
    const trimmed = value.trim();
    return trimmed ? trimmed : undefined;
  };

  const pinned = readSetting("LOCAL_INFERENCE_IMAGE_MODEL_KEY");
  if (pinned) return pinned;

  const tier = readSetting("LOCAL_INFERENCE_ACTIVE_TIER");
  if (
    tier &&
    // Own-property check: a plain lookup would also find inherited members
    // such as `constructor` or `toString` and return a function.
    Object.prototype.hasOwnProperty.call(TIER_TO_DEFAULT_IMAGE_MODEL_KEY, tier)
  ) {
    const mapped = TIER_TO_DEFAULT_IMAGE_MODEL_KEY[tier];
    if (mapped) return mapped;
  }
  return "imagegen-sd-1_5-q5_0";
}
1006
+
1007
/**
 * Creates the full set of local-inference model handlers, keyed by
 * `ModelType`: small and large text, embeddings, image generation, image
 * description, text-to-speech, and transcription.
 *
 * @returns A fresh handler map; each call builds new handler closures.
 */
export function createLocalInferenceModelHandlers(): NonNullable<
  Plugin["models"]
> {
  const textSmall = createTextHandler(ModelType.TEXT_SMALL);
  const textLarge = createTextHandler(ModelType.TEXT_LARGE);
  const embedding = createEmbeddingHandler();
  const imageGeneration = createImageGenerationHandler();
  const imageDescription = createImageDescriptionHandler();
  const textToSpeech = createTextToSpeechHandler();
  const transcription = createTranscriptionHandler();

  return {
    [ModelType.TEXT_SMALL]: textSmall,
    [ModelType.TEXT_LARGE]: textLarge,
    [ModelType.TEXT_EMBEDDING]: embedding,
    [ModelType.IMAGE]: imageGeneration,
    [ModelType.IMAGE_DESCRIPTION]: imageDescription,
    [ModelType.TEXT_TO_SPEECH]: textToSpeech,
    [ModelType.TRANSCRIPTION]: transcription,
  };
}
1020
+
1021
/**
 * Handler map for the static plugin object: every handler from
 * `createLocalInferenceModelHandlers()` except TEXT_EMBEDDING.
 */
function createStaticPluginModelHandlers(): NonNullable<Plugin["models"]> {
  const handlers = { ...createLocalInferenceModelHandlers() };
  // Drop the embedding entry from the copy; the source map is left untouched.
  delete handlers[ModelType.TEXT_EMBEDDING];
  return handlers;
}
1026
+
1027
/**
 * The local-inference plugin definition: provider identity and priority,
 * actions, the voice-entity-bound event handler, HTTP routes, the static
 * model handlers, and an `init` hook that logs which capabilities the
 * runtime's backend service exposes.
 */
export const localInferencePlugin: Plugin = {
  name: LOCAL_INFERENCE_PROVIDER_ID,
  description:
    "Eliza-1 local provider for text, embeddings, text-to-speech, and transcription.",
  priority: LOCAL_INFERENCE_PRIORITY,
  actions: [
    generateMediaAction,
    identifySpeakerAction,
    startTranscriptionAction,
    stopTranscriptionAction,
  ],
  events: {
    // Round-trip half of the voice→entity binding: when the merge engine
    // (plugin-lifeops) reports a binding, entityId is persisted onto the
    // matching voice profile(s). See runtime/voice-entity-binding.ts.
    [EventType.VOICE_ENTITY_BOUND]: [handleVoiceEntityBound],
  },
  // Voice-profile HTTP surface (speaker→entity bind/unbind plus the
  // VoiceProfileSection management UI). These are registered as rawPath
  // plugin routes because no server forwards these namespaces to the
  // local-inference route dispatcher. See routes/voice-profile-plugin-routes.ts.
  routes: [...voiceProfilePluginRoutes, ...transcriptsRoutes],
  // TEXT_EMBEDDING is wired by ensureLocalInferenceHandler(), not by this
  // static object. Runtime bootstrap probes embeddings before the user has
  // activated an Eliza-1 bundle; a static handler would claim a provider
  // that cannot embed yet and abort startup instead of letting the app come
  // online.
  models: createStaticPluginModelHandlers(),
  async init(_config: unknown, runtime: IAgentRuntime) {
    const service = serviceFromRuntime(runtime);
    if (service) {
      const isFn = (member: unknown): boolean => typeof member === "function";
      const capabilities = {
        generate: isFn(service.generate),
        embed: isFn(service.embed),
        textToSpeech:
          isFn(service.synthesizeSpeech) || isFn(service.textToSpeech),
        imageDescription:
          isFn(service.describeImage) || isFn(service.imageDescription),
        transcription: isFn(service.transcribe) || isFn(service.transcribePcm),
      };
      logger.info(
        capabilities,
        "[local-inference] Provider connected to runtime backend service",
      );
      return;
    }
    logger.info(
      "[local-inference] Provider registered; no active backend service is exposed yet. Model calls will return LOCAL_INFERENCE_UNAVAILABLE until an Eliza-1 backend is activated.",
    );
  },
};

export default localInferencePlugin;