@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,647 @@
1
+ /**
2
+ * @module plugin-local-inference/actions/generate-media
3
+ *
4
+ * Unified `GENERATE_MEDIA` agent action.
5
+ *
6
+ * Routes a single user-facing intent to the appropriate `runtime.useModel(...)`
7
+ * call:
8
+ *
9
+ * - image → `ModelType.IMAGE` (WS3 arbiter, returns PNG bytes)
10
+ * - audio → `ModelType.TEXT_TO_SPEECH` (Eliza-1 / local TTS, returns PCM/WAV/MP3)
11
+ * - video → unavailable in the local backend; refuses with a clean message
12
+ *
13
+ * Intent classification is keyword-first (cheap, deterministic) with an
14
+ * optional `ModelType.TEXT_SMALL` JSON fallback for ambiguous prompts. The
15
+ * prompt body is extracted by stripping any leading imperative ("draw me a ",
16
+ * "say ", "speak in spanish: ") so that downstream models see a clean
17
+ * description.
18
+ *
19
+ * Trajectory hook: result `data.computerUseAction` is set to a stable
20
+ * marker (`GENERATE_MEDIA_IMAGE` / `GENERATE_MEDIA_AUDIO`) so the trajectory
21
+ * logger picks the action up exactly the way it picks up
22
+ * `plugin-computeruse` actions.
23
+ */
24
+
25
+ import {
26
+ type Action,
27
+ type ActionResult,
28
+ type Content,
29
+ ContentType,
30
+ type HandlerCallback,
31
+ type IAgentRuntime,
32
+ type ImageGenerationResult,
33
+ logger,
34
+ type Media,
35
+ type Memory,
36
+ ModelType,
37
+ } from "@elizaos/core";
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Intent classification
41
+ // ---------------------------------------------------------------------------
42
+
43
/** Media families a request can be classified into. */
export type MediaKind = "image" | "audio" | "video";

/** Verdict produced by intent detection. */
interface IntentDetection {
  /** Detected media family. */
  kind: MediaKind;
  /** Text that is forwarded to the generating model. */
  prompt: string;
  /** Stage that produced the verdict. */
  source: "keyword" | "classifier";
}

/** One row of the keyword table. */
interface KeywordRule {
  /** Media family selected when `pattern` matches. */
  kind: MediaKind;
  /** Tested against the trimmed message to decide whether the rule applies. */
  pattern: RegExp;
  /** Explicit strip pattern to remove the leading imperative. */
  strip: RegExp;
}
57
+
58
/**
 * Keyword rules, evaluated in order; the first rule whose `pattern` matches
 * wins. Matching is case-insensitive (`i` flag). `pattern` may match anywhere
 * in the trimmed message, whereas `strip` is anchored to the start of the
 * message and removes a leading imperative ("draw me a ", "say ", ...) when
 * one is present. When `strip` does not match, the message is left untouched.
 *
 * Ordering notes:
 *  - Image rules come first, then audio, then video.
 *  - The bare `render` rule deliberately steps aside when `render` is
 *    directly followed by a video noun used as the object ("render a video
 *    of ...", "render an animation."), so those requests reach the video
 *    rule and are refused instead of being sent to the image model.
 *    Phrases such as "render a movie poster" still count as image requests.
 */
const KEYWORD_RULES: readonly KeywordRule[] = [
  // Image rules (most-common first).
  {
    kind: "image",
    pattern: /\b(draw|sketch|paint|illustrate)\b/i,
    strip:
      /^\s*(please\s+)?(draw|sketch|paint|illustrate)(\s+me)?(\s+an?)?\s+(of\s+)?/i,
  },
  {
    kind: "image",
    pattern:
      /\b(generate|create|make)\s+(an?\s+|the\s+)?(image|picture|photo|photograph|drawing|illustration)\b/i,
    strip:
      /^\s*(please\s+)?(generate|create|make)\s+(an?\s+|the\s+)?(image|picture|photo|photograph|drawing|illustration)(\s+of)?\s*/i,
  },
  {
    kind: "image",
    pattern: /\b(image|picture|photo|photograph)\s+of\b/i,
    strip: /^\s*(an?\s+|the\s+)?(image|picture|photo|photograph)\s+of\s+/i,
  },
  {
    kind: "image",
    // Negative lookahead: skip "render [me] [a|an|the] <video noun>" when the
    // noun is the object of the request (followed by "of", punctuation, or
    // end of message). Those belong to the video rule below.
    pattern:
      /\brender\b(?!\s+(?:me\s+)?(?:an?\s+|the\s+)?(?:video|animation|movie|clip)(?:\s+of\b|\s*(?:[:,.!?]|$)))/i,
    strip: /^\s*(please\s+)?render(\s+me)?(\s+an?)?\s+(of\s+)?/i,
  },
  // Audio rules.
  {
    kind: "audio",
    pattern: /\b(say|speak|read\s+aloud|read\s+out|narrate)\b/i,
    strip:
      /^\s*(please\s+)?(say|speak|read\s+aloud|read\s+out|narrate)(\s+aloud)?(\s+this)?(\s+in\s+\w+)?[:,]?\s+/i,
  },
  {
    kind: "audio",
    pattern: /\b(text\s*to\s*speech|tts|voice\s+this|voice\s+over)\b/i,
    strip:
      /^\s*(please\s+)?(do\s+)?(text\s*to\s*speech|tts|voice\s+this|voice\s+over)[:,]?\s*/i,
  },
  {
    kind: "audio",
    pattern: /\bgenerate\s+(an?\s+|some\s+)?(audio|speech|voice)\b/i,
    strip:
      /^\s*(please\s+)?generate\s+(an?\s+|some\s+)?(audio|speech|voice)\s+(of|for|saying)?\s*/i,
  },
  // Video rules. We detect them only to refuse cleanly.
  {
    kind: "video",
    pattern: /\b(video|animate|animation|movie|clip)\b/i,
    strip:
      /^\s*(please\s+)?(generate|create|make|render)?\s*(an?\s+|the\s+)?(video|animation|movie|clip)(\s+of)?\s*/i,
  },
];
116
+
117
/** Async labeller: resolves to a media kind, or `null` when it has no verdict. */
type ClassifierFn = (prompt: string) => Promise<MediaKind | null>;

/** Options accepted by `detectMediaIntent`. */
export interface IntentDetectorOptions {
  /**
   * Fallback text classifier, consulted only when no keyword rule matched.
   * Tests pass a deterministic stand-in; production code binds this to
   * `runtime.useModel(ModelType.TEXT_SMALL, ...)`.
   */
  classifier?: ClassifierFn;
}
127
+
128
/** Removes the rule's leading imperative from `text` and trims what is left. */
function stripPrompt(rule: KeywordRule, text: string): string {
  const withoutImperative = text.replace(rule.strip, "");
  return withoutImperative.trim();
}
131
+
132
/**
 * Runs the keyword table against a message.
 *
 * @returns the verdict of the first matching rule, or `null` when the message
 *   is blank or no rule matches. If stripping the imperative leaves nothing,
 *   the whole trimmed message is used as the prompt.
 */
function tryKeywordMatch(text: string): IntentDetection | null {
  const message = text.trim();
  if (message.length === 0) return null;

  const hit = KEYWORD_RULES.find((candidate) => candidate.pattern.test(message));
  if (hit === undefined) return null;

  const stripped = stripPrompt(hit, message);
  return {
    kind: hit.kind,
    prompt: stripped || message,
    source: "keyword",
  };
}
147
+
148
/**
 * Works out which kind of media a user message is asking for.
 *
 * Steps:
 *   1. Keyword rules run first; a hit is returned immediately.
 *   2. Without a hit, the optional classifier is asked for a label. Only the
 *      three known kinds are accepted; anything else yields `null` so the
 *      caller can decline. Classifier verdicts carry the whole trimmed
 *      message as the prompt.
 *
 * @param text raw user message
 * @param options optional classifier fallback
 * @returns the detected intent, or `null` when none could be determined
 */
export async function detectMediaIntent(
  text: string,
  options: IntentDetectorOptions = {},
): Promise<IntentDetection | null> {
  const fromKeywords = tryKeywordMatch(text);
  if (fromKeywords) return fromKeywords;

  const { classifier } = options;
  if (!classifier) return null;

  const verdict = await classifier(text);
  switch (verdict) {
    case "image":
    case "audio":
    case "video":
      return { kind: verdict, prompt: text.trim(), source: "classifier" };
    default:
      return null;
  }
}
170
+
171
/**
 * Instruction text placed in front of the user message when the small text
 * model is asked to label a request. The text ends with "User message:" and
 * carries no trailing newline.
 */
const CLASSIFIER_INSTRUCTION =
  "Classify the following user message into exactly one media kind:\n" +
  ' - "image" if the user wants a picture, drawing, photo, or rendering.\n' +
  ' - "audio" if the user wants speech, narration, or text-to-speech output.\n' +
  ' - "video" if the user wants a video, animation, or motion clip.\n' +
  'Respond with ONLY a JSON object of the form {"kind":"image"} (one key).\n' +
  'If the request is none of these, respond with {"kind":"none"}.\n' +
  "\n" +
  "User message:";
181
+
182
/**
 * Extracts the media kind from raw classifier output.
 *
 * The model is asked for a bare JSON object such as `{"kind":"image"}`, but
 * small models often wrap it in prose. Parsing is done in two passes:
 *
 *   1. The widest `{ ... }` span (first `{` to last `}`) is parsed. If it is
 *      valid JSON, its verdict is final.
 *   2. If that span is not valid JSON (for example the answer is followed by
 *      more text containing braces), every flat `{ ... }` group is tried in
 *      order and the first one carrying a known kind wins.
 *
 * @param raw text returned by the classifier model
 * @returns `"image"`, `"audio"` or `"video"`; `null` for anything else
 *   (blank output, no JSON object, unknown or missing `kind`)
 */
function parseClassifierOutput(raw: string): MediaKind | null {
  const trimmed = raw.trim();
  if (!trimmed) return null;

  // `undefined` = not parseable as JSON; `null` = parsed but no usable kind.
  const readKind = (candidate: string): MediaKind | null | undefined => {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      return undefined;
    }
    if (!parsed || typeof parsed !== "object") return null;
    const kind = (parsed as { kind?: unknown }).kind;
    return kind === "image" || kind === "audio" || kind === "video"
      ? kind
      : null;
  };

  const widest = trimmed.match(/\{[\s\S]*\}/);
  if (!widest) return null;
  const verdict = readKind(widest[0]);
  if (verdict !== undefined) return verdict;

  // The widest span was not JSON: look for a well-formed flat object inside.
  for (const candidate of trimmed.match(/\{[^{}]*\}/g) ?? []) {
    const kind = readKind(candidate);
    if (kind) return kind;
  }
  return null;
}
198
+
199
/**
 * Builds the production classifier: asks the runtime's small text model to
 * label the message and parses the reply.
 *
 * The user message is placed on its own line after the instruction text so
 * that it is not glued to the trailing "User message:" label. A reply that
 * is not a string is treated as "no verdict" (`null`) instead of throwing
 * inside the parser.
 *
 * @param runtime agent runtime used for the `ModelType.TEXT_SMALL` call
 * @returns a classifier resolving to a media kind or `null`
 */
function makeRuntimeClassifier(runtime: IAgentRuntime): ClassifierFn {
  return async (prompt) => {
    const response: unknown = await runtime.useModel(ModelType.TEXT_SMALL, {
      prompt: `${CLASSIFIER_INSTRUCTION}\n${prompt}`,
      maxTokens: 32,
      temperature: 0,
    });
    return typeof response === "string"
      ? parseClassifierOutput(response)
      : null;
  };
}
209
+
210
+ // ---------------------------------------------------------------------------
211
+ // Dispatch
212
+ // ---------------------------------------------------------------------------
213
+
214
/** Fields shared by every successful dispatch result. */
interface DispatchPayload {
  /** Raw media bytes; may be empty when only a URL is available. */
  bytes: Uint8Array;
  /** URL of the media (a `data:` URL or whatever the backend returned). */
  url: string;
}

/** Result of a successful image generation. */
interface DispatchSuccessImage extends DispatchPayload {
  kind: "image";
  mime: "image/png" | "image/jpeg";
}

/** Result of a successful speech synthesis. */
interface DispatchSuccessAudio extends DispatchPayload {
  kind: "audio";
  mime: "audio/wav" | "audio/mpeg" | "audio/pcm";
}

/** Any successful dispatch result, discriminated by `kind`. */
type DispatchSuccess = DispatchSuccessImage | DispatchSuccessAudio;
229
+
230
/**
 * Converts whatever the TTS backend returned into a `Uint8Array` view.
 *
 * Accepted inputs:
 *  - any `ArrayBufferView` (`Uint8Array`, Node `Buffer`, other typed arrays,
 *    `DataView`): a byte view over the same memory region is returned, with
 *    no copy;
 *  - a plain `ArrayBuffer`: wrapped in a `Uint8Array`.
 *
 * `ArrayBuffer.isView` is used instead of `instanceof Uint8Array` so that a
 * separate `Buffer` branch is unnecessary (`Buffer` is a `Uint8Array`
 * subclass) and views created in another realm are still recognised.
 *
 * @param value raw value returned by `runtime.useModel(TEXT_TO_SPEECH, ...)`
 * @returns a byte view over the audio data
 * @throws Error when `value` is not binary data
 */
function normalizeAudioBytes(value: unknown): Uint8Array {
  if (ArrayBuffer.isView(value)) {
    return new Uint8Array(value.buffer, value.byteOffset, value.byteLength);
  }
  if (value instanceof ArrayBuffer) {
    return new Uint8Array(value);
  }
  throw new Error(
    "[generate-media] TEXT_TO_SPEECH backend returned non-binary audio output",
  );
}
249
+
250
/**
 * Guesses the audio container from the first bytes of a buffer.
 *
 *  - `RIFF` magic            → `audio/wav`
 *  - `ID3` tag               → `audio/mpeg`
 *  - plausible MPEG frame    → `audio/mpeg`
 *  - anything else           → `audio/pcm`
 *
 * The MPEG check validates more than the 11-bit frame sync: the version,
 * layer, bitrate-index and sample-rate fields must not hold their
 * reserved/invalid values. Without that, headerless 16-bit PCM that happens
 * to start with `FF FF FF FF` (two samples of −1, common in near-silent
 * audio) would be reported as MP3.
 *
 * Buffers shorter than four bytes are always reported as `audio/pcm`.
 *
 * @param bytes audio data to sniff
 * @returns the detected MIME type
 */
function detectAudioMime(bytes: Uint8Array): DispatchSuccessAudio["mime"] {
  if (bytes.length < 4) return "audio/pcm";

  const b0 = bytes[0] ?? 0;
  const b1 = bytes[1] ?? 0;
  const b2 = bytes[2] ?? 0;
  const b3 = bytes[3] ?? 0;

  // "RIFF" header → WAV.
  if (b0 === 0x52 && b1 === 0x49 && b2 === 0x46 && b3 === 0x46) {
    return "audio/wav";
  }
  // "ID3" → MP3 with ID3 tag.
  if (b0 === 0x49 && b1 === 0x44 && b2 === 0x33) {
    return "audio/mpeg";
  }
  // MPEG audio frame header: 11 sync bits, then version / layer / bitrate /
  // sample-rate fields, each of which has one reserved or invalid value.
  if (b0 === 0xff && (b1 & 0xe0) === 0xe0) {
    const version = (b1 >> 3) & 0x03; // 0b01 is reserved
    const layer = (b1 >> 1) & 0x03; // 0b00 is reserved
    const bitrateIndex = (b2 >> 4) & 0x0f; // 0b1111 is invalid
    const sampleRateIndex = (b2 >> 2) & 0x03; // 0b11 is reserved
    if (
      version !== 0x01 &&
      layer !== 0x00 &&
      bitrateIndex !== 0x0f &&
      sampleRateIndex !== 0x03
    ) {
      return "audio/mpeg";
    }
  }
  return "audio/pcm";
}
272
+
273
/**
 * Decodes a base64 `data:` URL holding a PNG or JPEG image.
 *
 * The media type is matched case-insensitively and the payload may contain
 * line breaks (wrapped base64); both are legal in data URLs and previously
 * caused the URL to be treated as a remote link with no bytes.
 *
 * @param url URL returned by the image backend
 * @returns the MIME type and decoded bytes, or `null` when `url` is not a
 *   base64 PNG/JPEG data URL
 */
function detectImageMimeFromDataUrl(
  url: string,
): { mime: "image/png" | "image/jpeg"; bytes: Uint8Array } | null {
  const match = url.match(/^data:(image\/(?:png|jpeg));base64,([\s\S]*)$/i);
  if (!match) return null;
  const declared = (match[1] ?? "").toLowerCase();
  const mime = declared === "image/jpeg" ? "image/jpeg" : "image/png";
  // Node's base64 decoder skips whitespace inside the payload.
  const bytes = new Uint8Array(Buffer.from(match[2] ?? "", "base64"));
  return { mime, bytes };
}
282
+
283
/**
 * Requests one image from the runtime's `ModelType.IMAGE` handler.
 *
 * A base64 PNG/JPEG data URL is decoded into bytes. Any other URL (e.g. a
 * CDN link) is passed through untouched with an empty byte array and a
 * default MIME type of `image/png`; consumers can fetch it themselves.
 *
 * @param runtime agent runtime providing `useModel`
 * @param prompt image description
 * @throws Error when the backend yields no result with a non-empty `url`
 */
async function dispatchImage(
  runtime: IAgentRuntime,
  prompt: string,
): Promise<DispatchSuccessImage> {
  const generated = (await runtime.useModel(ModelType.IMAGE, {
    prompt,
    count: 1,
  })) as ImageGenerationResult[];

  const url = Array.isArray(generated) ? generated[0]?.url : undefined;
  if (typeof url !== "string" || url.length === 0) {
    throw new Error(
      "[generate-media] IMAGE backend returned no result; expected ImageGenerationResult[]",
    );
  }

  const decoded = detectImageMimeFromDataUrl(url);
  return {
    kind: "image",
    bytes: decoded ? decoded.bytes : new Uint8Array(0),
    mime: decoded ? decoded.mime : "image/png",
    url,
  };
}
315
+
316
/**
 * Synthesises speech through the runtime's `ModelType.TEXT_TO_SPEECH`
 * handler and packages the result as a base64 data URL.
 *
 * @param runtime agent runtime providing `useModel`
 * @param text text to speak
 * @throws Error when the backend output is not binary or is empty
 */
async function dispatchAudio(
  runtime: IAgentRuntime,
  text: string,
): Promise<DispatchSuccessAudio> {
  const output = (await runtime.useModel(ModelType.TEXT_TO_SPEECH, {
    text,
  })) as unknown;

  const audio = normalizeAudioBytes(output);
  if (audio.length === 0) {
    throw new Error(
      "[generate-media] TEXT_TO_SPEECH backend returned an empty buffer",
    );
  }

  const mime = detectAudioMime(audio);
  const encoded = Buffer.from(audio).toString("base64");
  const url = `data:${mime};base64,${encoded}`;
  return { kind: "audio", bytes: audio, mime, url };
}
338
+
339
+ // ---------------------------------------------------------------------------
340
+ // Attachment shaping
341
+ // ---------------------------------------------------------------------------
342
+
343
/**
 * Produces an identifier for a generated attachment.
 *
 * Uses `crypto.randomUUID()` when the global is available. Otherwise (not
 * expected on Node 20+) it falls back to a `gen-media-<timestamp>-<random>`
 * string built from `Date.now()` and `Math.random()`.
 */
function uuidLike(): string {
  const hasNativeUuid =
    typeof crypto !== "undefined" && typeof crypto.randomUUID === "function";
  if (!hasNativeUuid) {
    const stamp = Date.now();
    const suffix = Math.random().toString(36).slice(2, 10);
    return `gen-media-${stamp}-${suffix}`;
  }
  return crypto.randomUUID();
}
354
+
355
/**
 * Wraps a dispatch result in a `Media` attachment.
 *
 * @param success result of the image or audio dispatch
 * @param prompt prompt that produced the media; stored as the description
 * @returns an attachment with a fresh id and the result's URL
 */
function makeAttachment(success: DispatchSuccess, prompt: string): Media {
  const isImage = success.kind === "image";
  const attachment: Media = {
    id: uuidLike(),
    url: success.url,
    title: isImage ? "Generated image" : "Generated audio",
    source: "generate-media",
    description: prompt,
    contentType: isImage ? ContentType.IMAGE : ContentType.AUDIO,
  };
  return attachment;
}
367
+
368
+ // ---------------------------------------------------------------------------
369
+ // Message-text extraction
370
+ // ---------------------------------------------------------------------------
371
+
372
/**
 * Reads `message.content.text` defensively.
 *
 * @returns the text when it is a string, otherwise `""` (missing message,
 *   missing content, or non-string text)
 */
function extractMessageText(message: Memory | null | undefined): string {
  const candidate = message?.content?.text;
  if (typeof candidate !== "string") return "";
  return candidate;
}
376
+
377
+ // ---------------------------------------------------------------------------
378
+ // Action definition
379
+ // ---------------------------------------------------------------------------
380
+
381
/** Injection points for `buildGenerateMediaHandler`. */
interface BuildHandlerOptions {
  /** Test seam: override the intent detector. */
  detectIntent: typeof detectMediaIntent;
  /** Test seam: override the classifier resolver. */
  classifierFactory: (runtime: IAgentRuntime) => ClassifierFn;
}

/**
 * Creates the `GENERATE_MEDIA` action handler.
 *
 * The returned handler:
 *   1. rejects blank messages;
 *   2. detects the requested media kind (keywords first, classifier second);
 *   3. refuses video requests, which the local backend cannot serve;
 *   4. dispatches image / audio generation and replies through `callback`
 *      with a single attachment.
 *
 * Every outcome, success or failure, is reported both through `callback`
 * (when provided) and through the returned `ActionResult`. The result's
 * `data.computerUseAction` holds a stable marker for the outcome
 * (`GENERATE_MEDIA_IMAGE`, `GENERATE_MEDIA_AUDIO_FAILED`, ...). The handler
 * itself does not throw for detection or dispatch failures; they are
 * converted into `success: false` results.
 *
 * @param opts optional test seams; omitted entries fall back to
 *   `detectMediaIntent` and `makeRuntimeClassifier`
 * @returns the async handler function
 */
export function buildGenerateMediaHandler(
  opts: Partial<BuildHandlerOptions> = {},
) {
  const detect = opts.detectIntent ?? detectMediaIntent;
  const classifierFactory = opts.classifierFactory ?? makeRuntimeClassifier;

  /**
   * Sends `errText` through the callback and builds the matching failure
   * result. `extra` is merged into `data` after the two fixed keys; `error`
   * defaults to the message text.
   */
  async function fail(
    callback: HandlerCallback | undefined,
    errText: string,
    marker: string,
    extra: Record<string, string> = {},
    error: string | Error = errText,
  ): Promise<ActionResult> {
    await callback?.({ text: errText });
    return {
      success: false,
      text: errText,
      error,
      data: {
        source: "generate-media",
        computerUseAction: marker,
        ...extra,
      },
    };
  }

  return async function generateMediaHandler(
    runtime: IAgentRuntime,
    message: Memory,
    _state?: unknown,
    _options?: unknown,
    callback?: HandlerCallback,
  ): Promise<ActionResult> {
    const raw = extractMessageText(message);
    if (!raw.trim()) {
      return fail(
        callback,
        "GENERATE_MEDIA requires a non-empty message describing what to generate.",
        "GENERATE_MEDIA_INVALID",
      );
    }

    let intent: IntentDetection | null;
    try {
      intent = await detect(raw, { classifier: classifierFactory(runtime) });
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      logger.warn({ err: msg }, "[generate-media] intent detection failed");
      return fail(
        callback,
        `Could not classify media request: ${msg}`,
        "GENERATE_MEDIA_CLASSIFY_FAILED",
      );
    }

    if (!intent) {
      return fail(
        callback,
        'I couldn\'t tell whether you wanted an image, audio, or video. Try "draw me ...", "say ...", or describe the picture you want.',
        "GENERATE_MEDIA_AMBIGUOUS",
      );
    }

    if (intent.kind === "video") {
      return fail(
        callback,
        "Video generation is unavailable in the local inference backend.",
        "GENERATE_MEDIA_VIDEO_UNSUPPORTED",
        { detectedKind: intent.kind, detectedSource: intent.source },
      );
    }

    if (!intent.prompt) {
      // Reported with the same detection metadata as the other post-detection
      // failures so the outcome can be traced back to its source.
      return fail(
        callback,
        `Detected a ${intent.kind} request but couldn't extract a prompt.`,
        "GENERATE_MEDIA_EMPTY_PROMPT",
        { detectedKind: intent.kind, detectedSource: intent.source },
      );
    }

    try {
      const result =
        intent.kind === "image"
          ? await dispatchImage(runtime, intent.prompt)
          : await dispatchAudio(runtime, intent.prompt);
      const attachment = makeAttachment(result, intent.prompt);
      const narration =
        result.kind === "image"
          ? "Here's the image you asked for."
          : "Here's the audio you asked for.";
      const responseContent: Content = {
        text: narration,
        attachments: [attachment],
        source: "generate-media",
      };
      await callback?.(responseContent);
      return {
        success: true,
        text: narration,
        userFacingText: narration,
        values: {
          mediaKind: result.kind,
          mediaMime: result.mime,
        },
        data: {
          source: "generate-media",
          computerUseAction:
            result.kind === "image"
              ? "GENERATE_MEDIA_IMAGE"
              : "GENERATE_MEDIA_AUDIO",
          detectedKind: intent.kind,
          detectedSource: intent.source,
          prompt: intent.prompt,
          mime: result.mime,
          byteLength: result.bytes.byteLength,
          attachmentId: attachment.id,
          attachmentUrl: attachment.url,
        },
      };
    } catch (err) {
      // Covers backend failures and a throwing success callback alike.
      const msg = err instanceof Error ? err.message : String(err);
      logger.warn(
        { err: msg, kind: intent.kind },
        "[generate-media] dispatch failed",
      );
      const isImage = intent.kind === "image";
      return fail(
        callback,
        isImage
          ? `Image generation failed: ${msg}`
          : `Audio generation failed: ${msg}`,
        isImage ? "GENERATE_MEDIA_IMAGE_FAILED" : "GENERATE_MEDIA_AUDIO_FAILED",
        {
          detectedKind: intent.kind,
          detectedSource: intent.source,
          prompt: intent.prompt,
        },
        err instanceof Error ? err : new Error(msg),
      );
    }
  };
}
550
+
551
+ // ---------------------------------------------------------------------------
552
+ // Validator
553
+ // ---------------------------------------------------------------------------
554
+
555
/**
 * Cheap gate for the planner: every message with non-blank text qualifies.
 * Keyword matching and the classifier are deferred to the handler so that
 * selecting GENERATE_MEDIA never costs a model call up front.
 */
async function validate(
  _runtime: IAgentRuntime,
  message: Memory,
): Promise<boolean> {
  const candidate = extractMessageText(message).trim();
  return candidate !== "";
}
564
+
565
+ // ---------------------------------------------------------------------------
566
+ // Action export (metadata and examples)
567
+ // ---------------------------------------------------------------------------
568
+
569
/** Builds one user → agent example exchange that resolves to GENERATE_MEDIA. */
function mediaExample(request: string, reply: string) {
  return [
    { name: "{{user1}}", content: { text: request } },
    {
      name: "{{agent}}",
      content: { text: reply, actions: ["GENERATE_MEDIA"] },
    },
  ];
}

/**
 * The `GENERATE_MEDIA` action: planner metadata, the cheap validator, the
 * default handler, and four example exchanges (two image, one audio, one
 * refused video request).
 */
export const generateMediaAction: Action = {
  name: "GENERATE_MEDIA",
  similes: [
    "DRAW_IMAGE",
    "MAKE_PICTURE",
    "CREATE_IMAGE",
    "RENDER_IMAGE",
    "SPEAK",
    "SAY_ALOUD",
    "TEXT_TO_SPEECH",
    "GENERATE_AUDIO",
    "GENERATE_VIDEO",
  ],
  description:
    "Generate an image, audio (TTS), or video from a natural-language prompt. Routes to the appropriate local model via the runtime model registry. Video is unavailable in the local backend and is refused cleanly.",
  descriptionCompressed:
    "GENERATE_MEDIA image|audio|video-refusal prompt; routes IMAGE|TEXT_TO_SPEECH",
  routingHint:
    "explicit ask to draw/picture/photo/say/speak/read-aloud/animate -> GENERATE_MEDIA; not for general text replies",
  suppressPostActionContinuation: true,
  validate,
  handler: buildGenerateMediaHandler(),
  examples: [
    mediaExample(
      "Draw me a sunset over a mountain lake.",
      "Here's the image you asked for.",
    ),
    mediaExample("Say hello in spanish.", "Here's the audio you asked for."),
    mediaExample(
      "Generate a picture of a cyberpunk city at night.",
      "Here's the image you asked for.",
    ),
    mediaExample(
      "Make a 10-second video of a cat dancing.",
      "Video generation is unavailable in the local inference backend.",
    ),
  ],
};

export default generateMediaAction;
@@ -0,0 +1,23 @@
1
+ /**
2
+ * @module plugin-local-inference/actions/identify-speaker
3
+ *
4
+ * `IDENTIFY_SPEAKER` agent action — the explicit, user-driven half of the
5
+ * voice → entity binding (issue #8234, shape #2).
6
+ *
7
+ * When the OWNER names a voice the agent just heard but hasn't identified
8
+ * ("that was Jill", "this is my friend Sam"), this action binds the most
9
+ * recently observed *unidentified* speaker profile to a named person. It
10
+ * does not touch the entity graph directly — it emits `VOICE_TURN_OBSERVED`
11
+ * so the merge engine (plugin-lifeops) creates/merges the Entity, then the
12
+ * round-trip `VOICE_ENTITY_BOUND` handler persists `entityId` back onto the
13
+ * voice profile. If no merge-engine plugin is loaded the action is inert
14
+ * beyond logging intent.
15
+ *
16
+ * Target selection: an explicit `profileId` option wins; otherwise the
17
+ * single most-recently-observed profile whose `entityId` is still `null`
18
+ * (i.e. "the person who just spoke and isn't known yet").
19
+ */
20
+ import { type Action } from "@elizaos/core";
21
+ export declare function extractSpeakerName(text: string): string | null;
22
+ export declare const identifySpeakerAction: Action;
23
+ //# sourceMappingURL=identify-speaker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"identify-speaker.d.ts","sourceRoot":"","sources":["identify-speaker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EACN,KAAK,MAAM,EAMX,MAAM,eAAe,CAAC;AA8BvB,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAS9D;AA8FD,eAAO,MAAM,qBAAqB,EAAE,MAUnC,CAAC"}