@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Vision-language describe-image types (WS2).
3
+ *
4
+ * Two layers live here:
5
+ *
6
+ * 1. The **request/result** contract every WS2 backend implements
7
+ * (`VisionDescribeRequest`, `VisionDescribeResult`). Callers pass
8
+ * raw image bytes + a prompt; backends return a title+description.
9
+ *
10
+ * 2. The **backend** interface (`VisionDescribeBackend`) that the
11
+ * `MemoryArbiter` registers as a capability handler. One backend
12
+ * per binding family (node-llama-cpp / llama-server / AOSP libllama
13
+ * shim). All three implement the same `load → describe → unload`
14
+ * shape so the arbiter can swap between them without caring how
15
+ * the projector is wired underneath.
16
+ *
17
+ * Why a separate file: the arbiter's `CapabilityRegistration<TBackend,
18
+ * TRequest, TResult>` is generic; pinning concrete shapes here keeps
19
+ * the registration sites short and removes a dozen casts at the
20
+ * call-site.
21
+ */
22
+
/**
 * Byte layout of a raw pixel buffer handed to the vision pipeline.
 *
 * RGBA is what most platforms deliver (HTMLCanvasElement, the Capacitor
 * `Camera`, the desktop `puppeteer-core` screenshot pipeline). The encoder
 * normalizes the layout internally; this string-literal union is kept so
 * the hashing step can settle on one stable byte layout that does not
 * depend on the order the platform happened to provide.
 */
export type VisionImageChannelOrder = "rgba" | "rgb" | "bgra" | "bgr";
31
+
/**
 * Image payload that a backend encodes. The arbiter never inspects it;
 * the value is passed through to the backend untouched.
 *
 * Several wrappers are accepted (raw bytes, base64, data URL, plain URL)
 * because the three upstream entry points — the HTTP route, the agent
 * runtime model handler and the computer-use frame loop — each favour a
 * different shape. The backend resolves whichever wrapper it receives to
 * bytes exactly once.
 */
export type VisionImageInput =
  | { kind: "bytes"; bytes: Uint8Array; mimeType?: string }
  | { kind: "base64"; base64: string; mimeType?: string }
  | { kind: "dataUrl"; dataUrl: string }
  | { kind: "url"; url: string; mimeType?: string };
45
+
/**
 * Arguments a caller supplies to `describeImage`.
 *
 * `modelFamily` keeps projected-token cache entries of different VL
 * families apart even though they share one hash space: Qwen3-VL tokens
 * cannot stand in for Florence-2 tokens. `qwen3-vl` (the WS2 deliverable)
 * is the default, and every further family registers under its own
 * identifier.
 */
export interface VisionDescribeRequest {
  /** The image to describe, in any of the supported wrappers. */
  image: VisionImageInput;
  /** Optional prompt text accompanying the image. */
  prompt?: string;
  /**
   * Model family identifier. It namespaces the projector cache, so
   * switching the backend's model family invalidates cached tokens.
   * `"qwen3-vl"` is assumed when omitted.
   */
  modelFamily?: string;
  /** Upper bound on output tokens (description-length budget); 256 when omitted. */
  maxTokens?: number;
  /** Value in 0..1; 0.2 when omitted, so descriptions stay close to deterministic. */
  temperature?: number;
  /** Optional `AbortSignal` supplied by the caller. */
  signal?: AbortSignal;
}
68
+
/** What a backend returns; the shape matches what ImageDescriptionResult expects. */
export interface VisionDescribeResult {
  title: string;
  description: string;
  /** Milliseconds spent in the projector, best-effort (used for arbiter telemetry). */
  projectorMs?: number;
  /** Milliseconds spent in the decoder, best-effort. */
  decodeMs?: number;
  /** True when the projected tokens were served from the WS1 vision cache. */
  cacheHit?: boolean;
}
80
+
/**
 * Arguments used each time a vision-describe backend is loaded.
 *
 * The arbiter's `load(modelKey)` carries nothing but an opaque key. The
 * binding turns that key into concrete model + mmproj paths by way of
 * this struct, which `createVisionCapabilityRegistration` fills in from
 * the catalog.
 */
export interface VisionDescribeLoadArgs {
  /** Absolute path of the text decoder GGUF, i.e. the "main" model. */
  modelPath: string;
  /** Absolute path of the mmproj projector GGUF that matches the model. */
  mmprojPath: string;
  /**
   * Preferred GPU offload. Each backend maps it onto its own knob:
   * `gpuLayers` for node-llama-cpp, `--n-gpu-layers` for llama-server,
   * `eliza_llama_model_params_set_n_gpu_layers` for the AOSP libllama
   * shim. With `"auto"` the binding chooses; a number is honoured
   * verbatim.
   * NOTE(review): `"max"` is part of the type but its meaning is not
   * described here — confirm against the bindings.
   */
  gpuLayers?: number | "auto" | "max";
  /** Maximum sampled context window, in tokens; 4096 when omitted. */
  contextSize?: number;
}
102
+
/**
 * Contract shared by every WS2 backend. It is deliberately small:
 * `describe` is the only method the arbiter ever calls directly, while
 * `dispose` is wrapped by the arbiter's `unload` so that a backend can
 * release GPU/VRAM and drop file descriptors when it is evicted.
 */
export interface VisionDescribeBackend {
  /** Stable identifier: `"capacitor-llama"`, `"llama-server"`, `"aosp"`, or `"fake"` (tests). */
  readonly id: "capacitor-llama" | "llama-server" | "aosp" | "fake";
  /**
   * Perform one describe pass. When the caller's hash already produced a
   * cache hit, a backend MAY use the injected projector cache exposed as
   * `args.projectedTokens` rather than running the projector again.
   * Backends without projector-token reuse simply ignore that field.
   */
  describe(
    request: VisionDescribeRequest,
    args?: VisionDescribeBackendOptions,
  ): Promise<VisionDescribeResult>;
  /** Free the loaded weights. Idempotent. */
  dispose(): Promise<void>;
}
125
+
/**
 * Options the arbiter wrapper hands to the backend on each call. They are
 * declared here, not on `VisionDescribeRequest`, to keep the
 * caller-facing request type free of arbiter implementation details.
 */
export interface VisionDescribeBackendOptions {
  /**
   * Projected tokens already computed and stored in the WS1
   * vision-embedding cache. If supplied, the backend SHOULD bypass its
   * own projector step and decode directly against these tokens. A
   * backend unable to do so remains correct by ignoring the field, in
   * which case the arbiter's wrapper measures `cacheHit: false`.
   */
  projectedTokens?: {
    tokens: Float32Array;
    tokenCount: number;
    hiddenSize: number;
  };
}
145
+
/**
 * Load function of the capability handler. The arbiter invokes it with a
 * model key such as `"qwen3-vl-2b"`; the implementation maps that key to
 * a concrete `(modelPath, mmprojPath)` pair using the catalog plus the
 * installed registry, and resolves to a live backend.
 */
export type VisionDescribeBackendLoader = (
  modelKey: string,
) => Promise<VisionDescribeBackend>;
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Final optional IMAGE_DESCRIPTION fallback layer.
3
+ *
4
+ * This mirrors the cloud wrapper shape but only runs when the previous
5
+ * handler explicitly returned a typed fallback outcome.
6
+ */
7
+ import type { ImageDescriptionParams } from "@elizaos/core";
8
+ import { type LocalVisionOutcome, type VisionFallbackReason, type WrappedImageDescriptionHandler } from "./cloud-fallback";
9
+ export interface VisionVastFallbackOptions {
10
+ enabled?: boolean;
11
+ apiKey?: string;
12
+ baseUrl?: string;
13
+ fetch?: typeof fetch;
14
+ handler?: (params: ImageDescriptionParams | string, reason: VisionFallbackReason) => Promise<LocalVisionOutcome>;
15
+ log?: (message: string, detail?: Record<string, unknown>) => void;
16
+ }
17
+ export declare function wrapImageDescriptionHandlerWithVastFallback(previous: WrappedImageDescriptionHandler, options?: VisionVastFallbackOptions): WrappedImageDescriptionHandler;
18
+ //# sourceMappingURL=vast-fallback.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vast-fallback.d.ts","sourceRoot":"","sources":["vast-fallback.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EACX,sBAAsB,EAEtB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEN,KAAK,kBAAkB,EAGvB,KAAK,oBAAoB,EACzB,KAAK,8BAA8B,EACnC,MAAM,kBAAkB,CAAC;AAE1B,MAAM,WAAW,yBAAyB;IACzC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,KAAK,CAAC;IACrB,OAAO,CAAC,EAAE,CACT,MAAM,EAAE,sBAAsB,GAAG,MAAM,EACvC,MAAM,EAAE,oBAAoB,KACxB,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjC,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC;CAClE;AA+DD,wBAAgB,2CAA2C,CAC1D,QAAQ,EAAE,8BAA8B,EACxC,OAAO,GAAE,yBAA8B,GACrC,8BAA8B,CA8BhC"}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Final optional IMAGE_DESCRIPTION fallback layer.
3
+ *
4
+ * This mirrors the cloud wrapper shape but only runs when the previous
5
+ * handler explicitly returned a typed fallback outcome.
6
+ */
7
+
8
+ import type {
9
+ ImageDescriptionParams,
10
+ ImageDescriptionResult,
11
+ } from "@elizaos/core";
12
+ import {
13
+ isVisionFallbackOutcome,
14
+ type LocalVisionOutcome,
15
+ type LocalVisionResult,
16
+ normalizeVisionDescription,
17
+ type VisionFallbackReason,
18
+ type WrappedImageDescriptionHandler,
19
+ } from "./cloud-fallback";
20
+
/** Configuration accepted by `wrapImageDescriptionHandlerWithVastFallback`. */
export interface VisionVastFallbackOptions {
  /** Turns the fallback layer on or off; treated as `true` when omitted. */
  enabled?: boolean;
  /** API key; when blank or omitted the `ELIZA_VAST_API_KEY` env var is used. */
  apiKey?: string;
  /**
   * Base URL of the service; when blank or omitted the
   * `ELIZA_VAST_BASE_URL` env var is used, then `https://api.vast.ai`.
   */
  baseUrl?: string;
  /** `fetch` implementation to use in place of the global one. */
  fetch?: typeof fetch;
  /**
   * Custom fallback implementation. When provided it is called with the
   * original params and the upstream fallback reason instead of the
   * built-in HTTP request.
   */
  handler?: (
    params: ImageDescriptionParams | string,
    reason: VisionFallbackReason,
  ) => Promise<LocalVisionOutcome>;
  /** Receives a message (and optional detail) just before the fallback runs. */
  log?: (message: string, detail?: Record<string, unknown>) => void;
}
32
+
/**
 * Determine the VAST API key: the trimmed `options.apiKey` if it is
 * non-empty, otherwise the trimmed `ELIZA_VAST_API_KEY` environment
 * variable.
 *
 * @param options Fallback configuration that may carry an explicit key.
 * @returns The key, or `null` when neither source yields a non-empty string.
 */
function resolveVastApiKey(options: VisionVastFallbackOptions): string | null {
  const explicitKey = options.apiKey?.trim();
  if (explicitKey) return explicitKey;

  const envKey = process.env.ELIZA_VAST_API_KEY?.trim();
  if (envKey) return envKey;

  return null;
}
38
+
/**
 * Determine the VAST base URL. Preference order: trimmed
 * `options.baseUrl`, trimmed `ELIZA_VAST_BASE_URL` environment variable,
 * then the built-in `https://api.vast.ai`. Blank values are skipped.
 *
 * @param options Fallback configuration that may carry an explicit base URL.
 * @returns The chosen base URL with any trailing slashes removed.
 */
function resolveVastBaseUrl(options: VisionVastFallbackOptions): string {
  const configured =
    options.baseUrl?.trim() || process.env.ELIZA_VAST_BASE_URL?.trim();
  const baseUrl = configured ? configured : "https://api.vast.ai";
  // Strip trailing slashes so callers can append a path that starts with "/".
  return baseUrl.replace(/\/+$/, "");
}
46
+
/**
 * Build the JSON body for the VAST describe endpoint.
 *
 * The image source is taken from `params` itself when it is a string,
 * otherwise from the first non-empty of `params.imageUrl` and
 * `params.image`. A source starting with `data:` is sent as
 * `{ kind: "data" }`; anything else is sent as `{ kind: "url" }`.
 *
 * @param params Image description params, or a bare URL / data-URL string.
 * @returns The request body; `prompt` is present only when `params`
 *   carries a non-empty prompt.
 * @throws Error when no non-empty image source can be found. The request
 *   would otherwise be sent with an empty URL and only fail upstream.
 */
function imageRequestBody(params: ImageDescriptionParams | string): {
  image: { kind: "url"; url: string } | { kind: "data"; data: string };
  prompt?: string;
} {
  const source =
    typeof params === "string"
      ? params
      : (params as { imageUrl?: string }).imageUrl ||
        (params as { image?: string }).image;
  if (typeof source !== "string" || source.length === 0) {
    throw new Error("VAST image fallback requires an image URL or data URL");
  }

  // One classification shared by the string and the object form.
  const image = source.startsWith("data:")
    ? { kind: "data" as const, data: source }
    : { kind: "url" as const, url: source };

  if (typeof params !== "string" && params.prompt) {
    return { image, prompt: params.prompt };
  }
  return { image };
}
65
+
/**
 * POST an image description request to `<base URL>/v1/vision/describe`
 * and return the normalized result.
 *
 * @param params Image description params, or a bare URL / data-URL string.
 * @param options Supplies the API key, base URL and an optional `fetch`
 *   override (the global `fetch` is used otherwise).
 * @returns The JSON response body passed through `normalizeVisionDescription`.
 * @throws Error when no API key can be resolved, or when the response
 *   status is not OK.
 */
async function callVastVision(
  params: ImageDescriptionParams | string,
  options: VisionVastFallbackOptions,
): Promise<ImageDescriptionResult> {
  const apiKey = resolveVastApiKey(options);
  if (!apiKey) {
    throw new Error("VAST image fallback is not configured");
  }

  const doFetch = options.fetch ?? fetch;
  const endpoint = `${resolveVastBaseUrl(options)}/v1/vision/describe`;
  const requestInit = {
    method: "POST",
    headers: {
      "content-type": "application/json",
      authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(imageRequestBody(params)),
  };

  const response = await doFetch(endpoint, requestInit);
  if (!response.ok) {
    throw new Error(`VAST image fallback failed with ${response.status}`);
  }

  const payload = (await response.json()) as LocalVisionResult;
  return normalizeVisionDescription(payload);
}
93
+
/**
 * Wrap an IMAGE_DESCRIPTION handler with a final VAST fallback layer.
 *
 * The wrapped handler first awaits `previous`. A non-fallback outcome is
 * normalized and returned as is. A fallback outcome is returned unchanged
 * when the layer is disabled, or when neither `options.handler` nor an
 * API key is available. Otherwise the fallback is attempted through
 * `options.handler` if given, else through the built-in HTTP call.
 *
 * @param previous Handler whose fallback outcomes this layer reacts to.
 * @param options Fallback configuration; `enabled` is treated as `true`
 *   when omitted.
 * @returns A handler with the same signature as `previous`. Errors raised
 *   by the fallback attempt are not rethrown; they come back as a
 *   `"vast-error"` fallback outcome carrying the error as `cause`.
 */
export function wrapImageDescriptionHandlerWithVastFallback(
  previous: WrappedImageDescriptionHandler,
  options: VisionVastFallbackOptions = {},
): WrappedImageDescriptionHandler {
  const enabled = options.enabled ?? true;
  const log = options.log ?? (() => undefined);

  return async (params): Promise<LocalVisionOutcome> => {
    const outcome = await previous(params);
    if (!isVisionFallbackOutcome(outcome)) {
      return normalizeVisionDescription(outcome);
    }

    // Nothing to try: layer switched off, or no handler and no API key.
    if (!enabled || (!options.handler && !resolveVastApiKey(options))) {
      return outcome;
    }

    log("[vision/vast-fallback] upstream IMAGE_DESCRIPTION fallback", {
      reason: outcome.reason,
    });

    try {
      const vastOutcome = await (options.handler
        ? options.handler(params, outcome.reason)
        : callVastVision(params, options));
      return isVisionFallbackOutcome(vastOutcome)
        ? vastOutcome
        : normalizeVisionDescription(vastOutcome);
    } catch (error) {
      // Surface the failure as a typed outcome instead of throwing.
      const cause = error instanceof Error ? error : new Error(String(error));
      return { kind: "fallback", reason: "vast-error", cause };
    }
  };
}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Content-hashed cache for projected vision-language tokens (WS1 deliverable).
3
+ *
4
+ * Vision models in the Eliza-1 stack (Qwen3-VL, Florence-2, Apothic-VL) all
5
+ * go through the same expensive projector step: raw pixel
6
+ * bytes → patch embeddings → projector → tokens that the text decoder
7
+ * actually consumes. When the user pastes the same screenshot three times
8
+ * in a row, or when computer-use takes near-duplicate frames of an idle
9
+ * screen, we want to skip the projector entirely and reuse the cached
10
+ * tokens.
11
+ *
12
+ * Contract:
13
+ * - Caller computes a stable hash of the *normalized* input bytes
14
+ * (downscaled to the model's input resolution, then SHA-256 of the
15
+ * packed pixels). The hash is the cache key.
16
+ * - Caller pairs the hash with the projected token tensor (a flat
17
+ * `Float32Array` of length `tokenCount * hiddenSize`) AND the geometry
18
+ * `{ tokenCount, hiddenSize }` so a reader can reshape on the way out.
19
+ * - `get(hash)` returns `null` on miss or expiry, the entry on hit.
20
+ * A hit also "touches" the entry to keep it warm under LRU.
21
+ * - `set(hash, entry, ttlMs?)` inserts with a TTL (default 5 min); if
22
+ * the LRU is full, the coldest entry is evicted.
23
+ *
24
+ * Why a separate module:
25
+ * - The arbiter owns the *model handle*; the cache holds *per-input
26
+ * projected weights* that survive across model loads/unloads of the
27
+ * same family. Keeping the cache in a sibling module lets the vision
28
+ * plugin reuse it even when the arbiter swapped the underlying model
29
+ * for memory pressure (the projector tokens are still valid as long
30
+ * as the model family + hash match — we encode the family in the key
31
+ * to be safe).
32
+ *
33
+ * What this is NOT:
34
+ * - A blob cache for the encoder *weights*. Those live in mmap regions
35
+ * owned by the arbiter / SharedResourceRegistry and are evicted via
36
+ * `MmapRegionHandle.evictPages()`.
37
+ * - A cache for downstream LLM generations. Prefix-cache for text is
38
+ * handled by `cache-bridge.ts` and the backend session pool.
39
+ */
40
+ export interface VisionEmbeddingEntry {
41
+ /** Flat row-major buffer: `tokenCount * hiddenSize` floats. */
42
+ tokens: Float32Array;
43
+ tokenCount: number;
44
+ hiddenSize: number;
45
+ /** True when this entry is still within its TTL. */
46
+ live: boolean;
47
+ }
48
+ export interface VisionEmbeddingCacheConfig {
49
+ /** Max entries retained. LRU evicts beyond this. Default 32. */
50
+ maxEntries: number;
51
+ /** Default TTL when `set()` is called without one. Default 5 min. */
52
+ defaultTtlMs: number;
53
+ }
54
+ export declare class VisionEmbeddingCache {
55
+ private readonly config;
56
+ /**
57
+ * `Map` preserves insertion order; we re-insert on hit to bubble entries
58
+ * to the back, so the first key in iteration order is the LRU candidate.
59
+ */
60
+ private readonly entries;
61
+ private readonly now;
62
+ constructor(opts?: {
63
+ config?: Partial<VisionEmbeddingCacheConfig>;
64
+ now?: () => number;
65
+ });
66
+ /**
67
+ * Lookup. Returns the entry on hit (and refreshes LRU position), or null
68
+ * on miss / expiry. Expired entries are deleted on read so they don't
69
+ * silently consume the LRU budget.
70
+ */
71
+ get(hash: string): VisionEmbeddingEntry | null;
72
+ /**
73
+ * Insert. Replaces any existing entry under the same hash. Evicts the
74
+ * coldest entry if we're at capacity. `ttlMs` overrides the configured
75
+ * default; pass 0 to use the default.
76
+ */
77
+ set(hash: string, entry: {
78
+ tokens: Float32Array;
79
+ tokenCount: number;
80
+ hiddenSize: number;
81
+ }, ttlMs?: number): void;
82
+ /** Diagnostic: current entry count. */
83
+ size(): number;
84
+ /** Diagnostic: snapshot of (hash, byteSize, expiresAtMs) for each entry. */
85
+ snapshot(): ReadonlyArray<{
86
+ hash: string;
87
+ bytes: number;
88
+ expiresAtMs: number;
89
+ }>;
90
+ /** Drop everything. Cheap; only releases JS-side refs to the Float32Arrays. */
91
+ clear(): void;
92
+ /**
93
+ * Drop entries whose TTL has expired. Returns the number removed. Cheap
94
+ * to call from the arbiter's pressure tick.
95
+ */
96
+ purgeExpired(nowMs?: number): number;
97
+ }
98
+ //# sourceMappingURL=vision-embedding-cache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vision-embedding-cache.d.ts","sourceRoot":"","sources":["vision-embedding-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AASH,MAAM,WAAW,oBAAoB;IACpC,+DAA+D;IAC/D,MAAM,EAAE,YAAY,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,oDAAoD;IACpD,IAAI,EAAE,OAAO,CAAC;CACd;AAED,MAAM,WAAW,0BAA0B;IAC1C,gEAAgE;IAChE,UAAU,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,YAAY,EAAE,MAAM,CAAC;CACrB;AAOD,qBAAa,oBAAoB;IAChC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA6B;IACpD;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiC;IACzD,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAe;gBAGlC,IAAI,GAAE;QACL,MAAM,CAAC,EAAE,OAAO,CAAC,0BAA0B,CAAC,CAAC;QAC7C,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;KACd;IAYP;;;;OAIG;IACH,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,oBAAoB,GAAG,IAAI;IAkB9C;;;;OAIG;IACH,GAAG,CACF,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;QAAE,MAAM,EAAE,YAAY,CAAC;QAAC,UAAU,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,EACvE,KAAK,CAAC,EAAE,MAAM,GACZ,IAAI;IAsBP,uCAAuC;IACvC,IAAI,IAAI,MAAM;IAId,4EAA4E;IAC5E,QAAQ,IAAI,aAAa,CAAC;QACzB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,EAAE,MAAM,CAAC;KACpB,CAAC;IAYF,+EAA+E;IAC/E,KAAK,IAAI,IAAI;IAIb;;;OAGG;IACH,YAAY,CAAC,KAAK,GAAE,MAAmB,GAAG,MAAM;CAUhD"}
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Content-hashed cache for projected vision-language tokens (WS1 deliverable).
3
+ *
4
+ * Vision models in the Eliza-1 stack (Qwen3-VL, Florence-2, Apothic-VL) all
5
+ * go through the same expensive projector step: raw pixel
6
+ * bytes → patch embeddings → projector → tokens that the text decoder
7
+ * actually consumes. When the user pastes the same screenshot three times
8
+ * in a row, or when computer-use takes near-duplicate frames of an idle
9
+ * screen, we want to skip the projector entirely and reuse the cached
10
+ * tokens.
11
+ *
12
+ * Contract:
13
+ * - Caller computes a stable hash of the *normalized* input bytes
14
+ * (downscaled to the model's input resolution, then SHA-256 of the
15
+ * packed pixels). The hash is the cache key.
16
+ * - Caller pairs the hash with the projected token tensor (a flat
17
+ * `Float32Array` of length `tokenCount * hiddenSize`) AND the geometry
18
+ * `{ tokenCount, hiddenSize }` so a reader can reshape on the way out.
19
+ * - `get(hash)` returns `null` on miss or expiry, the entry on hit.
20
+ * A hit also "touches" the entry to keep it warm under LRU.
21
+ * - `set(hash, entry, ttlMs?)` inserts with a TTL (default 5 min); if
22
+ * the LRU is full, the coldest entry is evicted.
23
+ *
24
+ * Why a separate module:
25
+ * - The arbiter owns the *model handle*; the cache holds *per-input
26
+ * projected tokens* that survive across model loads/unloads of the
27
+ * same family. Keeping the cache in a sibling module lets the vision
28
+ * plugin reuse it even when the arbiter swapped the underlying model
29
+ * for memory pressure (the projector tokens are still valid as long
30
+ * as the model family + hash match — this module uses the key verbatim,
30
+ * so the caller must encode the family in the key to be safe).
32
+ *
33
+ * What this is NOT:
34
+ * - A blob cache for the encoder *weights*. Those live in mmap regions
35
+ * owned by the arbiter / SharedResourceRegistry and are evicted via
36
+ * `MmapRegionHandle.evictPages()`.
37
+ * - A cache for downstream LLM generations. Prefix-cache for text is
38
+ * handled by `cache-bridge.ts` and the backend session pool.
39
+ */
40
+
41
/** Internal stored record: the cached payload plus its absolute expiry time. */
interface CacheEntry {
	tokens: Float32Array;
	tokenCount: number;
	hiddenSize: number;
	/** Deadline on the cache clock (ms). The entry is expired once this is `<= now`. */
	expiresAtMs: number;
}

/** What `get()` hands back to a caller on a cache hit. */
export interface VisionEmbeddingEntry {
	/** Flat row-major buffer: `tokenCount * hiddenSize` floats. */
	tokens: Float32Array;
	tokenCount: number;
	hiddenSize: number;
	/** True when this entry is still within its TTL. */
	live: boolean;
}

export interface VisionEmbeddingCacheConfig {
	/** Max entries retained. LRU evicts beyond this. Default 32. */
	maxEntries: number;
	/** Default TTL when `set()` is called without one. Default 5 min. */
	defaultTtlMs: number;
}

const DEFAULTS: VisionEmbeddingCacheConfig = {
	maxEntries: 32,
	defaultTtlMs: 5 * 60_000,
};

/**
 * Resolve one numeric config value: use `fallback` when the value is absent
 * or NaN, then clamp the result to at least `floor`.
 *
 * NaN must be rejected explicitly because `Math.max(floor, NaN)` is NaN, and
 * every later comparison against NaN is false — which would silently disable
 * the capacity limit or the TTL.
 */
function resolveLimit(
	value: number | undefined,
	fallback: number,
	floor: number,
): number {
	const chosen = value == null || Number.isNaN(value) ? fallback : value;
	return Math.max(floor, chosen);
}

/** True for a non-negative integer, i.e. a usable tensor dimension. */
function isDimension(value: number): boolean {
	return Number.isInteger(value) && value >= 0;
}

/**
 * Delete every entry whose deadline is at or before `nowMs` and return how
 * many were removed. Deleting the current key while iterating a `Map` is
 * well-defined, so no key snapshot is needed.
 */
function dropExpired(entries: Map<string, CacheEntry>, nowMs: number): number {
	let removed = 0;
	for (const [hash, entry] of entries) {
		if (entry.expiresAtMs <= nowMs) {
			entries.delete(hash);
			removed++;
		}
	}
	return removed;
}

/**
 * Bounded LRU + TTL cache of projected vision tokens, keyed by a
 * caller-supplied hash string. The key is used verbatim.
 */
export class VisionEmbeddingCache {
	private readonly config: VisionEmbeddingCacheConfig;
	/**
	 * `Map` preserves insertion order; we re-insert on hit to bubble entries
	 * to the back, so the first key in iteration order is the LRU candidate.
	 */
	private readonly entries = new Map<string, CacheEntry>();
	private readonly now: () => number;

	/**
	 * @param opts.config Partial overrides. Missing or NaN values fall back to
	 *   the defaults; `maxEntries` is clamped to >= 1 and `defaultTtlMs` to >= 0.
	 * @param opts.now Clock returning milliseconds; defaults to `Date.now`.
	 */
	constructor(
		opts: {
			config?: Partial<VisionEmbeddingCacheConfig>;
			now?: () => number;
		} = {},
	) {
		this.config = {
			maxEntries: resolveLimit(
				opts.config?.maxEntries,
				DEFAULTS.maxEntries,
				1,
			),
			defaultTtlMs: resolveLimit(
				opts.config?.defaultTtlMs,
				DEFAULTS.defaultTtlMs,
				0,
			),
		};
		this.now = opts.now ?? (() => Date.now());
	}

	/**
	 * Lookup. Returns the entry on hit (and refreshes LRU position), or null
	 * on miss / expiry. Expired entries are deleted on read so they don't
	 * silently consume the LRU budget.
	 *
	 * The returned `tokens` is the stored `Float32Array` itself, not a copy.
	 */
	get(hash: string): VisionEmbeddingEntry | null {
		const found = this.entries.get(hash);
		if (!found) return null;
		if (found.expiresAtMs <= this.now()) {
			this.entries.delete(hash);
			return null;
		}
		// Touch — re-insert so it moves to the back of the iteration order.
		this.entries.delete(hash);
		this.entries.set(hash, found);
		return {
			tokens: found.tokens,
			tokenCount: found.tokenCount,
			hiddenSize: found.hiddenSize,
			live: true,
		};
	}

	/**
	 * Insert. Replaces any existing entry under the same hash. If the cache
	 * is at capacity, already-expired entries are dropped first; only if that
	 * frees no room is the coldest live entry evicted. `ttlMs` overrides the
	 * configured default; pass 0 (or omit it) to use the default.
	 *
	 * @throws Error when `tokens.length !== tokenCount * hiddenSize`, or when
	 *   `tokenCount` / `hiddenSize` is not a non-negative integer.
	 */
	set(
		hash: string,
		entry: { tokens: Float32Array; tokenCount: number; hiddenSize: number },
		ttlMs?: number,
	): void {
		if (entry.tokens.length !== entry.tokenCount * entry.hiddenSize) {
			throw new Error(
				`[vision-embedding-cache] token buffer length ${entry.tokens.length} does not match tokenCount*hiddenSize (${entry.tokenCount}*${entry.hiddenSize})`,
			);
		}
		// The product check alone accepts e.g. (-2) * (-3) === 6; a reader could
		// never reshape such an entry, so reject it at the door.
		if (!isDimension(entry.tokenCount) || !isDimension(entry.hiddenSize)) {
			throw new Error(
				`[vision-embedding-cache] tokenCount and hiddenSize must be non-negative integers (got ${entry.tokenCount}, ${entry.hiddenSize})`,
			);
		}
		const ttl =
			ttlMs !== undefined && ttlMs > 0 ? ttlMs : this.config.defaultTtlMs;
		const nowMs = this.now();
		this.entries.delete(hash);
		// Reclaim dead slots before a live entry has to pay for them. Done before
		// the insert so the incoming entry itself is never a purge candidate.
		if (this.entries.size >= this.config.maxEntries) {
			dropExpired(this.entries, nowMs);
		}
		this.entries.set(hash, {
			tokens: entry.tokens,
			tokenCount: entry.tokenCount,
			hiddenSize: entry.hiddenSize,
			expiresAtMs: nowMs + ttl,
		});
		while (this.entries.size > this.config.maxEntries) {
			const coldest = this.entries.keys().next().value;
			if (coldest === undefined) break;
			this.entries.delete(coldest);
		}
	}

	/** Diagnostic: current entry count (may include not-yet-purged expired entries). */
	size(): number {
		return this.entries.size;
	}

	/**
	 * Diagnostic: snapshot of (hash, byteSize, expiresAtMs) for each entry,
	 * ordered coldest first.
	 */
	snapshot(): ReadonlyArray<{
		hash: string;
		bytes: number;
		expiresAtMs: number;
	}> {
		const out: { hash: string; bytes: number; expiresAtMs: number }[] = [];
		for (const [hash, entry] of this.entries) {
			out.push({
				hash,
				bytes: entry.tokens.byteLength,
				expiresAtMs: entry.expiresAtMs,
			});
		}
		return out;
	}

	/** Drop everything. Cheap; only releases JS-side refs to the Float32Arrays. */
	clear(): void {
		this.entries.clear();
	}

	/**
	 * Drop entries whose TTL has expired. Returns the number removed. Cheap
	 * to call from the arbiter's pressure tick.
	 *
	 * @param nowMs Reference time; defaults to the cache's own clock.
	 */
	purgeExpired(nowMs: number = this.now()): number {
		return dropExpired(this.entries, nowMs);
	}
}
@@ -0,0 +1,88 @@
1
+ # Voice Workbench
2
+
3
+ Tracking issue: [elizaOS/eliza#8785](https://github.com/elizaOS/eliza/issues/8785).
4
+
5
+ elizaOS ships a mature voice pipeline (VAD, streaming ASR, EOT classifier,
6
+ barge-in, diarization, speaker imprint/profiles, Kokoro/OmniVoice TTS) but its
7
+ test harnesses were **fragmented** across five families with no shared scenario
8
+ format, no shared corpus, divergent metric definitions, and a headful surface
9
+ that only covered a single-speaker, single-turn round-trip. The Voice Workbench
10
+ unifies them onto **one scenario format, one metric module, and one report**.
11
+
12
+ ## Status
13
+
14
+ This directory holds the **pure, framework-level foundation** — the parts that
15
+ can be implemented, tested, and shipped without an audio corpus, native models,
16
+ or a browser. The execution runners that actually drive real services/audio are
17
+ intentionally **gated** (they need a provisioned Eliza-1 local backend + a
18
+ synthesized corpus) and are listed under *Remaining* below.
19
+
20
+ ### Implemented (this directory, unit-tested, no native artifacts)
21
+
22
+ | Piece | File | What it is |
23
+ | --- | --- | --- |
24
+ | **Scenario schema** | `voice-scenario.ts` | The declarative `VoiceScenario` format: named `participants` (voice→entity), ordered `turns` (`expectRespond`, `expectedTranscript`, `expectedSpeakerLabel`, `expectedEntity`, `pausesMs`), scenario `assertions` (WER/DER/EOT/latency ceilings), and `classes`. Pure `validateVoiceScenario` reports every consistency error at once. |
25
+ | **Metric module (single source of truth)** | `e2e-harness.ts` | All voice scoring lives here. WER is delegated to `@elizaos/shared/voice-wer` (one definition for headless + headful). Added scorers: `scoreEotDecision` (latency p50/p95 + false-trigger/false-suppression rate), `scoreRespondDecision` (FP/FN split), `scoreDiarization` (DER + confusions/misses), `scoreEntityExtraction` (precision/recall/F1), `scoreVoiceEntityMatch` (recognized-voice→entity accuracy). |
26
+ | **Benchmark report** | `voice-workbench-report.ts` | `buildVoiceWorkbenchReport` rolls a matrix of per-scenario scorer results into one gating report (per-metric mean/worst + percentiles, per-scenario verdict). `formatVoiceWorkbenchMarkdown` renders it; `regressionsAgainstBaseline` flags metrics that worsened past a tolerance. |
27
+ | **WER consolidation** | `@elizaos/shared/voice-wer` | The previously-duplicated `wordErrorRate` (`e2e-harness.ts` **and** `voice-selftest-harness.ts`, with subtly different normalization) is now defined once — Unicode-aware, contraction-preserving — and imported by both. |
28
+
29
+ Tests: `voice-workbench.test.ts`, `voice-workbench-report.test.ts`,
30
+ `e2e-harness.test.ts`.
31
+
32
+ ### Honesty contract
33
+
34
+ A scenario whose corpus/backend artifacts are absent is reported `skipped`,
35
+ **never `pass`** — matching the existing self-test contract. A workbench report
36
+ is `skipped` overall only when *every* scenario was skipped; one ran-and-failed
37
+ scenario makes the whole report `fail`.
38
+
39
+ ## Execution modes (the three the schema feeds)
40
+
41
+ 1. **Headless** — feed corpus audio through the real services without a browser:
42
+ `/api/asr/local-inference`, `LiveDiarizationSession` / `/api/voice/audio-frames`,
43
+ the `ELIZA_VOICE_EOT_BACKEND` classifier, respond/room decisions over a real
44
+ `AgentRuntime` (scenario-runner PGLite boot), `VOICE_TURN_OBSERVED` /
45
+ `VOICE_ENTITY_BOUND` / `IDENTIFY_SPEAKER`, and `/api/tts/local-inference`.
46
+ 2. **Headful** — extend `VoiceSelfTestShell` (`packages/ui/src/voice/voice-selftest/`)
47
+ from a single-turn self-test into a scenario player that drives the real
48
+ client pipeline (capture → ASR → SSE → TTS → playback) turn-by-turn, with
49
+ per-turn machine-readable + DOM-mirrored verdicts.
50
+ 3. **Benchmark/report** — a single `voice:workbench` entrypoint that runs the
51
+ matrix in both modes and rolls up via `voice-workbench-report.ts` into one
52
+ JSON + Markdown report with regression baselines.
53
+
54
+ All three consume the **same** `VoiceScenario` and the **same** scorers, so a
55
+ metric is defined exactly once regardless of where the audio is driven.
56
+
57
+ ## Consolidation map (what converges here)
58
+
59
+ The workbench is the convergence point for these previously-disjoint harnesses:
60
+
61
+ | Legacy harness | Convergence |
62
+ | --- | --- |
63
+ | `e2e-harness.ts:wordErrorRate` + `voice-selftest-harness.ts:wordErrorRate` | **Done** — one `@elizaos/shared/voice-wer`. |
64
+ | Pure scoring lib (`e2e-harness.ts`) | **Promoted** to the single metric module (EOT/diarization/respond/entity scorers added). |
65
+ | `packages/app-core/scripts/voice-duet.mjs` (`voice:duet`), `voice-e2e-hardware.ts`, `voice-vad-smoke.ts`, `voice-attribution-smoke.ts`, `lib/duet-bridge.mjs` | Feed measurements into the shared scorers + report (absorption planned). |
66
+ | `packages/benchmarks/voice/three-voice-scenario.mjs`, `three-voice-e2e-real.mjs` | Corpus-generation precedent the `VoiceScenario` corpus generator extends (planned). |
67
+ | `packages/benchmarks/voicebench/` (TS latency p95/p99) | The report layer mirrors its p95/p99 shape; remains a research bench linked from the workbench. |
68
+ | Per-spec inline `tinyWav()` fixtures (`packages/app/test/ui-smoke/voice-*.spec.ts`) | Replaced by the versioned corpus (planned). |
69
+
70
+ ## Remaining (gated — needs corpus + real backend)
71
+
72
+ These are tracked on #8785 and are **not** stubbed here (no LARP):
73
+
74
+ - **Corpus generator + versioned labeled corpus** — TTS-synthesize each turn,
75
+ splice pauses, mix multi-speaker streams; persist labeled WAV + ground-truth
76
+ JSON. Needs the real TTS routes / Kokoro voices. (`__test-helpers__/synthetic-speech.ts`
77
+ is the synthesis seed.)
78
+ - **Headless runner** — wire the scenario through the real ASR/diarization/EOT/
79
+ respond/entity/TTS services + `AgentRuntime`.
80
+ - **scenario-runner audio turn kind** — add an `audio`/`voice` `ScenarioTurnExecution`
81
+ so voice scenarios become first-class `.scenario.ts` files.
82
+ - **Headful scenario player** — `VoiceSelfTestShell` → multi-turn player +
83
+ `packages/app/test/ui-smoke/voice-workbench-*.spec.ts` per scenario class.
84
+ - **`voice:workbench` entrypoint + CI lane** — run the matrix, emit the report
85
+ (`buildVoiceWorkbenchReport`), `skipped` (never `pass`) when artifacts absent.
86
+ - **Multi-agent room semantics** — the canonical ≥3-participant "who responds"
87
+ contract (an open question on the issue) must be settled before the workbench
88
+ can assert against it rather than inventing a rule.