@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,212 @@
1
+ /**
2
+ * AudioFrameConsumer — turn the Android `audioFrame` PCM stream into live,
3
+ * VAD-segmented, speaker-attributed voice turns.
4
+ *
5
+ * The Android native capture path (`plugin-native-talkmode`) streams an
6
+ * `audioFrame` Capacitor event: base64 little-endian s16 mono PCM at 16 kHz,
7
+ * 20 ms per frame, plus `{ sampleRate, channels, samples, rms, timestamp,
8
+ * frameIndex }`. This module is the platform-agnostic consumer that subscribes
9
+ * to that stream (wherever the bun:ffi voice libs are present) and runs:
10
+ *
11
+ * audioFrame (base64 LE-s16) → decode → VadDetector (turn segmentation)
12
+ * → on speech-end: VoiceAttributionPipeline.attribute(turn PCM)
13
+ * → handleLiveVoiceAttribution → VOICE_TURN_OBSERVED + voiceTurnSignal
14
+ *
15
+ * Design notes:
16
+ * - It does NOT reinvent VAD: it drives the existing `VadDetector` state
17
+ * machine (`speech-start` / `speech-pause` / `speech-end`), reusing its
18
+ * Silero onset/offset/hangover logic. It buffers the turn's PCM between
19
+ * `speech-start` and `speech-end`, then attributes the whole utterance.
20
+ * - Every native dependency (`VadDetector`, `VoiceAttributionPipeline`, the
21
+ * runtime) is INJECTED, so the consumer is fully unit-testable with fakes
22
+ * and has no static import of bun:ffi. A `build*` factory in the smoke
23
+ * harness wires the real ggml-backed deps.
24
+ * - The decode boundary (`decodeAudioFramePcm`) is the ONLY place that knows
25
+ * the wire format (base64 LE-s16). Internally everything is Float32 [-1,1].
26
+ *
27
+ * What this module does NOT do: it does not transcribe (ASR text is the
28
+ * separate streaming-ASR path) and it does not own the WebView→agent
29
+ * transport — see `android/AUDIO_FRAMES.md` and `LIVE_PIPELINE.md` for the
30
+ * remaining device wiring.
31
+ */
32
+ import { type EmitVoiceTurnObservedArgs, type HandleLiveVoiceAttributionOptions } from "../../runtime/voice-entity-binding.js";
33
+ import type { VoiceTurnSignal } from "./eot-classifier.js";
34
+ import type { VoiceAttributionOutput, VoiceAttributionPipeline } from "./speaker/attribution-pipeline.js";
35
+ import type { PcmFrame, VadEvent, VoiceInputSource } from "./types.js";
36
+ /**
37
+ * The `audioFrame` event payload, mirroring `TalkModeAudioFrameEvent` in
38
+ * `@elizaos/capacitor-talkmode`. Re-declared structurally here so this
39
+ * package does not take a build dep on the Capacitor plugin.
40
+ */
41
+ export interface AudioFrameEvent {
42
+ /** Base64-encoded little-endian signed 16-bit mono PCM for this frame. */
43
+ pcm16: string;
44
+ /** Sample rate of the captured PCM in Hz (e.g. 16000). */
45
+ sampleRate: number;
46
+ /** Channel count (always 1 — mono). */
47
+ channels: number;
48
+ /** Number of PCM samples in this frame (`pcm16` byte length / 2). */
49
+ samples: number;
50
+ /** RMS amplitude of this frame, normalized 0..1. */
51
+ rms: number;
52
+ /** Monotonic capture timestamp for this frame, ms. */
53
+ timestamp: number;
54
+ /** Running index of this frame since capture started (0-based). */
55
+ frameIndex: number;
56
+ }
57
+ /** The sample rate every voice model in this pipeline is dimensioned for. */
58
+ export declare const AUDIO_FRAME_PIPELINE_SAMPLE_RATE = 16000;
59
+ export declare class AudioFrameDecodeError extends Error {
60
+ constructor(message: string);
61
+ }
62
+ /**
63
+ * Decode an `audioFrame` payload into a Float32 [-1, 1] window. This is the
64
+ * single boundary that understands the base64 LE-s16 wire format.
65
+ *
66
+ * The native capture path only ever produces 16 kHz mono; this asserts that
67
+ * invariant rather than resampling silently (the downstream Silero/WeSpeaker
68
+ * graphs are 16 kHz-only — a wrong rate is a bug to surface, not paper over).
69
+ */
70
+ export declare function decodeAudioFramePcm(frame: AudioFrameEvent): Float32Array;
71
+ /**
72
+ * The structural slice of `VadDetector` the consumer needs. Taking the
73
+ * structural view (not the concrete class) keeps the consumer testable with a
74
+ * fake VAD and avoids pulling the optional native VAD surface into callers
75
+ * that only want to feed frames.
76
+ */
77
+ export interface VadSegmenter {
78
+ /** True while a speech segment (incl. its pause hangover) is open. */
79
+ readonly inSpeech: boolean;
80
+ /** Subscribe to the authoritative VAD timeline. Returns an unsubscribe fn. */
81
+ onVadEvent(listener: (event: VadEvent) => void): () => void;
82
+ /** Feed one mic frame; resolves once its windows are processed. */
83
+ pushFrame(frame: PcmFrame): Promise<void>;
84
+ /** Flush trailing samples and finalize any open segment. */
85
+ flush(): Promise<void>;
86
+ /** Clear all state at a hard boundary. */
87
+ reset(): void;
88
+ }
89
+ /**
90
+ * The structural slice of `VoiceAttributionPipeline` the consumer needs.
91
+ */
92
+ export interface AttributionPipelineLike {
93
+ attribute(req: Parameters<VoiceAttributionPipeline["attribute"]>[0]): Promise<VoiceAttributionOutput>;
94
+ }
95
+ /**
96
+ * The structural slice of `IAgentRuntime` the consumer needs:
97
+ * `handleLiveVoiceAttribution` calls `emitEvent`.
98
+ */
99
+ export interface RuntimeEventSink {
100
+ emitEvent(type: unknown, payload: Record<string, unknown>): Promise<void>;
101
+ }
102
+ export interface AudioFrameConsumerDeps {
103
+ /** Turn-segmentation VAD (drives speech-start/pause/end). */
104
+ vad: VadSegmenter;
105
+ /** Diarization + speaker-attribution pipeline. */
106
+ pipeline: AttributionPipelineLike;
107
+ /** Runtime event sink for VOICE_TURN_OBSERVED. */
108
+ runtime: RuntimeEventSink;
109
+ }
110
+ export interface AudioFrameConsumerConfig {
111
+ /** Source metadata stamped onto every attributed turn. */
112
+ source?: VoiceInputSource;
113
+ /** Gating options forwarded to `handleLiveVoiceAttribution` per turn. */
114
+ attributionOptions?: HandleLiveVoiceAttributionOptions;
115
+ /**
116
+ * Hard cap on a single buffered turn, in seconds. A speaker who never
117
+ * triggers `speech-end` (e.g. continuous noise) must not grow the buffer
118
+ * without bound. When exceeded the turn is force-finalized. Default 30 s.
119
+ */
120
+ maxTurnSeconds?: number;
121
+ /**
122
+ * Pre-roll seconds of audio kept before `speech-start` so the onset of the
123
+ * first word (which the VAD only confirms a window or two in) is not clipped
124
+ * out of the attribution buffer. Default 0.3 s.
125
+ */
126
+ preRollSeconds?: number;
127
+ }
128
+ /** A finalized, attributed turn the consumer surfaces to its caller. */
129
+ export interface AttributedTurn {
130
+ turnId: string;
131
+ output: VoiceAttributionOutput;
132
+ signal: VoiceTurnSignal;
133
+ /** Turn span in the mic-clock (frame `timestamp`) domain. */
134
+ startedAtMs: number;
135
+ endedAtMs: number;
136
+ /** Total buffered turn samples that were attributed. */
137
+ samples: number;
138
+ }
139
+ export type AttributedTurnListener = (turn: AttributedTurn) => void;
140
+ /**
141
+ * Drives the `audioFrame` → VAD turn-segmentation → attribution → signal
142
+ * pipeline. One instance per capture session.
143
+ *
144
+ * Frame ingestion is serialized through the injected VAD's `pushFrame`
145
+ * (which itself serializes the Silero forward pass), so `onAudioFrame` is
146
+ * safe to fire-and-forget from a Capacitor event listener; turns surface in
147
+ * order via `onTurn`.
148
+ */
149
+ export declare class AudioFrameConsumer {
150
+ private readonly vad;
151
+ private readonly pipeline;
152
+ private readonly runtime;
153
+ private readonly source;
154
+ private readonly attributionOptions;
155
+ private readonly maxTurnSamples;
156
+ private readonly preRollSamples;
157
+ private readonly unsubscribeVad;
158
+ private readonly turnListeners;
159
+ /** Float32 chunks of the in-flight turn, oldest first. */
160
+ private turnChunks;
161
+ private turnSamples;
162
+ /** Rolling pre-roll ring (frames captured before speech-start). */
163
+ private preRoll;
164
+ private preRollSampleCount;
165
+ private capturing;
166
+ private turnSeq;
167
+ private turnStartedAtMs;
168
+ private lastFrameEndMs;
169
+ /** Serialized attribution chain so turns finalize one at a time, in order. */
170
+ private attributing;
171
+ private closed;
172
+ /** Count of frames that failed to decode (public counter; the decode error is also rethrown to the caller). */
173
+ droppedFrames: number;
174
+ constructor(deps: AudioFrameConsumerDeps, config?: AudioFrameConsumerConfig);
175
+ /** True while a turn is being buffered (between speech-start and speech-end). */
176
+ get inTurn(): boolean;
177
+ /** Subscribe to finalized attributed turns. Returns an unsubscribe fn. */
178
+ onTurn(listener: AttributedTurnListener): () => void;
179
+ /**
180
+ * Feed one decoded-or-raw `audioFrame`. Accepts either the wire-format
181
+ * `AudioFrameEvent` (decoded here) or a pre-decoded Float32 window with the
182
+ * frame's mic-clock timestamp. Resolves once the frame's VAD windows are
183
+ * processed.
184
+ */
185
+ onAudioFrame(frame: AudioFrameEvent): Promise<void>;
186
+ /**
187
+ * Feed a pre-decoded Float32 16 kHz window with its mic-clock timestamp
188
+ * (ms). The decode boundary already ran; used by transports that decode
189
+ * upstream and by the host harness.
190
+ */
191
+ pushDecodedFrame(pcm: Float32Array, timestampMs: number): Promise<void>;
192
+ /**
193
+ * Flush the VAD (finalize any open segment) and await all pending
194
+ * attribution. Call at end-of-capture so a trailing utterance is not lost.
195
+ */
196
+ flush(): Promise<void>;
197
+ /** Release listeners and clear all buffers. Idempotent. */
198
+ close(): Promise<void>;
199
+ private onVadEvent;
200
+ private beginTurn;
201
+ private finalizeTurn;
202
+ private attributeTurn;
203
+ private appendTurnChunk;
204
+ private appendPreRoll;
205
+ }
206
+ /**
207
+ * Re-export of the producer's emit args, so a consumer caller can construct a
208
+ * VOICE_TURN_OBSERVED payload directly when wiring a custom transport without
209
+ * importing the runtime subpath twice.
210
+ */
211
+ export type { EmitVoiceTurnObservedArgs };
212
+ //# sourceMappingURL=audio-frame-consumer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"audio-frame-consumer.d.ts","sourceRoot":"","sources":["audio-frame-consumer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EACN,KAAK,yBAAyB,EAC9B,KAAK,iCAAiC,EAEtC,MAAM,uCAAuC,CAAC;AAC/C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,KAAK,EACX,sBAAsB,EACtB,wBAAwB,EACxB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAMvE;;;;GAIG;AACH,MAAM,WAAW,eAAe;IAC/B,0EAA0E;IAC1E,KAAK,EAAE,MAAM,CAAC;IACd,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,OAAO,EAAE,MAAM,CAAC;IAChB,oDAAoD;IACpD,GAAG,EAAE,MAAM,CAAC;IACZ,sDAAsD;IACtD,SAAS,EAAE,MAAM,CAAC;IAClB,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,6EAA6E;AAC7E,eAAO,MAAM,gCAAgC,QAAS,CAAC;AAEvD,qBAAa,qBAAsB,SAAQ,KAAK;gBACnC,OAAO,EAAE,MAAM;CAI3B;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,eAAe,GAAG,YAAY,CA0BxE;AA8BD;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B,sEAAsE;IACtE,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,8EAA8E;IAC9E,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,GAAG,MAAM,IAAI,CAAC;IAC5D,mEAAmE;IACnE,SAAS,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1C,4DAA4D;IAC5D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,0CAA0C;IAC1C,KAAK,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,SAAS,CACR,GAAG,EAAE,UAAU,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,GACvD,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACnC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAChC,SAAS,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1E;AAMD,MAAM,WAAW,sBAAsB;IACtC,6DAA6D;IAC7D,GAAG,EAAE,YAAY,CAAC;IAClB,kDAAkD;IAClD,QAAQ,EAAE,uBAAuB,CAAC;IAClC,kDAAkD;IAClD,OAAO,EAAE,gBAAgB,CAAC;CAC1B;AAED,MAAM,WAAW,wBAAwB;IACxC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,yEAAyE;IACzE,kBAAkB,CAAC,EAAE,iCAAiC,CAAC;IACvD;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,wEAAwE;AACxE,MAAM,WAAW,cAAc;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,sBAAsB,CAAC;IAC/B,MAAM,EAA
E,eAAe,CAAC;IACxB,6DAA6D;IAC7D,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,MAAM,sBAAsB,GAAG,CAAC,IAAI,EAAE,cAAc,KAAK,IAAI,CAAC;AAEpE;;;;;;;;GAQG;AACH,qBAAa,kBAAkB;IAC9B,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAe;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;IACnD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;IAC3C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA+B;IACtD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAoC;IACvE,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAa;IAC5C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqC;IAEnE,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,WAAW,CAAK;IACxB,mEAAmE;IACnE,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,kBAAkB,CAAK;IAC/B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,cAAc,CAAK;IAC3B,8EAA8E;IAC9E,OAAO,CAAC,WAAW,CAAoC;IACvD,OAAO,CAAC,MAAM,CAAS;IAEvB,gFAAgF;IAChF,aAAa,SAAK;gBAGjB,IAAI,EAAE,sBAAsB,EAC5B,MAAM,GAAE,wBAA6B;IAqBtC,iFAAiF;IACjF,IAAI,MAAM,IAAI,OAAO,CAEpB;IAED,0EAA0E;IAC1E,MAAM,CAAC,QAAQ,EAAE,sBAAsB,GAAG,MAAM,IAAI;IAKpD;;;;;OAKG;IACG,YAAY,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBzD;;;;OAIG;IACG,gBAAgB,CACrB,GAAG,EAAE,YAAY,EACjB,WAAW,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC;IAgBhB;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,2DAA2D;IACrD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAc5B,OAAO,CAAC,UAAU;IAgBlB,OAAO,CAAC,SAAS;IAWjB,OAAO,CAAC,YAAY;YAkBN,aAAa;IA+B3B,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,aAAa;CAarB;AAgBD;;;;GAIG;AACH,YAAY,EAAE,yBAAyB,EAAE,CAAC"}
@@ -0,0 +1,343 @@
1
+ /**
2
+ * AudioFrameConsumer unit tests (no models, fully injected deps).
3
+ *
4
+ * Drives the consumer with:
5
+ * - a REAL `VadDetector` backed by a deterministic scripted fake Silero, so
6
+ * the turn-segmentation state machine (speech-start → speech-end) is the
7
+ * real one, exercised deterministically;
8
+ * - a fake `AttributionPipelineLike` that records the PCM it was handed and
9
+ * returns a canned `VoiceAttributionOutput`;
10
+ * - a fake `RuntimeEventSink` that records `emitEvent` calls.
11
+ *
12
+ * Asserts: the wire-format decode boundary; one turn segmented per
13
+ * speech-start/speech-end pair; the buffered PCM handed to attribution; the
14
+ * VOICE_TURN_OBSERVED emission and the folded voiceTurnSignal surfaced via
15
+ * `onTurn`; the runaway-turn cap; pre-roll inclusion.
16
+ */
17
+
18
+ import { describe, expect, it } from "vitest";
19
+ import {
20
+ type AttributionPipelineLike,
21
+ AudioFrameConsumer,
22
+ AudioFrameDecodeError,
23
+ type AudioFrameEvent,
24
+ decodeAudioFramePcm,
25
+ type RuntimeEventSink,
26
+ } from "./audio-frame-consumer";
27
+ import type { VoiceAttributionOutput } from "./speaker/attribution-pipeline";
28
+ import { VadDetector } from "./vad";
29
+
30
+ const SR = 16_000;
31
+ const FRAME = 512; // one Silero window
32
+
33
+ /** Deterministic fake Silero: scripted probability per processed window. */
34
+ class ScriptedSilero {
35
+ readonly sampleRate = SR;
36
+ readonly windowSamples = FRAME;
37
+ private idx = 0;
38
+ constructor(private readonly probs: readonly number[]) {}
39
+ async process(window: Float32Array): Promise<number> {
40
+ expect(window.length).toBe(FRAME);
41
+ const p = this.probs[this.idx] ?? this.probs[this.probs.length - 1] ?? 0;
42
+ this.idx++;
43
+ return p;
44
+ }
45
+ reset(): void {}
46
+ }
47
+
48
+ /** Records the attribute() inputs and returns a canned output. */
49
+ class FakePipeline implements AttributionPipelineLike {
50
+ readonly calls: Array<{ turnId: string; pcm: Float32Array }> = [];
51
+ constructor(private readonly entityId: string | null = "entity-x") {}
52
+ async attribute(
53
+ req: Parameters<AttributionPipelineLike["attribute"]>[0],
54
+ ): Promise<VoiceAttributionOutput> {
55
+ this.calls.push({ turnId: req.turnId, pcm: req.pcm });
56
+ return {
57
+ turnId: req.turnId,
58
+ primarySpeaker: {
59
+ id: "spk",
60
+ imprintClusterId: "cluster-1",
61
+ entityId: this.entityId ?? undefined,
62
+ confidence: 0.5,
63
+ },
64
+ segments: [],
65
+ turn: { turnId: req.turnId },
66
+ observation: {
67
+ profileId: "prof-1",
68
+ imprintClusterId: "cluster-1",
69
+ entityId: this.entityId,
70
+ embedding: new Float32Array(256),
71
+ embeddingModel: "wespeaker",
72
+ confidence: 0.5,
73
+ },
74
+ };
75
+ }
76
+ }
77
+
78
+ class FakeRuntime implements RuntimeEventSink {
79
+ readonly emitted: Array<{ type: unknown; payload: Record<string, unknown> }> =
80
+ [];
81
+ async emitEvent(
82
+ type: unknown,
83
+ payload: Record<string, unknown>,
84
+ ): Promise<void> {
85
+ this.emitted.push({ type, payload });
86
+ }
87
+ }
88
+
89
+ /** A 20 ms (320-sample) audioFrame at 16 kHz, encoded as base64 LE-s16. */
90
+ function makeFrame(opts: {
91
+ amplitude: number;
92
+ timestamp: number;
93
+ frameIndex: number;
94
+ samples?: number;
95
+ }): AudioFrameEvent {
96
+ const samples = opts.samples ?? 320;
97
+ const buf = Buffer.alloc(samples * 2);
98
+ for (let i = 0; i < samples; i++) {
99
+ const v = Math.round(
100
+ opts.amplitude * Math.sin((2 * Math.PI * 220 * i) / SR) * 32767,
101
+ );
102
+ buf.writeInt16LE(Math.max(-32768, Math.min(32767, v)), i * 2);
103
+ }
104
+ return {
105
+ pcm16: buf.toString("base64"),
106
+ sampleRate: SR,
107
+ channels: 1,
108
+ samples,
109
+ rms: opts.amplitude / Math.SQRT2,
110
+ timestamp: opts.timestamp,
111
+ frameIndex: opts.frameIndex,
112
+ };
113
+ }
114
+
115
+ /**
116
+ * Build a consumer whose REAL VadDetector is fed by a scripted Silero. Returns
117
+ * the consumer plus the recorders so tests can assert on them.
118
+ */
119
+ function buildHarness(
120
+ probs: readonly number[],
121
+ entityId: string | null = "entity-x",
122
+ ) {
123
+ const silero = new ScriptedSilero(probs);
124
+ const vad = new VadDetector(silero, {
125
+ onsetThreshold: 0.5,
126
+ // Short hangovers so a finite scripted run finalizes the turn.
127
+ pauseHangoverMs: 64,
128
+ endHangoverMs: 128,
129
+ minSpeechMs: 32,
130
+ });
131
+ const pipeline = new FakePipeline(entityId);
132
+ const runtime = new FakeRuntime();
133
+ const consumer = new AudioFrameConsumer(
134
+ { vad, pipeline, runtime },
135
+ {
136
+ source: { kind: "device", deviceId: "pixel" },
137
+ attributionOptions: {
138
+ ownerEntityId: "entity-x",
139
+ knownSpeakerEntityIds: ["entity-x"],
140
+ endOfTurnProbability: 0.95,
141
+ },
142
+ preRollSeconds: 0, // deterministic buffering for assertions
143
+ maxTurnSeconds: 30,
144
+ },
145
+ );
146
+ return { consumer, pipeline, runtime, vad };
147
+ }
148
+
149
+ describe("decodeAudioFramePcm", () => {
150
+ it("decodes base64 LE-s16 mono → Float32 [-1,1]", () => {
151
+ const frame = makeFrame({ amplitude: 0.5, timestamp: 0, frameIndex: 0 });
152
+ const pcm = decodeAudioFramePcm(frame);
153
+ expect(pcm).toBeInstanceOf(Float32Array);
154
+ expect(pcm.length).toBe(320);
155
+ // Peak ≈ amplitude; all in range.
156
+ let max = 0;
157
+ for (const v of pcm) {
158
+ expect(v).toBeGreaterThanOrEqual(-1);
159
+ expect(v).toBeLessThanOrEqual(1);
160
+ if (Math.abs(v) > max) max = Math.abs(v);
161
+ }
162
+ expect(max).toBeGreaterThan(0.45);
163
+ expect(max).toBeLessThan(0.55);
164
+ });
165
+
166
+ it("round-trips a known s16 sample exactly", () => {
167
+ const buf = Buffer.alloc(4);
168
+ buf.writeInt16LE(16384, 0); // +0.5
169
+ buf.writeInt16LE(-32768, 2); // -1.0
170
+ const pcm = decodeAudioFramePcm({
171
+ pcm16: buf.toString("base64"),
172
+ sampleRate: SR,
173
+ channels: 1,
174
+ samples: 2,
175
+ rms: 0,
176
+ timestamp: 0,
177
+ frameIndex: 0,
178
+ });
179
+ expect(pcm[0]).toBeCloseTo(0.5, 5);
180
+ expect(pcm[1]).toBeCloseTo(-1.0, 5);
181
+ });
182
+
183
+ it("rejects non-mono and wrong sample rate (no silent resample)", () => {
184
+ const base = makeFrame({ amplitude: 0.1, timestamp: 0, frameIndex: 0 });
185
+ expect(() => decodeAudioFramePcm({ ...base, channels: 2 })).toThrow(
186
+ AudioFrameDecodeError,
187
+ );
188
+ expect(() => decodeAudioFramePcm({ ...base, sampleRate: 48000 })).toThrow(
189
+ AudioFrameDecodeError,
190
+ );
191
+ });
192
+ });
193
+
194
+ describe("AudioFrameConsumer", () => {
195
+ it("segments one turn from speech-start..speech-end and attributes it", async () => {
196
+ // 320-sample frames re-window into the VAD as 512-sample windows. Script
197
+ // ~24 speech windows (loud) then ~12 silence windows to force speech-end.
198
+ const probs = [...Array(24).fill(0.9), ...Array(12).fill(0.0)];
199
+ const { consumer, pipeline, runtime } = buildHarness(probs);
200
+ const turns: Array<{ turnId: string }> = [];
201
+ consumer.onTurn((t) => turns.push({ turnId: t.turnId }));
202
+
203
+ // Feed enough loud frames to cover the speech windows, then silence.
204
+ let ts = 1000;
205
+ let idx = 0;
206
+ for (let i = 0; i < 40; i++) {
207
+ await consumer.onAudioFrame(
208
+ makeFrame({ amplitude: 0.6, timestamp: ts, frameIndex: idx++ }),
209
+ );
210
+ ts += 20;
211
+ }
212
+ for (let i = 0; i < 24; i++) {
213
+ await consumer.onAudioFrame(
214
+ makeFrame({ amplitude: 0.0, timestamp: ts, frameIndex: idx++ }),
215
+ );
216
+ ts += 20;
217
+ }
218
+ await consumer.flush();
219
+
220
+ expect(pipeline.calls.length).toBe(1);
221
+ expect(turns.length).toBe(1);
222
+ // The attributed PCM is the buffered turn (non-empty, real samples).
223
+ expect(pipeline.calls[0].pcm.length).toBeGreaterThan(SR * 0.4);
224
+ // VOICE_TURN_OBSERVED was emitted for the attributed (bound) speaker.
225
+ expect(runtime.emitted.length).toBe(1);
226
+ expect(runtime.emitted[0].payload.matchedEntityId).toBe("entity-x");
227
+ });
228
+
229
+ it("produces a voiceTurnSignal: enrolled owner → agent speaks", async () => { // Checks the signal handed to onTurn: agentShouldSpeak === true and nextSpeaker === "agent".
230
+ const probs = [...Array(24).fill(0.9), ...Array(12).fill(0.0)]; // 24 speech windows (p=0.9), then 12 silence windows (p=0.0)
231
+ const { consumer } = buildHarness(probs, "entity-x"); // second argument is presumably the entity the harness binds the voice to — confirm in buildHarness
232
+ let signal: { // copy of the two signal fields under test, filled in by the onTurn callback below
233
+ agentShouldSpeak: boolean | null;
234
+ nextSpeaker: string;
235
+ } | null = null;
236
+ consumer.onTurn((t) => {
237
+ signal = {
238
+ agentShouldSpeak: t.signal.agentShouldSpeak,
239
+ nextSpeaker: t.signal.nextSpeaker,
240
+ };
241
+ });
242
+ let ts = 1000;
243
+ for (let i = 0; i < 40; i++) { // 40 loud frames (amplitude 0.6)
244
+ await consumer.onAudioFrame(
245
+ makeFrame({ amplitude: 0.6, timestamp: ts, frameIndex: i }),
246
+ );
247
+ ts += 20;
248
+ }
249
+ for (let i = 0; i < 24; i++) { // then 24 silent frames (amplitude 0.0); frameIndex continues from 40
250
+ await consumer.onAudioFrame(
251
+ makeFrame({ amplitude: 0.0, timestamp: ts, frameIndex: 40 + i }),
252
+ );
253
+ ts += 20;
254
+ }
255
+ await consumer.flush();
256
+ expect(signal).not.toBeNull(); // still null here would mean onTurn never fired
257
+ expect(signal!.agentShouldSpeak).toBe(true);
258
+ expect(signal!.nextSpeaker).toBe("agent");
259
+ });
260
+
261
+ it("stamps the signal onto the attribution output turn metadata", async () => { // Checks that the turn delivered to onTurn exposes voiceTurnSignal under output.turn.metadata.
262
+ const probs = [...Array(24).fill(0.9), ...Array(12).fill(0.0)]; // 24 speech windows (p=0.9), then 12 silence windows (p=0.0)
263
+ const { consumer } = buildHarness(probs);
264
+ let metaSignal: unknown; // stays undefined unless onTurn fires and the metadata carries the key
265
+ consumer.onTurn((t) => {
266
+ metaSignal = (
267
+ t.output.turn.metadata as { voiceTurnSignal?: unknown } | undefined // local cast only to read the optional key
268
+ )?.voiceTurnSignal;
269
+ });
270
+ let ts = 1000;
271
+ for (let i = 0; i < 40; i++) { // 40 loud frames (amplitude 0.6)
272
+ await consumer.onAudioFrame(
273
+ makeFrame({ amplitude: 0.6, timestamp: ts, frameIndex: i }),
274
+ );
275
+ ts += 20;
276
+ }
277
+ for (let i = 0; i < 24; i++) { // then 24 silent frames (amplitude 0.0); frameIndex continues from 40
278
+ await consumer.onAudioFrame(
279
+ makeFrame({ amplitude: 0.0, timestamp: ts, frameIndex: 40 + i }),
280
+ );
281
+ ts += 20;
282
+ }
283
+ await consumer.flush();
284
+ expect(metaSignal).toBeTruthy(); // only presence is asserted, not the signal's contents
285
+ });
286
+
287
+ it("does not segment a turn from pure silence", async () => { // Negative case: no speech anywhere in the input.
288
+ const { consumer, pipeline } = buildHarness(Array(40).fill(0.0)); // all 40 scripted VAD probabilities are 0.0
289
+ let ts = 1000;
290
+ for (let i = 0; i < 40; i++) { // 40 frames, all with amplitude 0.0
291
+ await consumer.onAudioFrame(
292
+ makeFrame({ amplitude: 0.0, timestamp: ts, frameIndex: i }),
293
+ );
294
+ ts += 20;
295
+ }
296
+ await consumer.flush();
297
+ expect(pipeline.calls.length).toBe(0); // the pipeline recorded no calls, even after flush()
298
+ });
299
+
300
+ it("counts a frame that fails to decode as dropped (and rethrows)", async () => { // The failure must both surface to the caller and be counted.
301
+ const { consumer } = buildHarness(Array(10).fill(0.0));
302
+ const bad = makeFrame({ amplitude: 0.1, timestamp: 0, frameIndex: 0 }); // base frame; the override below is what this test relies on to make it undecodable
303
+ await expect(
304
+ consumer.onAudioFrame({ ...bad, channels: 2 }), // same frame with channels overridden to 2 (presumably only mono is accepted — confirm)
305
+ ).rejects.toBeInstanceOf(AudioFrameDecodeError);
306
+ expect(consumer.droppedFrames).toBe(1); // the rejected frame is still counted as dropped
307
+ });
308
+
309
+ it("force-finalizes a runaway turn at the max-turn cap", async () => {
310
+ // Never-ending speech: every scripted window is loud (p=0.95), so the VAD never sees silence.
311
+ // A tiny 1 s cap (maxTurnSeconds: 1) exercises the runaway path quickly.
312
+ const silero = new ScriptedSilero(Array(2000).fill(0.95));
313
+ const vad = new VadDetector(silero, {
314
+ onsetThreshold: 0.5,
315
+ pauseHangoverMs: 64,
316
+ endHangoverMs: 128,
317
+ minSpeechMs: 32,
318
+ });
319
+ const pl = new FakePipeline("entity-x");
320
+ const rt = new FakeRuntime();
321
+ const c = new AudioFrameConsumer( // built by hand rather than via buildHarness, presumably so the options below can be set
322
+ { vad, pipeline: pl, runtime: rt },
323
+ { preRollSeconds: 0, maxTurnSeconds: 1 }, // 1 s cap, no pre-roll
324
+ );
325
+ let finalized = 0; // number of onTurn callbacks seen
326
+ c.onTurn(() => finalized++);
327
+ let ts = 1000;
328
+ // Feed ~2 s of loud audio (100 frames * 20 ms), twice the cap, so the cap must fire mid-stream.
329
+ for (let i = 0; i < 100; i++) {
330
+ await c.onAudioFrame(
331
+ makeFrame({ amplitude: 0.7, timestamp: ts, frameIndex: i }),
332
+ );
333
+ ts += 20;
334
+ }
335
+ await c.flush();
336
+ expect(finalized).toBeGreaterThanOrEqual(1);
337
+ expect(pl.calls.length).toBeGreaterThanOrEqual(1);
338
+ // Each finalized turn may exceed the 1 s cap (SR * 1 samples) by at most 512 samples: presumably one 512-sample VAD window, not one 320-sample frame.
339
+ for (const call of pl.calls) {
340
+ expect(call.pcm.length).toBeLessThanOrEqual(SR * 1 + 512);
341
+ }
342
+ });
343
+ });