@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701):
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,596 @@
1
+ /**
2
+ * Voice turn controller — the turn-taking layer above `VoiceScheduler`.
3
+ *
4
+ * Sits between W1's `VadEvent` stream + W2's `StreamingTranscriber` events
5
+ * and the generation path (the runtime message handler / the local engine's
6
+ * `generate`, which routes through `voiceStreamingArgs` → `VoiceScheduler` →
7
+ * phrase chunker → TTS). Implements the brief's items A4 / A5 / A6:
8
+ *
9
+ * - `speech-start` → fire `prewarm(roomId)` immediately (the
10
+ * response-handler stable prefix / MTP
11
+ * slot KV-prefill) — before STT finishes.
12
+ * - `speech-pause(ms > thr)` → kick a SPECULATIVE response off W2's
13
+ * current partial transcript. The generate
14
+ * call gets an `AbortSignal`; the in-flight
15
+ * generation is stashed.
16
+ * - `speech-active` / a new `speech-start` / VAD re-trigger
17
+ * → ABORT the speculative generation (the abort
18
+ * propagates into `dispatcher.generate`).
19
+ * - `speech-end` (no new speech)
20
+ * → finalize: flush the transcriber for the
21
+ * final transcript; if the speculative result
22
+ * is still valid against it, PROMOTE it; else
23
+ * discard and run the real turn on the
24
+ * finalized transcript.
25
+ *
26
+ * Barge-in: while the agent is speaking the controller flips
27
+ * `scheduler.bargeIn.setAgentSpeaking(true)` (and binds the VAD into the
28
+ * barge-in controller). A provisional `pause-tts` pauses TTS in the
29
+ * scheduler; a `blip` → `resume-tts`; ASR-confirmed words → `hard-stop` →
30
+ * the scheduler drains the ring buffer + flushes the chunker, and the
31
+ * controller aborts the in-flight turn (the same `AbortSignal` the engine
32
+ * threads into `dispatcher.generate`). The transcriber's `words` event is
33
+ * wired into `bargeIn.onWordsDetected({wordCount})` so a blip alone only
34
+ * pauses, but real recognized words hard-stop.
35
+ *
36
+ * No fallback sludge: `prewarm` failures surface via `onError`; a speculative
37
+ * abort is a real `AbortSignal.abort()`, never a swallowed flag.
38
+ */
39
+
40
+ import type { BargeInController } from "./barge-in";
41
+ import {
42
+ EOT_MID_CLAUSE_THRESHOLD,
43
+ type EotClassifier,
44
+ turnSignalFromProbability,
45
+ type VoiceTurnSignal,
46
+ } from "./eot-classifier";
47
+ import type { VoiceScheduler } from "./scheduler";
48
+ import type {
49
+ StreamingTranscriber,
50
+ TranscriberEvent,
51
+ TranscriptUpdate,
52
+ VadEvent,
53
+ VadEventSource,
54
+ VoiceInputSource,
55
+ VoiceSegment,
56
+ VoiceSpeaker,
57
+ VoiceTurnMetadata,
58
+ } from "./types";
59
+
/** Result of a single generation pass, whether speculative or final. */
export interface VoiceTurnOutcome {
  /**
   * Transcript the generation was run against. The controller compares it
   * with the finalized transcript to decide whether a speculative result is
   * still valid.
   */
  transcript: string;
  /** Voice attribution metadata for the transcript behind this outcome. */
  source?: VoiceInputSource;
  speaker?: VoiceSpeaker;
  segments?: VoiceSegment[];
  turn?: VoiceTurnMetadata;
  /**
   * Final reply text produced by the model (already streamed into TTS by the
   * generate callee). May be empty for an IGNORE turn.
   */
  replyText: string;
}
74
+
/** Input handed to `VoiceTurnControllerDeps.generate` for one generation pass. */
export interface VoiceGenerateRequest {
  /** Best transcript available when the request is issued. */
  transcript: string;
  /** Optional source/speaker metadata for attribution-only storage. */
  source?: VoiceInputSource;
  speaker?: VoiceSpeaker;
  segments?: VoiceSegment[];
  turn?: VoiceTurnMetadata;
  /**
   * `true` for the finalized turn (after `speech-end` + `flush()`); `false`
   * for a speculative pass started from a partial transcript.
   */
  final: boolean;
  /** Aborted when speech resumes (speculative) or on a hard-stop barge-in. */
  signal: AbortSignal;
  /**
   * Semantic turn-taking signal known when the request was issued. Response
   * handlers can deterministically suppress/accept without waiting for
   * another model token when it says the next speaker is not the agent.
   */
  turnSignal?: VoiceTurnSignal;
}
95
+
/** Collaborators the turn controller is wired to. */
export interface VoiceTurnControllerDeps {
  /** W1: authoritative VAD event stream (a `VadDetector` is structurally one of these). */
  vad: VadEventSource;
  /**
   * W2: live streaming transcriber. The controller subscribes to its events
   * and calls `flush()` on `speech-end`.
   */
  transcriber: StreamingTranscriber;
  /** W9: voice scheduler — supplies the barge-in controller and pending-TTS cancellation. */
  scheduler: VoiceScheduler;
  /**
   * KV-prefill / response-handler-prefix prewarm, invoked on `speech-start`.
   * Fire-and-forget: a rejection is reported through `onError` rather than
   * swallowed. (In the engine this wraps
   * `engine.prewarmConversation(roomId, ...)` /
   * `runtime.prewarmResponseHandler(roomId)`.)
   */
  prewarm?: (roomId: string) => void | Promise<void>;
  /** Optional cached first-audio filler played immediately on speech-start. */
  playFirstAudioFiller?: () => string | null;
  /**
   * Semantic turn detector layered on top of VAD/STT. It is evaluated on
   * partial transcripts so a `speech-pause` can decide between speculating
   * and waiting for the user to continue.
   */
  turnDetector?: EotClassifier;
  /**
   * Runs one generation pass. The callee builds the message, calls the
   * runtime message handler / `useModel`, and streams `replyText` into TTS
   * via the scheduler. It must honour `request.signal` (abort = stop the
   * LLM/drafter at the next kernel boundary) and resolves with the reply plus
   * the transcript it ran against. Rejecting with the request's `AbortError`
   * is acceptable — the controller treats that as "aborted".
   */
  generate: (request: VoiceGenerateRequest) => Promise<VoiceTurnOutcome>;
}
129
+
/** Static configuration for a `VoiceTurnController`. */
export interface VoiceTurnControllerConfig {
  /** Conversation / room id handed to `prewarm` and (implicitly) `generate`. */
  roomId: string;
  /**
   * Shortest `speech-pause` duration that triggers a speculative response.
   * Defaults to 300 ms: long enough that mid-sentence breath pauses do not
   * trigger one, short enough to win latency on a real end-of-utterance.
   * Negative values are clamped to 0 by the controller.
   */
  speculatePauseMs?: number;
}
140
+
/** Optional observer callbacks fired by the controller. */
export interface VoiceTurnControllerEvents {
  /** Fired when a speculative generation starts from a partial transcript. */
  onSpeculativeStart?(transcript: string): void;
  /** Fired when the in-flight speculative generation is aborted (speech resumed). */
  onSpeculativeAbort?(): void;
  /** Fired when a speculative result matched the final transcript and became the turn's answer. */
  onSpeculativePromoted?(outcome: VoiceTurnOutcome): void;
  /** Fired when a turn finishes (promoted speculative OR a fresh final run). */
  onTurnComplete?(outcome: VoiceTurnOutcome): void;
  /** Fired when `prewarm` rejects, or a `generate` pass rejects with a non-abort error. */
  onError?(error: Error): void;
  /** Fired when a VAD pause/end is suppressed because semantic turn-taking says the user continues. */
  onTurnSuppressed?(transcript: string, signal: VoiceTurnSignal): void;
}
155
+
/** Fallback for `VoiceTurnControllerConfig.speculatePauseMs`, in milliseconds. */
const DEFAULT_SPECULATE_PAUSE_MS = 300;
157
+
/** Bookkeeping for one generation pass that has been started but not yet settled. */
interface InFlightGeneration {
  /** Transcript the generation ran against. */
  transcript: string;
  /** Owns the `AbortSignal` handed to the generate call for this pass. */
  controller: AbortController;
  /** Settles with the pass's outcome, or `null` when no outcome was produced. */
  promise: Promise<VoiceTurnOutcome | null>;
}
164
+
/**
 * Turn-taking controller: listens to the VAD and transcriber streams and
 * drives prewarm, speculative generation, abort, and finalize.
 *
 * Lifecycle: construct, then `start()` to subscribe and `stop()` to detach.
 * Every generation pass receives its own `AbortSignal`. The pass that is
 * currently the turn's answer (a fresh final run, or a speculative run that
 * is being promoted) is tracked in `activeFinalController` so that a
 * `hard-stop` barge-in or `stop()` can abort it.
 */
export class VoiceTurnController {
  private readonly deps: VoiceTurnControllerDeps;
  private readonly events: VoiceTurnControllerEvents;
  private readonly roomId: string;
  private readonly speculatePauseMs: number;
  private readonly bargeIn: BargeInController;

  /** Speculative pass currently in flight (not yet promoted or aborted). */
  private speculative: InFlightGeneration | null = null;
  /** A finalize() in progress (awaiting `transcriber.flush()` + generate). */
  private finalizing: Promise<void> | null = null;
  /** Text of the most recent partial/final transcriber update. */
  private latestPartial = "";
  /** Most recent turn-detector result, keyed by the transcript it scored. */
  private latestTurnSignal: {
    transcript: string;
    signal: VoiceTurnSignal;
    sequence: number;
  } | null = null;
  /** Monotonic counter so a slow, older detector call cannot overwrite a newer result. */
  private turnSignalSequence = 0;
  private started = false;
  private vadUnsub: (() => void) | null = null;
  private transcriberUnsub: (() => void) | null = null;
  private bargeSignalUnsub: (() => void) | null = null;
  /** Abort handle of the pass currently acting as the turn's answer. */
  private activeFinalController: AbortController | null = null;
  /** True once `speech-end` ran and finalize is pending/done for this segment. */
  private segmentEnded = false;
  /** Most recent transcriber update (source of attribution metadata). */
  private latestUpdate: TranscriptUpdate | null = null;

  /**
   * @param deps Streams, scheduler and callbacks the controller drives.
   * @param config Room id and speculation threshold (negative thresholds clamp to 0).
   * @param events Optional observer callbacks.
   */
  constructor(
    deps: VoiceTurnControllerDeps,
    config: VoiceTurnControllerConfig,
    events: VoiceTurnControllerEvents = {},
  ) {
    this.deps = deps;
    this.events = events;
    this.roomId = config.roomId;
    this.speculatePauseMs = Math.max(
      0,
      config.speculatePauseMs ?? DEFAULT_SPECULATE_PAUSE_MS,
    );
    this.bargeIn = deps.scheduler.bargeIn;
  }

  /** Subscribe to the VAD + transcriber streams and start turn-taking. Idempotent. */
  start(): void {
    if (this.started) return;
    this.started = true;
    // Barge-in controller takes the VAD directly so it can pause/resume TTS
    // while the agent is speaking; the scheduler already listens to its
    // `onSignal` stream.
    this.bargeIn.bindVad(this.deps.vad);
    this.bargeSignalUnsub = this.bargeIn.onSignal((signal) => {
      if (signal.type !== "hard-stop") return;
      // A hard-stop cancels both a pending speculative pass and whichever
      // pass is currently the turn's answer.
      this.abortSpeculative();
      this.abortActiveTurn();
    });
    this.vadUnsub = this.deps.vad.onVadEvent((e) => this.onVadEvent(e));
    this.transcriberUnsub = this.deps.transcriber.on((e) =>
      this.onTranscriberEvent(e),
    );
  }

  /** Detach from the streams and abort any in-flight generation. No-op when not started. */
  stop(): void {
    if (!this.started) return;
    this.started = false;
    this.vadUnsub?.();
    this.vadUnsub = null;
    this.transcriberUnsub?.();
    this.transcriberUnsub = null;
    this.bargeIn.unbindVad();
    this.bargeSignalUnsub?.();
    this.bargeSignalUnsub = null;
    this.abortSpeculative();
    this.abortActiveTurn();
    this.activeFinalController = null;
  }

  /** Abort the pass tracked in `activeFinalController`, if it is still live. */
  private abortActiveTurn(): void {
    const active = this.activeFinalController;
    if (active && !active.signal.aborted) active.abort();
  }

  // --- VAD ---------------------------------------------------------------

  /** Dispatch one VAD event to the matching turn-taking action. */
  private onVadEvent(event: VadEvent): void {
    switch (event.type) {
      case "speech-start": {
        // New utterance onset. If we were mid-finalize from a previous
        // segment, that segment got *more* speech — abort the speculative
        // run for it (the finalize promise still resolves; its abort is
        // honoured). Reset segment state + the barge-in episode so the next
        // hard-stop gets a fresh `BargeInCancelToken`.
        this.segmentEnded = false;
        this.latestUpdate = null;
        this.latestPartial = "";
        this.abortSpeculative();
        this.bargeIn.reset();
        this.playFirstAudioFiller();
        void this.firePrewarm();
        break;
      }
      case "speech-active": {
        // Speech is ongoing again — any speculative response we kicked on a
        // pause is stale. Abort it.
        if (this.speculative) this.abortSpeculative();
        break;
      }
      case "speech-pause": {
        if (
          event.pauseDurationMs >= this.speculatePauseMs &&
          !this.speculative &&
          !this.segmentEnded
        ) {
          this.maybeStartSpeculative(this.latestPartial, this.latestUpdate);
        }
        break;
      }
      case "speech-end": {
        this.segmentEnded = true;
        this.beginFinalize();
        break;
      }
      case "blip":
        // Handled entirely by the barge-in controller (resume-tts when the
        // agent is speaking; nothing otherwise). No turn-taking effect.
        break;
    }
  }

  /** Track the latest transcript and forward recognized words to the barge-in controller. */
  private onTranscriberEvent(event: TranscriberEvent): void {
    switch (event.kind) {
      case "partial":
        this.latestPartial = event.update.partial;
        this.latestUpdate = event.update;
        this.queueTurnSignalRefresh(event.update.partial);
        break;
      case "final":
        this.latestPartial = event.update.partial;
        this.latestUpdate = event.update;
        this.queueTurnSignalRefresh(event.update.partial);
        break;
      case "words":
        // ASR confirmed real words during a barge-in window — promote a
        // provisional `pause-tts` into a `hard-stop` (TTS cancelled + LLM
        // aborted). A blip alone would never reach here.
        this.bargeIn.onWordsDetected({
          wordCount: event.words.length,
          partialText: event.words.join(" "),
          timestampMs: Date.now(),
        });
        break;
    }
  }

  // --- prewarm -----------------------------------------------------------

  /**
   * C2 — public idle prewarm entry point. Callers (e.g. the UI when a
   * conversation opens) invoke this to materialize the KV cache for the
   * response-handler stable prefix BEFORE the user starts speaking, so the
   * first speech-start has nothing left to do. Fire-and-forget: nothing is
   * returned because callers should not block on prewarm; failures surface
   * via `onError` exactly like the speech-start path. Repeated calls simply
   * invoke `prewarm` again.
   */
  prewarmOnIdle(): void {
    void this.firePrewarm();
  }

  /** Invoke the optional `prewarm` dependency and report a failure through `onError`. */
  private async firePrewarm(): Promise<void> {
    if (!this.deps.prewarm) return;
    try {
      await this.deps.prewarm(this.roomId);
    } catch (err) {
      this.events.onError?.(toError(err));
    }
  }

  /** Invoke the optional filler dependency; a synchronous throw is reported through `onError`. */
  private playFirstAudioFiller(): void {
    if (!this.deps.playFirstAudioFiller) return;
    try {
      this.deps.playFirstAudioFiller();
    } catch (err) {
      this.events.onError?.(toError(err));
    }
  }

  // --- speculative generation -------------------------------------------

  /**
   * Start a speculative pass for `transcript` unless it is blank. Without a
   * turn detector the pass starts immediately; with one, the start is
   * deferred until the detector has scored the transcript.
   */
  private maybeStartSpeculative(
    transcript: string,
    update: TranscriptUpdate | null,
  ): void {
    const text = transcript.trim();
    if (text.length === 0) return;
    if (!this.deps.turnDetector) {
      this.startSpeculative(text, update, null);
      return;
    }
    void this.startSpeculativeAfterTurnSignal(text, update);
  }

  /**
   * Await the turn signal for `text`, then start a speculative pass only if
   * the situation is unchanged (still started, segment still open, nothing
   * else speculating, partial still equal to `text`) and the signal does not
   * say the user keeps the floor.
   */
  private async startSpeculativeAfterTurnSignal(
    text: string,
    update: TranscriptUpdate | null,
  ): Promise<void> {
    const turnSignal = await this.ensureTurnSignal(text);
    if (
      !this.started ||
      this.segmentEnded ||
      this.speculative ||
      this.latestPartial.trim() !== text
    ) {
      return;
    }
    if (turnSignal && shouldSuppressAgentSpeech(turnSignal)) {
      this.events.onTurnSuppressed?.(text, turnSignal);
      return;
    }
    this.startSpeculative(text, update, turnSignal);
  }

  /** Kick a non-final generate pass with its own abort controller and stash it. */
  private startSpeculative(
    text: string,
    update: TranscriptUpdate | null,
    turnSignal: VoiceTurnSignal | null,
  ): void {
    const controller = new AbortController();
    this.events.onSpeculativeStart?.(text);
    const promise = this.runGenerate({
      transcript: text,
      ...voiceRequestMetadata(update),
      final: false,
      signal: controller.signal,
      ...(turnSignal ? { turnSignal } : {}),
    });
    this.speculative = { transcript: text, controller, promise };
  }

  /** Abort and forget the stashed speculative pass, then drop its pending TTS. No-op if none. */
  private abortSpeculative(): void {
    const spec = this.speculative;
    if (!spec) return;
    this.speculative = null;
    if (!spec.controller.signal.aborted) spec.controller.abort();
    this.events.onSpeculativeAbort?.();
    // Drop the partial TTS the speculative run may have already streamed —
    // it was generated against a stale partial transcript. This is NOT a
    // user barge-in, so use the dedicated drop path (no `onCancel`).
    this.deps.scheduler.cancelPendingTts();
  }

  // --- finalize ----------------------------------------------------------

  /** Start `finalize()` unless one is already running. */
  private beginFinalize(): void {
    // Serialize finalize calls — `speech-end` should only fire once per
    // segment, but be defensive against a VAD that repeats it.
    // NOTE(review): this also ignores a `speech-end` that belongs to a NEW
    // segment while the previous segment's finalize (including its generate
    // call) is still running, so that utterance may never get a turn.
    // Queuing it safely depends on `transcriber.flush()` semantics that are
    // not visible here — confirm and handle.
    if (this.finalizing) return;
    this.finalizing = this.finalize().finally(() => {
      this.finalizing = null;
    });
  }

  /**
   * Finish the current segment: flush the transcriber, promote a matching
   * speculative pass or run a fresh final pass, and emit the completion
   * events. After each await the controller re-checks that it is still
   * started and that no new speech began, and stops quietly otherwise.
   */
  private async finalize(): Promise<void> {
    let finalUpdate: TranscriptUpdate;
    try {
      finalUpdate = await this.deps.transcriber.flush();
    } catch (err) {
      // Flush failure aborts any speculative run and bubbles up — no silent
      // empty-transcript turn.
      this.abortSpeculative();
      this.events.onError?.(toError(err));
      return;
    }
    const finalTranscript = finalUpdate.partial.trim();
    // If `stop()` ran or a new `speech-start` arrived while we were
    // flushing, drop this finalize.
    if (!this.started || !this.segmentEnded) {
      this.abortSpeculative();
      return;
    }

    const spec = this.speculative;
    if (spec && spec.transcript === finalTranscript) {
      // The speculative run is valid — promote it (its TTS has already been
      // streaming). It is now the turn's answer, so track its controller the
      // same way as a fresh final run: otherwise a hard-stop barge-in or
      // `stop()` during this await could not abort it.
      this.speculative = null;
      this.activeFinalController = spec.controller;
      let outcome: VoiceTurnOutcome | null;
      try {
        outcome = await spec.promise;
      } catch (err) {
        outcome = null;
        this.events.onError?.(toError(err));
      } finally {
        if (this.activeFinalController === spec.controller) {
          this.activeFinalController = null;
        }
      }
      if (outcome) {
        this.events.onSpeculativePromoted?.(outcome);
        this.events.onTurnComplete?.(outcome);
        return;
      }
      // Deliberately aborted (hard-stop / stop) — do not regenerate the
      // reply that was just cancelled.
      if (spec.controller.signal.aborted) return;
      // Speculative failed after all — fall through to a fresh final run
      // below.
    } else if (spec) {
      // The partial we speculated off didn't survive — discard it (its TTS
      // is stale).
      this.abortSpeculative();
    }

    if (finalTranscript.length === 0) {
      // Nothing was said (a blip the VAD let through). No turn.
      return;
    }
    const finalTurnSignal = await this.ensureTurnSignal(finalTranscript);
    // Same re-validation as after the flush: never start a final generation
    // once the controller was stopped or the user started speaking again.
    if (!this.started || !this.segmentEnded) return;
    if (finalTurnSignal && shouldSuppressAgentSpeech(finalTurnSignal)) {
      this.abortSpeculative();
      this.events.onTurnSuppressed?.(finalTranscript, finalTurnSignal);
      return;
    }
    const controller = new AbortController();
    this.activeFinalController = controller;
    let outcome: VoiceTurnOutcome | null;
    try {
      outcome = await this.runGenerate({
        transcript: finalTranscript,
        ...voiceRequestMetadata(finalUpdate),
        final: true,
        signal: controller.signal,
        ...(finalTurnSignal ? { turnSignal: finalTurnSignal } : {}),
      });
    } catch (err) {
      outcome = null;
      this.events.onError?.(toError(err));
    } finally {
      if (this.activeFinalController === controller) {
        this.activeFinalController = null;
      }
    }
    if (outcome) this.events.onTurnComplete?.(outcome);
  }

  // --- generate adapter --------------------------------------------------

  /**
   * Call `deps.generate` and normalise failures: an abort (abort-shaped
   * error or an already-aborted request signal) yields `null` silently; any
   * other rejection is reported through `onError` and also yields `null`.
   * This method itself never rejects because of a `generate` failure.
   */
  private async runGenerate(
    request: VoiceGenerateRequest,
  ): Promise<VoiceTurnOutcome | null> {
    try {
      return await this.deps.generate(request);
    } catch (err) {
      if (isAbortError(err) || request.signal.aborted) return null;
      this.events.onError?.(toError(err));
      return null;
    }
  }

  // --- semantic turn detector ------------------------------------------

  /** Fire-and-forget refresh of the cached turn signal for a non-blank transcript. */
  private queueTurnSignalRefresh(transcript: string): void {
    if (!this.deps.turnDetector || transcript.trim().length === 0) return;
    void this.computeTurnSignal(transcript);
  }

  /**
   * Return the turn signal for `transcript`, reusing the cached one when it
   * was computed for the same trimmed text. Resolves to `null` when there is
   * no detector, the text is blank, or the detector failed.
   */
  private async ensureTurnSignal(
    transcript: string,
  ): Promise<VoiceTurnSignal | null> {
    const text = transcript.trim();
    if (!this.deps.turnDetector || text.length === 0) return null;
    const cached = this.latestTurnSignal;
    if (cached && cached.transcript === text) return cached.signal;
    return this.computeTurnSignal(text);
  }

  /**
   * Run the turn detector on `transcript` and cache the result unless a
   * newer computation has already been cached. Detector failures are
   * reported through `onError` and resolve to `null`.
   */
  private async computeTurnSignal(
    transcript: string,
  ): Promise<VoiceTurnSignal | null> {
    const detector = this.deps.turnDetector;
    if (!detector) return null;
    const text = transcript.trim();
    if (text.length === 0) return null;
    const sequence = ++this.turnSignalSequence;
    try {
      // Prefer the detector's own `signal()`; otherwise derive a signal from
      // its bare probability score.
      const signal = detector.signal
        ? await detector.signal(text)
        : turnSignalFromProbability({
            probability: await detector.score(text),
            transcript: text,
            source: "custom",
            model: detector.constructor.name,
          });
      const current = this.latestTurnSignal;
      if (!current || sequence >= current.sequence) {
        this.latestTurnSignal = { transcript: text, signal, sequence };
      }
      return signal;
    } catch (err) {
      this.events.onError?.(toError(err));
      return null;
    }
  }
}
566
+
/**
 * Decide whether the agent should stay silent for this turn signal.
 *
 * @returns `true` when the signal explicitly says the agent should not
 * speak, names the user as the next speaker, or reports an end-of-turn
 * probability below `EOT_MID_CLAUSE_THRESHOLD`.
 */
function shouldSuppressAgentSpeech(signal: VoiceTurnSignal): boolean {
  if (signal.agentShouldSpeak === false) return true;
  if (signal.nextSpeaker === "user") return true;
  return signal.endOfTurnProbability < EOT_MID_CLAUSE_THRESHOLD;
}
574
+
/**
 * Report whether a thrown value looks like an abort/cancellation.
 *
 * The value is duck-typed rather than tested with `instanceof Error`, so
 * abort rejections that are not `Error` instances in this realm (errors from
 * another realm, polyfilled `DOMException`s, plain `{ name: "AbortError" }`
 * objects) are recognised too. Every value the previous `instanceof`-based
 * check accepted is still accepted.
 *
 * @param err Any thrown/rejected value.
 * @returns `true` when `err` is an object whose `name` is `"AbortError"` or
 * whose string `message` contains "abort" (case-insensitive).
 */
function isAbortError(err: unknown): boolean {
  if (typeof err !== "object" || err === null) return false;
  const { name, message } = err as { name?: unknown; message?: unknown };
  if (name === "AbortError") return true;
  return typeof message === "string" && message.toLowerCase().includes("abort");
}
581
+
/**
 * Normalise any thrown value into an `Error`.
 *
 * - `Error` instances are returned unchanged.
 * - Primitives, `null` and `undefined` are stringified with `String()`.
 * - Error-like objects (non-empty string `message`) keep their `message` and,
 *   when present, their `name`.
 * - Other objects use their own `toString()` if it is informative, otherwise
 *   their JSON form, instead of the opaque `"[object Object]"`.
 *
 * @param err Any thrown/rejected value.
 * @returns An `Error` describing `err`; never throws.
 */
function toError(err: unknown): Error {
  if (err instanceof Error) return err;
  if (typeof err !== "object" || err === null) return new Error(String(err));

  const { name, message } = err as { name?: unknown; message?: unknown };
  if (typeof message === "string" && message.length > 0) {
    const wrapped = new Error(message);
    if (typeof name === "string" && name.length > 0) wrapped.name = name;
    return wrapped;
  }

  let description = "[object Object]";
  try {
    // May throw for objects without a usable toString (e.g. null prototype).
    description = String(err);
  } catch {
    // Keep the placeholder and try JSON below.
  }
  if (description === "[object Object]") {
    try {
      // Returns undefined for some inputs and throws on circular structures.
      const json: unknown = JSON.stringify(err);
      if (typeof json === "string") description = json;
    } catch {
      // Not serialisable — keep the placeholder description.
    }
  }
  return new Error(description);
}
585
+
/**
 * Copy the attribution fields of a transcript update into the shape used by
 * `VoiceGenerateRequest`.
 *
 * @param update Transcriber update to read from, or `null` when none exists.
 * @returns A fresh object holding only those of `source`, `speaker`,
 * `segments` and `turn` that are truthy on `update`; empty for `null`.
 */
function voiceRequestMetadata(
  update: TranscriptUpdate | null,
): Pick<VoiceGenerateRequest, "source" | "speaker" | "segments" | "turn"> {
  const metadata: Pick<
    VoiceGenerateRequest,
    "source" | "speaker" | "segments" | "turn"
  > = {};
  if (!update) return metadata;
  if (update.source) metadata.source = update.source;
  if (update.speaker) metadata.speaker = update.speaker;
  if (update.segments) metadata.segments = update.segments;
  if (update.turn) metadata.turn = update.turn;
  return metadata;
}