@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,927 @@
1
+ import { inferenceTelemetry } from "../inference-telemetry";
2
+ import { BargeInController } from "./barge-in";
3
+ import type { PhonemeTokenizer } from "./phoneme-tokenizer";
4
+ import { PhraseCache } from "./phrase-cache";
5
+ import { PhraseChunker } from "./phrase-chunker";
6
+ import {
7
+ PrefixPreservingQueue,
8
+ type TaggedAudioChunk,
9
+ } from "./prefix-preserving-queue";
10
+ import { InMemoryAudioSink, PcmRingBuffer } from "./ring-buffer";
11
+ import { RollbackQueue } from "./rollback-queue";
12
+ import type {
13
+ AcceptedToken,
14
+ AudioChunk,
15
+ AudioSink,
16
+ BargeInSignal,
17
+ OmniVoiceBackend,
18
+ Phrase,
19
+ RejectedTokenRange,
20
+ SchedulerConfig,
21
+ SpeakerPreset,
22
+ StreamingTtsBackend,
23
+ TextToken,
24
+ TtsPcmChunk,
25
+ VoiceAudioSource,
26
+ VoiceSchedulerPhraseTelemetry,
27
+ VoiceSchedulerTelemetryEvent,
28
+ VoiceSchedulerTelemetryListener,
29
+ VoiceTtsCancelReason,
30
+ } from "./types";
31
+
32
/**
 * T2 — summary of the PCM chunk sizes streamed for a single phrase.
 *
 * One of these is emitted per `synthesizePhraseStream` call when
 * `SchedulerEvents.onChunkMetrics` is wired. Byte sizes assume Float32
 * samples; durations are derived from samples / sampleRate. Intended for
 * debugging T1-class chunk-size pathologies and for verifying T3
 * time-budget effects.
 */
export interface TtsPhraseChunkMetrics {
  /** Id of the phrase the chunks were synthesized for. */
  phraseId: number;
  /**
   * Per-chunk sizes in arrival order. Empty when no chunks landed.
   */
  chunks: readonly {
    /** Chunk size in PCM bytes. */
    chunkBytes: number;
    /** Chunk duration in milliseconds. */
    chunkDurationMs: number;
  }[];
  /** Total of all chunk durations, in milliseconds. */
  totalDurationMs: number;
  /** Total of all chunk byte sizes. */
  totalBytes: number;
  /** True when the phrase synthesis was cancelled mid-stream. */
  cancelled: boolean;
}
54
+
55
/** Callback that receives one per-phrase chunk-size summary. */
export type TtsChunkMetricsListener = (
  metrics: TtsPhraseChunkMetrics,
) => void;
56
+
57
/**
 * Optional callbacks a `VoiceScheduler` owner can supply to observe
 * scheduler activity. Every member is optional.
 */
export interface SchedulerEvents {
  /** A phrase was handed to the dispatch path. */
  onPhrase?(phrase: Phrase): void;
  /** A tracked phrase was rolled back because `range` was rejected. */
  onRollback?(phraseId: number, range: RejectedTokenRange): void;
  /** An audio chunk was produced. */
  onAudio?(chunk: AudioChunk): void;
  /**
   * Barge-in hard-stop: the ring buffer was drained, the chunker reset and
   * in-flight TTS cancelled. Separately, the engine layer's
   * `voiceStreamingArgs` threads the `BargeInCancelToken.signal`
   * (`bargeIn.onSignal` → `hard-stop`) into `dispatcher.generate`, so the
   * LLM/drafter abort as well.
   */
  onCancel?(): void;
  /**
   * Provisional barge-in: a VAD voice hit while the agent was speaking
   * paused TTS playback.
   */
  onTtsPause?(): void;
  /** The provisional barge-in resolved as a blip — TTS playback resumed. */
  onTtsResume?(): void;
  /**
   * Structured scheduler telemetry covering latency, cache, rollback and
   * barge-in metrics.
   */
  onTelemetry?: VoiceSchedulerTelemetryListener;
  /**
   * T2 — per-phrase TTS chunk-size distribution. When set, the scheduler
   * emits one summary per streamed phrase synthesis, whether it succeeded
   * or was cancelled, so test harnesses and metrics consumers can verify
   * T1/T3 effects without scraping the audio bus.
   */
  onChunkMetrics?: TtsChunkMetricsListener;
}
82
+
83
/** Collaborators injected into a `VoiceScheduler`. */
export interface SchedulerDeps {
  /** TTS backend that performs phrase synthesis. */
  backend: OmniVoiceBackend;
  /** Audio output. The scheduler creates an `InMemoryAudioSink` when omitted. */
  sink?: AudioSink;
  /** Phrase audio cache. The scheduler creates a fresh `PhraseCache` when omitted. */
  phraseCache?: PhraseCache;
  /**
   * Optional. Only required when
   * `config.chunkerConfig.chunkOn === 'phoneme-stream'`. Defaults are
   * available from `createDefaultPhonemeTokenizer()`.
   */
  phonemeTokenizer?: PhonemeTokenizer;
}
92
+
93
/** Bookkeeping record for one phrase whose synthesis has been started. */
interface InFlight {
  /** The phrase being synthesized. */
  phrase: Phrase;
  /** Mutable flag; set `cancelled` to true to request cancellation. */
  cancelSignal: { cancelled: boolean };
  /** Settles once the phrase's synthesis run has finished. */
  done: Promise<void>;
}
98
+
99
/** Shape of a backend that exposes a `cancelTts()` hook. */
interface NativeCancelableTtsBackend {
  /** Ask the backend to cancel its current TTS work. */
  cancelTts(): void;
}
102
+
103
/** In-flight phrase cap applied when `config.maxInFlightPhrases` is not set. */
const DEFAULT_MAX_IN_FLIGHT_PHRASES = 4;
104
+
105
/** Current `performance.now()` reading, in milliseconds. */
function nowMs(): number {
  const clock = globalThis.performance;
  return clock.now();
}
108
+
109
/**
 * Build the telemetry projection of a phrase.
 *
 * `tokenCount` is the inclusive span `toIndex - fromIndex + 1`, clamped at
 * zero; `textBytes` is the length of the phrase text as encoded by
 * `TextEncoder`.
 */
function phraseTelemetry(phrase: Phrase): VoiceSchedulerPhraseTelemetry {
  const { id, text, fromIndex, toIndex, terminator } = phrase;
  const inclusiveSpan = toIndex - fromIndex + 1;
  const encoded = new TextEncoder().encode(text);
  return {
    id,
    text,
    fromIndex,
    toIndex,
    terminator,
    tokenCount: Math.max(0, inclusiveSpan),
    textBytes: encoded.length,
  };
}
120
+
121
/**
 * Type guard: true when `backend` has a callable `synthesizeStream` member.
 */
function isStreamingTtsBackend(
  backend: OmniVoiceBackend,
): backend is OmniVoiceBackend & StreamingTtsBackend {
  const { synthesizeStream } = backend as Partial<StreamingTtsBackend>;
  return typeof synthesizeStream === "function";
}
129
+
130
/**
 * Type guard: true when `backend` has a callable `cancelTts` member.
 */
function isNativeCancelableTtsBackend(
  backend: OmniVoiceBackend,
): backend is OmniVoiceBackend & NativeCancelableTtsBackend {
  const { cancelTts } = backend as Partial<NativeCancelableTtsBackend>;
  return typeof cancelTts === "function";
}
138
+
139
/** Return a new `Float32Array` holding a copy of the samples in `pcm`. */
function copyPcm(pcm: Float32Array): Float32Array {
  const duplicate = new Float32Array(pcm.length);
  duplicate.set(pcm);
  return duplicate;
}
142
+
143
/**
 * Join `parts` end to end into a new `Float32Array` of length `total`.
 *
 * `total` is taken at face value: it sizes the output buffer and is not
 * checked against the combined length of `parts`.
 */
function concatPcm(
  parts: ReadonlyArray<Float32Array>,
  total: number,
): Float32Array {
  const joined = new Float32Array(total);
  parts.reduce((writeAt, segment) => {
    joined.set(segment, writeAt);
    return writeAt + segment.length;
  }, 0);
  return joined;
}
155
+
156
+ export class VoiceScheduler {
157
+ readonly chunker: PhraseChunker;
158
+ readonly rollback = new RollbackQueue();
159
+ readonly bargeIn = new BargeInController();
160
+ readonly ringBuffer: PcmRingBuffer;
161
+ readonly sink: AudioSink;
162
+ readonly preset: SpeakerPreset;
163
+ /**
164
+ * Prefix-preserving barge-in queue. When the streaming TTS path is active,
165
+ * each audio chunk is enqueued here tagged with its token range. On
166
+ * hard-stop (barge-in), `rollbackAt(divergencePoint)` partitions the
167
+ * queue: chunks at or before the divergence point are replayed into the
168
+ * sink; chunks after are dropped. This lets audio that was already
169
+ * correct play through without re-synthesizing.
170
+ */
171
+ readonly prefixQueue = new PrefixPreservingQueue();
172
+ private readonly backend: OmniVoiceBackend;
173
+ private readonly phraseCache: PhraseCache;
174
+ private readonly events: SchedulerEvents;
175
+ private readonly sampleRate: number;
176
+ private readonly inFlight = new Map<number, InFlight>();
177
+ private readonly maxInFlight: number;
178
+ private readonly streamingTtsActive: boolean;
179
+ private kernelTicks = 0;
180
+ private nextStandalonePhraseId = -1;
181
+ /** True while a provisional barge-in (`pause-tts`) has paused playback. */
182
+ private paused = false;
183
+ /**
184
+ * The last committed token index — updated whenever a phrase is dispatched
185
+ * to TTS. Used as the divergence point when a barge-in fires mid-response.
186
+ */
187
+ private lastCommittedTokenIndex = 0;
188
+ private agentSpeakingUntilMs = 0;
189
+ private agentSpeakingTimer: ReturnType<typeof setTimeout> | null = null;
190
+ private phraseFlushTimer: ReturnType<typeof setTimeout> | null = null;
191
+
192
/**
 * Wire the scheduler from its configuration, injected collaborators and
 * optional event callbacks.
 *
 * Missing optional deps are replaced with defaults: a fresh `PhraseCache`,
 * an `InMemoryAudioSink`, and no phoneme tokenizer (`null`).
 */
constructor(
  config: SchedulerConfig,
  deps: SchedulerDeps,
  events: SchedulerEvents = {},
) {
  const tokenizer = deps.phonemeTokenizer ?? null;
  this.chunker = new PhraseChunker(config.chunkerConfig, tokenizer);

  this.preset = config.preset;
  this.backend = deps.backend;
  this.phraseCache = deps.phraseCache ?? new PhraseCache();
  this.sampleRate = config.sampleRate;

  // The ring buffer is built on top of the sink, so resolve the sink first.
  this.sink = deps.sink ?? new InMemoryAudioSink();
  this.ringBuffer = new PcmRingBuffer(
    config.ringBufferCapacity,
    config.sampleRate,
    this.sink,
  );

  this.events = events;

  // Never allow fewer than one phrase in flight.
  const requestedCap =
    config.maxInFlightPhrases ?? DEFAULT_MAX_IN_FLIGHT_PHRASES;
  this.maxInFlight = Math.max(1, requestedCap);

  // Streaming TTS is on unless explicitly disabled. The Metal
  // ggml_conv_transpose_1d stall that used to force it off on macOS is
  // fixed in the llama.cpp merge (native Metal kernels; the CPU fallback no
  // longer triggers).
  this.streamingTtsActive = config.streamingTtsActive ?? true;

  // Legacy hard-stop hook (`bargeIn.onMicActive()` / `attach.onCancel`).
  this.bargeIn.attach({ onCancel: () => this.handleBargeIn() });

  // Newer signal stream: pause/resume on a provisional barge-in, hard-stop
  // once ASR confirms words. `onMicActive()` emits `hard-stop` too, so
  // `handleBargeIn` is reached from both this listener and the legacy
  // `attach` above — it is idempotent.
  this.bargeIn.onSignal((signal) => this.onBargeInSignal(signal));
}
230
+
231
/**
 * Feed one accepted text token into the phrase chunker.
 *
 * When the token completes a phrase, the flush timer is cleared and the
 * phrase is dispatched; otherwise the flush timer is (re)armed.
 *
 * @param token The token to accept.
 * @param acceptedAt Acceptance timestamp; defaults to `Date.now()`.
 */
async accept(token: TextToken, acceptedAt = Date.now()): Promise<void> {
  const accepted: AcceptedToken = { ...token, acceptedAt };
  const completed = this.chunker.push(accepted);
  if (!completed) {
    this.armPhraseFlushTimer();
    return;
  }
  this.clearPhraseFlushTimer();
  await this.dispatchPhrase(completed);
}
241
+
242
/**
 * Handle the rejection of a token range.
 *
 * Pending chunker tokens from `range.fromIndex` onward are dropped, every
 * phrase the rollback queue reports for the range is dropped and announced
 * (`onRollback` + `rollback` telemetry), and any of those phrases that is
 * still in flight has its cancel flag set. If at least one in-flight phrase
 * was cancelled and the backend is a streaming one, native TTS is
 * cancelled once after the loop.
 */
async reject(range: RejectedTokenRange): Promise<void> {
  // Discard draft tokens still buffered in the chunker before any phrase
  // packing happens, so the verifier's correction is not glued onto stale
  // text.
  this.chunker.dropPendingFrom(range.fromIndex);
  this.armPhraseFlushTimer();

  let hitInFlight = false;
  for (const { phraseId, reason } of this.rollback.onRejected(range)) {
    const active = this.inFlight.get(phraseId);
    if (active) {
      active.cancelSignal.cancelled = true;
      hitInFlight = true;
      this.emitTtsCancel(active.phrase, "rollback");
    }
    this.rollback.drop(phraseId);
    this.events.onRollback?.(phraseId, range);
    this.emitTelemetry({
      type: "rollback",
      atMs: nowMs(),
      phraseId,
      range,
      reason,
    });
  }

  if (hitInFlight && isStreamingTtsBackend(this.backend)) {
    this.cancelNativeTts();
  }
}
271
+
272
/**
 * Clear the flush timer and dispatch whatever partial phrase the chunker
 * still holds, if any.
 */
async flushPending(): Promise<void> {
  this.clearPhraseFlushTimer();
  const remainder = this.chunker.flushPending();
  if (!remainder) return;
  await this.dispatchPhrase(remainder);
}
279
+
280
/**
 * Wait for every phrase that is in flight at the moment of the call.
 * Phrases dispatched afterwards are not included in the wait.
 */
async waitIdle(): Promise<void> {
  const pending: Promise<void>[] = [];
  for (const entry of this.inFlight.values()) {
    pending.push(entry.done);
  }
  await Promise.all(pending);
}
284
+
285
/**
 * Synthesize a standalone piece of text outside the token stream.
 *
 * The text is wrapped in a phrase with a fresh negative id and a zero token
 * range. A phrase-cache hit is returned directly; otherwise the backend's
 * non-streaming `synthesize` is called and the result is stored in the
 * phrase cache before being returned.
 *
 * @param text Text to synthesize; also the phrase-cache key.
 * @param signal Optional abort signal. Aborting sets the cancel flag and
 *   cancels native TTS.
 * @returns The synthesized (or cached) audio chunk.
 * @throws Error when the signal is already aborted on entry, or when
 *   synthesis finishes with the cancel flag set. The message names the
 *   actual cause: "abort signal" when the caller's signal fired, "barge-in"
 *   otherwise.
 */
async synthesizeText(
  text: string,
  signal?: AbortSignal,
): Promise<AudioChunk> {
  const phrase: Phrase = {
    id: this.nextStandalonePhraseId--,
    text,
    fromIndex: 0,
    toIndex: 0,
    terminator: "max-cap",
  };
  if (signal?.aborted) {
    this.emitTtsCancel(phrase, "synthesis-cancelled");
    throw new Error("[voice-scheduler] synthesis cancelled by abort signal");
  }

  const cached = this.phraseCache.get(text);
  if (cached) {
    this.emitTelemetry({
      type: "phrase-cache-hit",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
    });
    this.emitTelemetry({
      type: "tts-first-audio",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
      source: "cache",
      samples: cached.pcm.length,
      sampleRate: cached.sampleRate,
    });
    return {
      phraseId: phrase.id,
      fromIndex: phrase.fromIndex,
      toIndex: phrase.toIndex,
      pcm: cached.pcm,
      sampleRate: cached.sampleRate,
    };
  }
  this.emitTelemetry({
    type: "phrase-cache-miss",
    atMs: nowMs(),
    phrase: phraseTelemetry(phrase),
  });

  const cancelSignal = { cancelled: false };
  // Remember which path cancelled so the thrown error names the real cause
  // instead of always blaming barge-in.
  let abortedBySignal = false;
  const abort = () => {
    abortedBySignal = true;
    cancelSignal.cancelled = true;
    this.cancelNativeTts();
  };
  // Telemetry listeners ran synchronously above and may have aborted the
  // signal, so check again before subscribing.
  if (signal?.aborted) {
    abort();
  }
  signal?.addEventListener("abort", abort, { once: true });
  const detach = this.bargeIn.attach({
    onCancel: () => {
      cancelSignal.cancelled = true;
    },
  });
  try {
    this.emitTelemetry({
      type: "tts-start",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
      inFlightPhrases: this.inFlight.size,
    });
    const chunk = await this.backend.synthesize({
      phrase,
      preset: this.preset,
      cancelSignal,
      onKernelTick: () => this.tickKernel(),
    });
    if (cancelSignal.cancelled) {
      this.emitTtsCancel(phrase, "synthesis-cancelled");
      throw new Error(
        abortedBySignal
          ? "[voice-scheduler] synthesis cancelled by abort signal"
          : "[voice-scheduler] synthesis cancelled by barge-in",
      );
    }
    this.emitTelemetry({
      type: "tts-first-audio",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
      source: "synthesis",
      samples: chunk.pcm.length,
      sampleRate: chunk.sampleRate,
    });
    this.phraseCache.put({
      text,
      pcm: chunk.pcm,
      sampleRate: chunk.sampleRate,
    });
    return chunk;
  } finally {
    // Always unhook both cancellation sources, on success and on failure.
    detach();
    signal?.removeEventListener("abort", abort);
  }
}
380
+
381
/**
 * Fill the phrase cache ahead of time.
 *
 * Each entry of `texts` is trimmed; empty entries are skipped, entries
 * already in the cache are counted as `cached`, and the rest are
 * synthesized with the non-streaming backend call and stored.
 *
 * @param texts Phrases to warm.
 * @param opts `concurrency` — number of parallel workers; floored and
 *   clamped to at least 1 (default 1), and never more than `texts.length`.
 * @returns `warmed` — entries the cache reported as stored; `cached` —
 *   entries that were already present.
 */
async prewarmPhrases(
  texts: ReadonlyArray<string>,
  opts: { concurrency?: number } = {},
): Promise<{ warmed: number; cached: number }> {
  const requested = Math.max(1, Math.floor(opts.concurrency ?? 1));
  const workerCount = Math.min(requested, texts.length);
  let warmed = 0;
  let cached = 0;
  let next = 0;

  // Each worker repeatedly claims the next unclaimed index until the list
  // is exhausted.
  const drain = async (): Promise<void> => {
    while (next < texts.length) {
      const text = texts[next++]?.trim();
      if (!text) continue;
      if (this.phraseCache.has(text)) {
        cached += 1;
        continue;
      }
      const phrase: Phrase = {
        id: this.nextStandalonePhraseId--,
        text,
        fromIndex: 0,
        toIndex: 0,
        terminator: "max-cap",
      };
      const audio = await this.backend.synthesize({
        phrase,
        preset: this.preset,
        cancelSignal: { cancelled: false },
        onKernelTick: () => this.tickKernel(),
      });
      const stored = this.phraseCache.put({
        text,
        pcm: audio.pcm,
        sampleRate: audio.sampleRate,
      });
      if (stored) warmed += 1;
    }
  };

  const workers: Promise<void>[] = [];
  for (let w = 0; w < workerCount; w += 1) {
    workers.push(drain());
  }
  await Promise.all(workers);
  return { warmed, cached };
}
429
+
430
/** Count one kernel tick; passed to the backend as `onKernelTick`. */
tickKernel(): void {
  this.kernelTicks += 1;
}
433
+
434
/** Number of kernel ticks counted so far. */
kernelTickCount(): number {
  const { kernelTicks } = this;
  return kernelTicks;
}
437
+
438
/**
 * Mark the agent as audibly speaking for the duration of audio handed to the
 * sink. This is the barge-in gate: VAD blips only pause/resume TTS while this
 * flag is true, and ASR-confirmed words hard-stop playback plus generation.
 *
 * Calls with a non-positive or non-finite `samples` / `sampleRate` are
 * ignored.
 *
 * @param samples Number of PCM samples handed to the sink.
 * @param sampleRate Sample rate of that audio, in Hz.
 */
markAgentSpeakingForAudio(samples: number, sampleRate: number): void {
  // NaN slips past a plain `<= 0` comparison and would make the deadline NaN
  // — and `Math.max` keeps it NaN on every later call. Infinity would pin
  // the deadline forever. Reject both up front.
  if (!Number.isFinite(samples) || !Number.isFinite(sampleRate)) return;
  if (samples <= 0 || sampleRate <= 0) return;
  const durationMs = (samples / sampleRate) * 1000;
  // A short guard absorbs sink scheduling jitter between tiny streaming chunks.
  this.agentSpeakingUntilMs = Math.max(
    this.agentSpeakingUntilMs,
    nowMs() + durationMs + 50,
  );
  this.bargeIn.setAgentSpeaking(true);
  this.armAgentSpeakingTimer();
}
454
+
455
/** Whether TTS playback is currently paused by a provisional barge-in. */
get ttsPaused(): boolean {
  const { paused } = this;
  return paused;
}
459
+
460
/**
 * Throw away TTS that has not been spoken yet, without treating it as a
 * barge-in.
 *
 * The ring buffer is drained, the prefix queue cleared, the chunker reset,
 * and every in-flight synthesis flagged as cancelled. The turn controller
 * uses this when a speculative response is invalidated because speech
 * resumed: that TTS was streamed off a stale partial transcript and must
 * go, but since the user did not barge in, `onCancel` is NOT fired.
 */
cancelPendingTts(): void {
  // Reset playback state.
  this.paused = false;
  this.clearAgentSpeaking();
  this.clearPhraseFlushTimer();

  // Discard buffered audio and pending text.
  this.ringBuffer.drain();
  this.prefixQueue.clear();
  this.lastCommittedTokenIndex = 0;
  this.chunker.reset();

  // Flag every running synthesis, then stop the native backend once.
  this.inFlight.forEach(({ cancelSignal, phrase }) => {
    cancelSignal.cancelled = true;
    this.emitTtsCancel(phrase, "pending-tts");
  });
  this.cancelNativeTts();
}
481
+
482
/**
 * Send one completed phrase towards audio output.
 *
 * The phrase is tracked for rollback and announced (`onPhrase` +
 * `phrase-dispatch` telemetry). A phrase-cache hit is committed
 * immediately. On a miss, the call waits until fewer than `maxInFlight`
 * phrases are in flight, registers the phrase as in flight, and starts
 * synthesis in the background — it does not wait for synthesis to finish.
 *
 * @param phrase The phrase to dispatch.
 * @throws Whatever an awaited older phrase's `done` promise rejects with,
 *   while waiting for capacity.
 */
private async dispatchPhrase(phrase: Phrase): Promise<void> {
  this.rollback.track(phrase);
  // Advance the divergence-point cursor. Tokens up to toIndex are now
  // "committed" — a barge-in rollback keeps audio for them.
  this.lastCommittedTokenIndex = Math.max(
    this.lastCommittedTokenIndex,
    phrase.toIndex,
  );
  this.events.onPhrase?.(phrase);
  this.emitTelemetry({
    type: "phrase-dispatch",
    atMs: nowMs(),
    phrase: phraseTelemetry(phrase),
    inFlightPhrases: this.inFlight.size,
  });

  const cached = this.phraseCache.get(phrase.text);
  if (cached) {
    this.emitTelemetry({
      type: "phrase-cache-hit",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
    });
    const chunk: AudioChunk = {
      phraseId: phrase.id,
      fromIndex: phrase.fromIndex,
      toIndex: phrase.toIndex,
      pcm: cached.pcm,
      sampleRate: cached.sampleRate,
    };
    this.commitAudio(chunk, phrase, "cache");
    return;
  }
  this.emitTelemetry({
    type: "phrase-cache-miss",
    atMs: nowMs(),
    phrase: phraseTelemetry(phrase),
  });

  // Re-check capacity after every wait. Several dispatchers can be parked on
  // the same oldest entry; a single `if` would let all of them through at
  // once and overshoot `maxInFlight`.
  while (this.inFlight.size >= this.maxInFlight) {
    // Map iteration is in insertion order, so the first value is the oldest.
    const oldest = this.inFlight.values().next().value;
    if (!oldest) break;
    await oldest.done;
    // Defensive: if the finished entry is somehow still registered, stop
    // waiting rather than spin on an already-settled promise.
    if (this.inFlight.values().next().value === oldest) break;
  }

  const cancelSignal = { cancelled: false };
  let resolveDone!: () => void;
  let rejectDone!: (err: unknown) => void;
  const done = new Promise<void>((resolve, reject) => {
    resolveDone = resolve;
    rejectDone = reject;
  });
  // Register before starting the run so the entry is already visible while
  // `runPhraseSynthesis` executes its synchronous prologue.
  this.inFlight.set(phrase.id, { phrase, cancelSignal, done });
  void this.runPhraseSynthesis(phrase, cancelSignal).then(
    resolveDone,
    rejectDone,
  );
}
541
+
542
/**
 * Run synthesis for one in-flight phrase and remove it from the in-flight
 * map when finished, whether it succeeded, was cancelled, or threw.
 *
 * The streaming path is taken when streaming TTS is enabled and the backend
 * supports it; otherwise the whole phrase is synthesized in one call,
 * cached, and committed — unless it was cancelled or is no longer tracked.
 *
 * @param phrase The phrase to synthesize.
 * @param cancelSignal Shared flag; checked after synthesis returns.
 */
private async runPhraseSynthesis(
  phrase: Phrase,
  cancelSignal: { cancelled: boolean },
): Promise<void> {
  try {
    this.rollback.markSynthesizing(phrase.id);
    this.emitTelemetry({
      type: "tts-start",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
      inFlightPhrases: this.inFlight.size,
    });

    const useStreaming =
      this.streamingTtsActive && isStreamingTtsBackend(this.backend);
    if (!useStreaming) {
      const audio = await this.backend.synthesize({
        phrase,
        preset: this.preset,
        cancelSignal,
        onKernelTick: () => this.tickKernel(),
      });
      if (cancelSignal.cancelled) {
        this.emitTtsCancel(phrase, "synthesis-cancelled");
        return;
      }
      // Skip audio for a phrase that is no longer tracked.
      if (this.isPhraseTracked(phrase.id)) {
        this.phraseCache.put({
          text: phrase.text,
          pcm: audio.pcm,
          sampleRate: audio.sampleRate,
        });
        this.commitAudio(audio, phrase, "synthesis");
      }
      return;
    }

    const streamCancelled = await this.synthesizePhraseStream(
      phrase,
      cancelSignal,
    );
    if (streamCancelled || cancelSignal.cancelled) {
      this.emitTtsCancel(phrase, "synthesis-cancelled");
    }
  } finally {
    this.inFlight.delete(phrase.id);
  }
}
587
+
588
/**
 * Synthesize `phrase` through the streaming TTS path, committing every PCM
 * chunk to playback as soon as the backend delivers it.
 *
 * @param phrase - Phrase to synthesize.
 * @param cancelSignal - Shared mutable flag; once `cancelled` is set the chunk
 *   handler stops accepting audio.
 * @returns `false` when the backend is not a streaming backend (nothing is
 *   synthesized here); otherwise whether the stream finished cancelled.
 */
private async synthesizePhraseStream(
  phrase: Phrase,
  cancelSignal: { cancelled: boolean },
): Promise<boolean> {
  const backend = this.backend;
  if (!isStreamingTtsBackend(backend)) {
    return false;
  }

  const pcmParts: Float32Array[] = [];
  // T2 — per-chunk size distribution, reported via `onChunkMetrics` below.
  const chunkMetrics: Array<{ chunkBytes: number; chunkDurationMs: number }> =
    [];
  let accumulatedSamples = 0;
  let lastSampleRate = 0;
  let awaitingFirstAudio = true;

  // Per-chunk handler. The boolean result mirrors the cancellation state
  // (presumably the backend stops streaming on `true` — confirm against the
  // streaming backend contract).
  const handleChunk = (chunk: TtsPcmChunk): boolean => {
    if (cancelSignal.cancelled) {
      return true;
    }
    if (!this.isPhraseTracked(phrase.id)) {
      return true;
    }
    const skipChunk = chunk.isFinal || chunk.pcm.length === 0;
    if (skipChunk) {
      return cancelSignal.cancelled;
    }

    const pcm = copyPcm(chunk.pcm);
    const rate = chunk.sampleRate;
    let chunkDurationMs = 0;
    if (rate > 0) {
      chunkDurationMs = (pcm.length / rate) * 1000;
    }
    // Float32 samples occupy 4 bytes each.
    const chunkBytes = pcm.length * 4;

    pcmParts.push(pcm);
    accumulatedSamples += pcm.length;
    lastSampleRate = rate;
    chunkMetrics.push({ chunkBytes, chunkDurationMs });

    // T2 — per-chunk telemetry lets consumers tell short streamed chunks
    // apart from whole-phrase batches. The backend's constructor name is
    // used as the identity label, avoiding a dedicated config field.
    const backendLabel = backend.constructor.name;
    inferenceTelemetry.record("tts.chunk_size_ms", chunkDurationMs, {
      backend: backendLabel,
    });
    inferenceTelemetry.record("tts.chunk_size_bytes", chunkBytes, {
      backend: backendLabel,
    });

    // Every chunk of a phrase is tagged with the phrase's full token range:
    // the streaming TTS ABI offers no sub-phrase token attribution, so
    // barge-in rollback operates at phrase granularity.
    const taggedChunk: TaggedAudioChunk = {
      pcm,
      tokenRange: [phrase.fromIndex, phrase.toIndex],
      durationMs: chunkDurationMs,
    };
    this.prefixQueue.enqueue(taggedChunk);

    // `markPlayed` is deferred until the whole stream completes (see below);
    // only the first committed chunk emits the first-audio telemetry event.
    this.commitAudio(
      {
        phraseId: phrase.id,
        fromIndex: phrase.fromIndex,
        toIndex: phrase.toIndex,
        pcm,
        sampleRate: rate,
      },
      phrase,
      "synthesis",
      { emitFirstAudio: awaitingFirstAudio, markPlayed: false },
    );
    awaitingFirstAudio = false;
    return cancelSignal.cancelled;
  };

  const result = await backend.synthesizeStream({
    phrase,
    preset: this.preset,
    cancelSignal,
    onKernelTick: () => this.tickKernel(),
    onChunk: handleChunk,
  });

  const cancelled = result.cancelled || cancelSignal.cancelled;
  const finishedNormally = !cancelled && this.isPhraseTracked(phrase.id);
  if (finishedNormally) {
    this.rollback.markPlayed(phrase.id);
    // Cache the reassembled phrase audio only when something was streamed.
    if (accumulatedSamples > 0) {
      this.phraseCache.put({
        text: phrase.text,
        pcm: concatPcm(pcmParts, accumulatedSamples),
        sampleRate: lastSampleRate,
      });
    }
  }

  // T2 — report chunk metrics unconditionally so that a cancelled phrase
  // still shows what it streamed (useful when debugging barge-in latency).
  if (this.events.onChunkMetrics) {
    const totalDurationMs = chunkMetrics.reduce(
      (sum, metric) => sum + metric.chunkDurationMs,
      0,
    );
    const totalBytes = chunkMetrics.reduce(
      (sum, metric) => sum + metric.chunkBytes,
      0,
    );
    this.events.onChunkMetrics({
      phraseId: phrase.id,
      chunks: chunkMetrics,
      totalDurationMs,
      totalBytes,
      cancelled,
    });
  }
  return cancelled;
}
701
+
702
/**
 * Report whether the rollback snapshot currently contains an entry for the
 * phrase with the given id.
 *
 * @param phraseId - Id of the phrase to look up.
 * @returns `true` when some snapshot entry's phrase has that id.
 */
private isPhraseTracked(phraseId: number): boolean {
  const trackedEntries = this.rollback.snapshot();
  return trackedEntries.some(({ phrase }) => phrase.id === phraseId);
}
707
+
708
/**
 * Ask the backend to cancel TTS, but only when it passes the
 * `isNativeCancelableTtsBackend` check; otherwise do nothing.
 */
private cancelNativeTts(): void {
  const backend = this.backend;
  if (!isNativeCancelableTtsBackend(backend)) {
    return;
  }
  backend.cancelTts();
}
713
+
714
/**
 * Write a synthesized chunk into the ring buffer, flush it to the sink
 * unless TTS is paused, update rollback bookkeeping, and emit telemetry.
 *
 * @param chunk - Audio to commit.
 * @param phrase - Phrase the audio belongs to (used for telemetry).
 * @param source - Origin label recorded in telemetry events.
 * @param opts - `emitFirstAudio` and `markPlayed` both default to enabled;
 *   pass `false` explicitly to suppress either step.
 */
private commitAudio(
  chunk: AudioChunk,
  phrase: Phrase,
  source: VoiceAudioSource,
  opts: { emitFirstAudio?: boolean; markPlayed?: boolean } = {},
): void {
  // Only an explicit `false` disables a step; `undefined` means enabled.
  const shouldEmitFirstAudio = opts.emitFirstAudio !== false;
  const shouldMarkPlayed = opts.markPlayed !== false;

  if (shouldEmitFirstAudio) {
    this.emitTelemetry({
      type: "tts-first-audio",
      atMs: nowMs(),
      phrase: phraseTelemetry(phrase),
      source,
      samples: chunk.pcm.length,
      sampleRate: chunk.sampleRate,
    });
  }

  this.rollback.markRingBuffered(chunk.phraseId);
  this.ringBuffer.write(chunk.pcm);

  // While a provisional barge-in has TTS paused, the PCM stays in the ring
  // buffer and is NOT handed to the sink: `resume-tts` flushes it later and
  // `hard-stop` drains it.
  const flushNow = !this.paused;
  const flushedSamples = flushNow ? this.ringBuffer.flushToSink() : 0;
  if (flushNow) {
    this.markAgentSpeakingForAudio(flushedSamples, chunk.sampleRate);
  }

  if (shouldMarkPlayed) {
    this.rollback.markPlayed(chunk.phraseId);
  }

  this.emitTelemetry({
    type: "audio-committed",
    atMs: nowMs(),
    phrase: phraseTelemetry(phrase),
    source,
    samples: chunk.pcm.length,
    sampleRate: chunk.sampleRate,
    flushedSamples,
    paused: this.paused,
    ringBufferSamples: this.ringBuffer.size(),
    sinkBufferedSamples: this.sink.bufferedSamples(),
  });
  this.events.onAudio?.(chunk);
}
757
+
758
/**
 * React to a barge-in controller signal by toggling the paused state.
 *
 * - `pause-tts`: enter the paused state (once) and notify `onTtsPause`.
 * - `resume-tts`: leave the paused state, flush any audio buffered while
 *   paused, and notify `onTtsResume`.
 * - `hard-stop`: deliberately ignored here (see the note at the end).
 *
 * @param signal - Signal to handle.
 */
private onBargeInSignal(signal: BargeInSignal): void {
  if (signal.type === "pause-tts") {
    if (this.paused) {
      return;
    }
    this.paused = true;
    this.events.onTtsPause?.();
    return;
  }

  if (signal.type === "resume-tts") {
    if (!this.paused) {
      return;
    }
    this.paused = false;
    // Hand whatever accumulated during the pause over to the sink now.
    if (this.ringBuffer.size() > 0) {
      const flushed = this.ringBuffer.flushToSink();
      this.markAgentSpeakingForAudio(flushed, this.sampleRate);
    }
    this.events.onTtsResume?.();
    return;
  }

  // "hard-stop" needs no work here: it is handled by the legacy
  // `attach.onCancel` hook registered in the constructor.
  // `BargeInController.hardStop()` fires both the `attach` listeners and
  // `onSignal(hard-stop)`, so draining the ring buffer again would
  // double-fire `onCancel`. The engine layer subscribes to
  // `onSignal(hard-stop)` separately to thread `signal.token.signal` into
  // `dispatcher.generate`.
}
789
+
790
/**
 * Hard barge-in handler: un-pauses, clears speaking/flush timers, rolls the
 * buffered audio back to the last committed token, cancels every in-flight
 * phrase, and notifies `onCancel`.
 */
private handleBargeIn(): void {
  // Capture the pre-drain state first; it is reported in telemetry below.
  const ringBufferSamplesDrained = this.ringBuffer.size();
  const sinkBufferedSamplesDrained = this.sink.bufferedSamples();
  const wasPaused = this.paused;
  const inFlightPhrases = Array.from(this.inFlight.values());
  const divergencePoint = this.lastCommittedTokenIndex;

  this.paused = false;
  this.clearAgentSpeaking();
  this.clearPhraseFlushTimer();

  // Prefix-preserving rollback: split the tagged chunks at the divergence
  // point. Chunks for tokens <= divergencePoint are replayed into the sink
  // (they were already correct); everything after is dropped, which avoids
  // re-synthesizing audio the user would have heard anyway.
  const prefixResult = this.prefixQueue.rollbackAt(divergencePoint);
  const { retained, dropped } = prefixResult;
  const hadTaggedChunks = retained.length > 0 || dropped.length > 0;

  // The ring buffer is drained on both paths. With tagged chunks it may hold
  // audio about to be replayed from the retained prefix, or audio past the
  // cutoff; without them (e.g. no streaming chunk was emitted yet) this
  // plain drain is the entire legacy behaviour.
  this.ringBuffer.drain();

  if (hadTaggedChunks) {
    // Replay the retained prefix into the ring buffer, then flush to sink.
    for (const { pcm } of retained) {
      this.ringBuffer.write(pcm);
    }
    if (retained.length > 0) {
      const flushed = this.ringBuffer.flushToSink();
      this.markAgentSpeakingForAudio(flushed, this.sampleRate);
    }
    this.emitTelemetry({
      type: "barge-in-prefix-rollback",
      atMs: nowMs(),
      divergencePoint,
      retainedChunks: retained.length,
      droppedChunks: dropped.length,
      straddledChunks: prefixResult.straddled.length,
      retainedDurationMs: prefixResult.retainedDurationMs,
      droppedDurationMs: prefixResult.droppedDurationMs,
    });
  }

  this.chunker.reset();
  this.lastCommittedTokenIndex = 0;

  // Flag every phrase that was in flight when the barge-in started.
  for (const { cancelSignal, phrase } of inFlightPhrases) {
    cancelSignal.cancelled = true;
    this.emitTtsCancel(phrase, "barge-in");
  }
  this.cancelNativeTts();

  this.emitTelemetry({
    type: "barge-in",
    atMs: nowMs(),
    ringBufferSamplesDrained,
    sinkBufferedSamplesDrained,
    inFlightPhrasesCancelled: inFlightPhrases.length,
    wasPaused,
  });
  this.events.onCancel?.();
}
855
+
856
/**
 * Emit a `tts-cancel` telemetry event for a phrase.
 *
 * @param phrase - Phrase whose synthesis was cancelled.
 * @param reason - Why it was cancelled.
 */
private emitTtsCancel(phrase: Phrase, reason: VoiceTtsCancelReason): void {
  const cancelEvent: VoiceSchedulerTelemetryEvent = {
    type: "tts-cancel",
    atMs: nowMs(),
    phrase: phraseTelemetry(phrase),
    reason,
  };
  this.emitTelemetry(cancelEvent);
}
864
+
865
/**
 * Forward a telemetry event to the `onTelemetry` listener, if one is set.
 *
 * @param event - Event to deliver.
 */
private emitTelemetry(event: VoiceSchedulerTelemetryEvent): void {
  const listeners = this.events;
  if (!listeners.onTelemetry) {
    return;
  }
  listeners.onTelemetry(event);
}
868
+
869
/**
 * (Re)schedule the timer that flushes the chunker once its time budget
 * elapses. Any previously armed timer is cleared first; nothing is scheduled
 * when the chunker reports a non-finite delay.
 */
private armPhraseFlushTimer(): void {
  this.clearPhraseFlushTimer();

  const delayMs = this.chunker.msUntilTimeBudget();
  if (!Number.isFinite(delayMs)) {
    return;
  }

  const onBudgetElapsed = (): void => {
    this.phraseFlushTimer = null;
    const phrase = this.chunker.flushIfTimeBudgetExceeded();
    if (phrase) {
      // Fire-and-forget dispatch; a rejection is rethrown from a fresh
      // macrotask so it surfaces as an uncaught error instead of vanishing.
      void this.dispatchPhrase(phrase).catch((dispatchError) => {
        setTimeout(() => {
          throw dispatchError;
        }, 0);
      });
      return;
    }
    // Nothing was flushed yet — schedule another check.
    this.armPhraseFlushTimer();
  };

  // Clamp negative delays to "as soon as possible".
  this.phraseFlushTimer = setTimeout(onBudgetElapsed, Math.max(0, delayMs));
}
890
+
891
/** Cancel the pending phrase-flush timer, if any, and forget its handle. */
private clearPhraseFlushTimer(): void {
  const pendingTimer = this.phraseFlushTimer;
  if (!pendingTimer) {
    return;
  }
  clearTimeout(pendingTimer);
  this.phraseFlushTimer = null;
}
897
+
898
/**
 * (Re)arm the timer that fires at `agentSpeakingUntilMs`. When it fires
 * past the deadline it resets the deadline and, if the ring buffer is empty,
 * tells the barge-in controller the agent is no longer speaking.
 */
private armAgentSpeakingTimer(): void {
  const previousTimer = this.agentSpeakingTimer;
  if (previousTimer) {
    clearTimeout(previousTimer);
    this.agentSpeakingTimer = null;
  }

  // Always wait at least 1 ms, even when the deadline has already passed.
  const remainingMs = Math.max(1, this.agentSpeakingUntilMs - nowMs());

  const onDeadline = (): void => {
    this.agentSpeakingTimer = null;
    // The deadline may have been pushed out while waiting; if so, wait again.
    if (nowMs() < this.agentSpeakingUntilMs) {
      this.armAgentSpeakingTimer();
      return;
    }
    this.agentSpeakingUntilMs = 0;
    const bufferIsEmpty = this.ringBuffer.size() === 0;
    if (bufferIsEmpty) {
      this.bargeIn.setAgentSpeaking(false);
    }
  };
  this.agentSpeakingTimer = setTimeout(onDeadline, remainingMs);

  // Call `unref` when the timer handle provides it (handles without the
  // method are left alone).
  const unrefCapable = this.agentSpeakingTimer as { unref?: () => void };
  unrefCapable.unref?.();
}
918
+
919
/**
 * Reset the agent-speaking deadline, cancel its pending timer (if any), and
 * tell the barge-in controller the agent is not speaking.
 */
private clearAgentSpeaking(): void {
  const pendingTimer = this.agentSpeakingTimer;
  this.agentSpeakingUntilMs = 0;
  if (pendingTimer) {
    clearTimeout(pendingTimer);
    this.agentSpeakingTimer = null;
  }
  this.bargeIn.setAgentSpeaking(false);
}
927
+ }