@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,1070 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { BargeInController } from "./barge-in";
3
+ import { RuleBasedEnglishPhonemeTokenizer } from "./phoneme-tokenizer";
4
+ import { canonicalizePhraseText, PhraseCache } from "./phrase-cache";
5
+ import { chunkTokens, PhraseChunker } from "./phrase-chunker";
6
+ import { InMemoryAudioSink, PcmRingBuffer } from "./ring-buffer";
7
+ import { RollbackQueue } from "./rollback-queue";
8
+ import { VoiceScheduler } from "./scheduler";
9
+ import type {
10
+ AudioChunk,
11
+ OmniVoiceBackend,
12
+ Phrase,
13
+ SpeakerPreset,
14
+ StreamingTtsBackend,
15
+ TextToken,
16
+ TtsPcmChunk,
17
+ VadEvent,
18
+ VoiceSchedulerTelemetryEvent,
19
+ } from "./types";
20
+ import {
21
+ readVoicePresetFile,
22
+ VoicePresetFormatError,
23
+ writeVoicePresetFile,
24
+ } from "./voice-preset-format";
25
+
26
/** Builds a minimal text token carrying only its stream index and text. */
function tok(index: number, text: string): TextToken {
  const token: TextToken = { index, text };
  return token;
}
29
+
30
/**
 * Creates the speaker preset used by the tests in this file: voice id
 * "default", a three-value embedding, and a byte view over a copy of the
 * embedding's backing buffer.
 */
function makePreset(): SpeakerPreset {
  const embedding = Float32Array.of(0.1, 0.2, 0.3);
  // slice(0) copies the buffer, so `bytes` does not alias `embedding`.
  const bufferCopy = embedding.buffer.slice(0);
  const bytes = new Uint8Array(bufferCopy);
  return { voiceId: "default", embedding, bytes };
}
38
+
39
/**
 * Batch-only backend test double.
 *
 * Each `synthesize` call returns a constant-valued PCM buffer whose length is
 * derived from the phrase's token span, and records how often it was called
 * and which phrases had their cancel signal set at check time.
 */
class FakeBackend implements OmniVoiceBackend {
  /** Number of `synthesize` invocations so far. */
  calls = 0;
  /** Ids of phrases whose cancel signal was set when it was inspected. */
  cancelObserved: number[] = [];
  /** Artificial wait in milliseconds before producing audio; 0 skips the wait. */
  delay = 0;
  /** PCM samples generated per token covered by the phrase. */
  samplesPerToken = 8;

  /**
   * Produces an audio chunk for `args.phrase`.
   *
   * The buffer holds `max(1, tokenCount * samplesPerToken)` samples, each set
   * to `(phrase.id + 1) * 0.01`. A set cancel signal is only recorded; audio
   * is still returned.
   */
  async synthesize(args: {
    phrase: Phrase;
    preset: SpeakerPreset;
    cancelSignal: { cancelled: boolean };
    onKernelTick?: () => void;
  }): Promise<AudioChunk> {
    const { phrase, cancelSignal } = args;
    this.calls += 1;

    const coveredTokens = phrase.toIndex - phrase.fromIndex + 1;
    const sampleCount = Math.max(1, coveredTokens * this.samplesPerToken);

    if (this.delay > 0) {
      await new Promise((resolve) => setTimeout(resolve, this.delay));
    }

    // Tick first, then look at the cancel flag.
    args.onKernelTick?.();
    if (cancelSignal.cancelled) {
      this.cancelObserved.push(phrase.id);
    }

    const pcm = new Float32Array(sampleCount).fill((phrase.id + 1) * 0.01);
    return {
      phraseId: phrase.id,
      fromIndex: phrase.fromIndex,
      toIndex: phrase.toIndex,
      pcm,
      sampleRate: 24000,
    };
  }
}
72
+
73
/**
 * Streaming backend test double.
 *
 * `synthesizeStream` replays the fixed `chunks` list through `onChunk` and
 * always finishes with an empty `isFinal: true` chunk. The batch `synthesize`
 * path throws so that a test fails if the batch path is used instead.
 */
class StreamingBackend implements OmniVoiceBackend, StreamingTtsBackend {
  /** Number of batch `synthesize` invocations. */
  calls = 0;
  /** Number of `synthesizeStream` invocations. */
  streamCalls = 0;
  /** Number of `cancelTts` invocations. */
  cancelCalls = 0;
  /** PCM chunks replayed, in order, by every stream call. */
  chunks: Float32Array[] = [
    Float32Array.of(0.11, 0.12),
    Float32Array.of(0.13, 0.14, 0.15),
  ];

  /** Always rejects: the batch path must not be used with this backend. */
  async synthesize(): Promise<AudioChunk> {
    this.calls += 1;
    throw new Error(
      "batch synthesize should not be used for streaming backend",
    );
  }

  /**
   * Emits each configured chunk, stopping early when the cancel signal is set
   * or when `onChunk` returns `true`.
   *
   * @returns `{ cancelled: true }` on an early stop requested after a chunk
   *   was delivered; otherwise the cancel signal's value once the final chunk
   *   has been emitted.
   */
  async synthesizeStream(args: {
    phrase: Phrase;
    preset: SpeakerPreset;
    cancelSignal: { cancelled: boolean };
    onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
    onKernelTick?: () => void;
  }): Promise<{ cancelled: boolean }> {
    this.streamCalls += 1;

    // Every exit path delivers one empty terminal chunk.
    const emitFinal = (): void => {
      args.onChunk({
        pcm: new Float32Array(0),
        sampleRate: 24000,
        isFinal: true,
      });
    };

    for (const pcm of this.chunks) {
      args.onKernelTick?.();
      if (args.cancelSignal.cancelled) break;

      const stopRequested =
        args.onChunk({ pcm, sampleRate: 24000, isFinal: false }) === true;
      if (stopRequested || args.cancelSignal.cancelled) {
        emitFinal();
        return { cancelled: true };
      }
    }

    emitFinal();
    return { cancelled: args.cancelSignal.cancelled };
  }

  /** Records that a cancellation was requested. */
  cancelTts(): void {
    this.cancelCalls += 1;
  }
}
126
+
127
/**
 * Streaming backend that emits one chunk, then holds the stream open until
 * `finish()` is called.
 *
 * `afterFirstChunk` resolves once the first chunk has been delivered, giving a
 * test a point at which it can act while synthesis is still in flight.
 */
class PausingStreamingBackend extends StreamingBackend {
  // Each resolver is declared before the promise whose executor assigns it;
  // keep this order, since field initialisers run top to bottom.
  private signalFirstChunk!: () => void;
  /** Resolves after the first (non-final) chunk has been handed to `onChunk`. */
  readonly afterFirstChunk = new Promise<void>((resolve) => {
    this.signalFirstChunk = resolve;
  });
  private openFinishGate!: () => void;
  private readonly finishGate = new Promise<void>((resolve) => {
    this.openFinishGate = resolve;
  });

  /**
   * Delivers a single three-sample chunk, waits for `finish()`, then delivers
   * the empty terminal chunk. The cancel signal is not consulted until the
   * return value is built.
   */
  async synthesizeStream(args: {
    phrase: Phrase;
    preset: SpeakerPreset;
    cancelSignal: { cancelled: boolean };
    onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
    onKernelTick?: () => void;
  }): Promise<{ cancelled: boolean }> {
    this.streamCalls += 1;
    args.onKernelTick?.();

    const firstChunk: TtsPcmChunk = {
      pcm: new Float32Array([0.21, 0.22, 0.23]),
      sampleRate: 24000,
      isFinal: false,
    };
    args.onChunk(firstChunk);
    this.signalFirstChunk();

    // Hold the stream open until the test releases it.
    await this.finishGate;

    const terminalChunk: TtsPcmChunk = {
      pcm: new Float32Array(0),
      sampleRate: 24000,
      isFinal: true,
    };
    args.onChunk(terminalChunk);
    return { cancelled: args.cancelSignal.cancelled };
  }

  /** Releases the pending `synthesizeStream` call so it can complete. */
  finish(): void {
    this.openFinishGate();
  }
}
165
+
166
// PhraseChunker: phrase boundaries at punctuation, at the token cap, and on
// an explicit flush.
describe("PhraseChunker", () => {
  it("splits at sentence-final punctuation", () => {
    const pieces = ["Hello", " world", ".", " How", " are", " you", "?"];
    const tokens: TextToken[] = pieces.map((text, index) => tok(index, text));

    const phrases = chunkTokens(tokens, { maxTokensPerPhrase: 100 });

    expect(phrases).toHaveLength(2);
    const [first, second] = phrases;
    expect(first.text).toBe("Hello world.");
    expect(first.terminator).toBe("punctuation");
    expect(first.fromIndex).toBe(0);
    expect(first.toIndex).toBe(2);
    expect(second.text).toBe(" How are you?");
    expect(second.fromIndex).toBe(3);
    expect(second.toIndex).toBe(6);
  });

  it("splits at the max-token cap when no punctuation", () => {
    const tokens: TextToken[] = [];
    for (let i = 0; i < 7; i += 1) {
      tokens.push(tok(i, `t${i} `));
    }

    const phrases = chunkTokens(tokens, { maxTokensPerPhrase: 3 });

    expect(phrases).toHaveLength(3);
    expect(phrases[0].terminator).toBe("max-cap");
    const ranges = phrases.map((p) => [p.fromIndex, p.toIndex]);
    expect(ranges).toEqual([
      [0, 2],
      [3, 5],
      [6, 6],
    ]);
  });

  it("flushes pending tokens via flushPending()", () => {
    const chunker = new PhraseChunker({ maxTokensPerPhrase: 100 });
    for (const token of [tok(0, "Hi"), tok(1, " there")]) {
      chunker.push({ ...token, acceptedAt: 0 });
    }

    const tail = chunker.flushPending();

    expect(tail).not.toBeNull();
    expect(tail?.text).toBe("Hi there");
    expect(tail?.terminator).toBe("max-cap");
  });

  it("flushes at a comma (clause boundary), not just sentence-final marks", () => {
    const pieces = ["Sure", ",", " let", " me", " check", "."];
    const tokens: TextToken[] = pieces.map((text, index) => tok(index, text));

    const phrases = chunkTokens(tokens, { maxTokensPerPhrase: 100 });

    expect(phrases).toHaveLength(2);
    const [clause, rest] = phrases;
    expect(clause.text).toBe("Sure,");
    expect(clause.terminator).toBe("punctuation");
    expect(clause.toIndex).toBe(1);
    expect(rest.text).toBe(" let me check.");
  });

  it("defaults maxTokensPerPhrase to 30 words when not supplied", () => {
    const tokens: TextToken[] = [];
    for (let i = 0; i < 65; i += 1) {
      tokens.push(tok(i, `w${i} `));
    }

    // With no punctuation in the input, only the default cap can split it.
    const phrases = chunkTokens(tokens, {});

    expect(phrases).toHaveLength(3); // 30 + 30 + 5
    const sizes = phrases.map((p) => p.toIndex - p.fromIndex + 1);
    expect(sizes).toEqual([30, 30, 5]);
    const terminators = phrases.map((p) => p.terminator);
    expect(terminators.every((t) => t === "max-cap")).toBe(true);
  });
});
243
+
244
// RollbackQueue: which tracked phrases produce rollback events when a token
// range is rejected.
describe("RollbackQueue", () => {
  it("emits rollback events for in-flight phrases overlapping rejected range", () => {
    const q = new RollbackQueue();
    const phraseA: Phrase = {
      id: 0,
      text: "a.",
      fromIndex: 0,
      toIndex: 4,
      terminator: "punctuation",
    };
    const phraseB: Phrase = {
      id: 1,
      text: "b.",
      fromIndex: 5,
      toIndex: 9,
      terminator: "punctuation",
    };
    const phraseC: Phrase = {
      id: 2,
      text: "c.",
      fromIndex: 10,
      toIndex: 14,
      terminator: "punctuation",
    };
    q.track(phraseA);
    q.track(phraseB);
    q.track(phraseC);
    // One phrase in each state: played, ring-buffered, synthesizing.
    q.markPlayed(phraseA.id);
    q.markRingBuffered(phraseB.id);
    q.markSynthesizing(phraseC.id);

    // Tokens 7..12 overlap phrase B (5..9) and phrase C (10..14).
    const events = q.onRejected({ fromIndex: 7, toIndex: 12 });
    // Numeric comparator: the default sort compares elements as strings and
    // would misorder ids once they reach two digits.
    const ids = events.map((e) => e.phraseId).sort((a, b) => a - b);
    expect(ids).toEqual([1, 2]);
  });

  it("does not roll back already-played phrases", () => {
    const q = new RollbackQueue();
    const p: Phrase = {
      id: 0,
      text: "x",
      fromIndex: 0,
      toIndex: 3,
      terminator: "max-cap",
    };
    q.track(p);
    q.markPlayed(p.id);
    expect(q.onRejected({ fromIndex: 1, toIndex: 2 })).toEqual([]);
  });
});
294
+
295
// BargeInController: cancel-signal state around mic activity and reset.
describe("BargeInController", () => {
  it("flips cancel signal and notifies listeners on mic activity", () => {
    const controller = new BargeInController();
    let cancelNotifications = 0;
    controller.attach({ onCancel: () => cancelNotifications++ });

    const before = controller.cancelSignal().cancelled;
    expect(before).toBe(false);

    controller.onMicActive();

    const after = controller.cancelSignal().cancelled;
    expect(after).toBe(true);
    expect(cancelNotifications).toBe(1);
  });

  it("reset issues a fresh cancel signal", () => {
    const controller = new BargeInController();

    controller.onMicActive();
    expect(controller.cancelSignal().cancelled).toBe(true);

    controller.reset();
    expect(controller.cancelSignal().cancelled).toBe(false);
  });
});
314
+
315
// PcmRingBuffer: buffered size, flushing to the sink, overflow, and drain.
describe("PcmRingBuffer", () => {
  it("writes samples and flushes them to the sink", () => {
    const sink = new InMemoryAudioSink();
    const ring = new PcmRingBuffer(8, 24000, sink);

    ring.write(Float32Array.from([1, 2, 3, 4]));
    expect(ring.size()).toBe(4);

    ring.flushToSink();
    expect(ring.size()).toBe(0);
    expect(sink.totalWritten()).toBe(4);
  });

  it("wraps around when written past capacity (oldest dropped)", () => {
    const sink = new InMemoryAudioSink();
    const ring = new PcmRingBuffer(4, 24000, sink);

    // Six samples into a four-sample buffer.
    ring.write(Float32Array.from([1, 2, 3, 4, 5, 6]));
    expect(ring.size()).toBe(4);

    ring.flushToSink();
    expect(sink.chunks).toHaveLength(1);
    const flushed = Array.from(sink.chunks[0].pcm);
    expect(flushed).toEqual([3, 4, 5, 6]);
  });

  it("drain clears buffer without writing to sink", () => {
    const sink = new InMemoryAudioSink();
    const ring = new PcmRingBuffer(4, 24000, sink);

    ring.write(Float32Array.from([1, 2, 3]));
    ring.drain();

    expect(ring.size()).toBe(0);
    expect(sink.totalWritten()).toBe(0);
  });
});
345
+
346
// PhraseCache: key canonicalization, lookup, eviction, and the per-entry
// size limit.
describe("PhraseCache", () => {
  // Single-sample cache entry at the 24 kHz rate used throughout this file.
  const oneSample = (text: string, value: number) => ({
    text,
    pcm: new Float32Array([value]),
    sampleRate: 24000,
  });

  it("canonicalizes whitespace and case", () => {
    const canonical = canonicalizePhraseText(" Hello World ");
    expect(canonical).toBe("hello world");
  });

  it("hits on canonical match", () => {
    const cache = new PhraseCache();
    cache.put(oneSample("Sure.", 0.5));

    expect(cache.has("sure.")).toBe(true);
    expect(cache.get(" SURE. ")?.pcm[0]).toBe(0.5);
  });

  it("evicts least-recently-used entries under a hard entry cap", () => {
    const cache = new PhraseCache({ maxEntries: 2 });
    cache.put(oneSample("one", 1));
    cache.put(oneSample("two", 2));

    // Read "one" before the third insert; "two" is then expected to be evicted.
    expect(cache.get("one")?.pcm[0]).toBe(1);
    cache.put(oneSample("three", 3));

    expect(cache.has("one")).toBe(true);
    expect(cache.has("two")).toBe(false);
    expect(cache.has("three")).toBe(true);
  });

  it("does not admit oversized PCM entries", () => {
    const cache = new PhraseCache({ maxPcmSamplesPerEntry: 2 });
    const threeSamples = new Float32Array([1, 2, 3]);

    cache.put({ text: "too long", pcm: threeSamples, sampleRate: 24000 });

    expect(cache.has("too long")).toBe(false);
  });
});
383
+
384
+ describe("VoiceScheduler end-to-end", () => {
385
+ it("synthesizes phrases via test backend and emits PCM", async () => {
386
+ const backend = new FakeBackend();
387
+ const sink = new InMemoryAudioSink();
388
+ const phraseEvents: Phrase[] = [];
389
+ const audioEvents: AudioChunk[] = [];
390
+ const sched = new VoiceScheduler(
391
+ {
392
+ chunkerConfig: { maxTokensPerPhrase: 10 },
393
+ preset: makePreset(),
394
+ ringBufferCapacity: 4096,
395
+ sampleRate: 24000,
396
+ },
397
+ { backend, sink },
398
+ {
399
+ onPhrase: (p) => phraseEvents.push(p),
400
+ onAudio: (c) => audioEvents.push(c),
401
+ },
402
+ );
403
+
404
+ const tokens: TextToken[] = [
405
+ tok(0, "Hello"),
406
+ tok(1, " world"),
407
+ tok(2, "."),
408
+ tok(3, " Bye"),
409
+ tok(4, "."),
410
+ ];
411
+ for (const t of tokens) await sched.accept(t);
412
+ await sched.waitIdle();
413
+
414
+ expect(phraseEvents.map((p) => p.text)).toEqual(["Hello world.", " Bye."]);
415
+ expect(audioEvents).toHaveLength(2);
416
+ expect(backend.calls).toBe(2);
417
+ expect(sink.totalWritten()).toBeGreaterThan(0);
418
+ });
419
+
420
+ it("marks agent speaking while committed audio can be interrupted", async () => {
421
+ vi.useFakeTimers();
422
+ try {
423
+ const backend = new FakeBackend();
424
+ backend.samplesPerToken = 240;
425
+ const sink = new InMemoryAudioSink();
426
+ const sched = new VoiceScheduler(
427
+ {
428
+ chunkerConfig: { maxTokensPerPhrase: 10 },
429
+ preset: makePreset(),
430
+ ringBufferCapacity: 4096,
431
+ sampleRate: 24000,
432
+ },
433
+ { backend, sink },
434
+ );
435
+
436
+ await sched.accept(tok(0, "Hi."));
437
+ await sched.waitIdle();
438
+
439
+ expect(sink.totalWritten()).toBeGreaterThan(0);
440
+ expect(sched.bargeIn.isAgentSpeaking).toBe(true);
441
+ await vi.advanceTimersByTimeAsync(100);
442
+ expect(sched.bargeIn.isAgentSpeaking).toBe(false);
443
+ } finally {
444
+ vi.useRealTimers();
445
+ }
446
+ });
447
+
448
+ it("time-budget flushes a buffered phrase without waiting for another token", async () => {
449
+ vi.useFakeTimers();
450
+ try {
451
+ const backend = new FakeBackend();
452
+ const sink = new InMemoryAudioSink();
453
+ const phraseEvents: Phrase[] = [];
454
+ const sched = new VoiceScheduler(
455
+ {
456
+ chunkerConfig: { maxAccumulationMs: 50, maxTokensPerPhrase: 100 },
457
+ preset: makePreset(),
458
+ ringBufferCapacity: 4096,
459
+ sampleRate: 24000,
460
+ },
461
+ { backend, sink },
462
+ { onPhrase: (p) => phraseEvents.push(p) },
463
+ );
464
+
465
+ await sched.accept(tok(0, "Hello"));
466
+ expect(phraseEvents).toHaveLength(0);
467
+
468
+ await vi.advanceTimersByTimeAsync(51);
469
+ await sched.waitIdle();
470
+
471
+ expect(phraseEvents.map((p) => p.text)).toEqual(["Hello"]);
472
+ expect(phraseEvents[0].terminator).toBe("max-cap");
473
+ expect(backend.calls).toBe(1);
474
+ expect(sink.totalWritten()).toBeGreaterThan(0);
475
+ } finally {
476
+ vi.useRealTimers();
477
+ }
478
+ });
479
+
480
+ it("streams TTS chunks into the ring buffer and caches the assembled phrase", async () => {
481
+ const backend = new StreamingBackend();
482
+ const sink = new InMemoryAudioSink();
483
+ const firstAudioEvents: VoiceSchedulerTelemetryEvent[] = [];
484
+ const sched = new VoiceScheduler(
485
+ {
486
+ chunkerConfig: { maxTokensPerPhrase: 10 },
487
+ preset: makePreset(),
488
+ ringBufferCapacity: 4096,
489
+ sampleRate: 24000,
490
+ },
491
+ { backend, sink },
492
+ {
493
+ onTelemetry: (event) => {
494
+ if (event.type === "tts-first-audio") firstAudioEvents.push(event);
495
+ },
496
+ },
497
+ );
498
+
499
+ await sched.accept(tok(0, "Stream"));
500
+ await sched.accept(tok(1, "."));
501
+ await sched.waitIdle();
502
+ await sched.accept(tok(2, " stream"));
503
+ await sched.accept(tok(3, "."));
504
+ await sched.waitIdle();
505
+
506
+ expect(backend.calls).toBe(0);
507
+ expect(backend.streamCalls).toBe(1);
508
+ expect(sink.totalWritten()).toBe(10);
509
+ expect(firstAudioEvents).toHaveLength(2);
510
+ });
511
+
512
+ it("drops audio for phrases overlapping rejected token range", async () => {
513
+ const backend = new FakeBackend();
514
+ backend.delay = 20;
515
+ const sink = new InMemoryAudioSink();
516
+ const rollbacks: number[] = [];
517
+ const sched = new VoiceScheduler(
518
+ {
519
+ chunkerConfig: { maxTokensPerPhrase: 10 },
520
+ preset: makePreset(),
521
+ ringBufferCapacity: 4096,
522
+ sampleRate: 24000,
523
+ },
524
+ { backend, sink },
525
+ { onRollback: (id) => rollbacks.push(id) },
526
+ );
527
+
528
+ await sched.accept(tok(0, "First"));
529
+ await sched.accept(tok(1, " phrase"));
530
+ await sched.accept(tok(2, "."));
531
+ await sched.accept(tok(3, " Second"));
532
+ await sched.accept(tok(4, " phrase"));
533
+ await sched.accept(tok(5, "."));
534
+
535
+ await sched.reject({ fromIndex: 4, toIndex: 5 });
536
+ await sched.waitIdle();
537
+
538
+ expect(rollbacks).toContain(1);
539
+ expect(rollbacks).not.toContain(0);
540
+ expect(sink.chunks.length).toBe(1);
541
+ });
542
+
543
+ it("barge-in cancels in-flight synthesis at next kernel boundary", async () => {
544
+ const backend = new FakeBackend();
545
+ backend.delay = 30;
546
+ const sink = new InMemoryAudioSink();
547
+ let cancelEmitted = 0;
548
+ const sched = new VoiceScheduler(
549
+ {
550
+ chunkerConfig: { maxTokensPerPhrase: 10 },
551
+ preset: makePreset(),
552
+ ringBufferCapacity: 4096,
553
+ sampleRate: 24000,
554
+ },
555
+ { backend, sink },
556
+ { onCancel: () => cancelEmitted++ },
557
+ );
558
+
559
+ await sched.accept(tok(0, "Hello"));
560
+ await sched.accept(tok(1, " there"));
561
+ await sched.accept(tok(2, "."));
562
+
563
+ const ticksBefore = sched.kernelTickCount();
564
+ sched.bargeIn.onMicActive();
565
+ await sched.waitIdle();
566
+ const ticksAfter = sched.kernelTickCount();
567
+
568
+ expect(cancelEmitted).toBe(1);
569
+ expect(sched.bargeIn.cancelSignal().cancelled).toBe(true);
570
+ expect(ticksAfter - ticksBefore).toBeLessThanOrEqual(1);
571
+ expect(sink.totalWritten()).toBe(0);
572
+ });
573
+
574
+ it("drops audio for IPA-mode sub-phrase chunks overlapping rejected range", async () => {
575
+ const backend = new FakeBackend();
576
+ backend.delay = 20;
577
+ const sink = new InMemoryAudioSink();
578
+ const tokenizer = new RuleBasedEnglishPhonemeTokenizer();
579
+ const rollbacks: number[] = [];
580
+ const sched = new VoiceScheduler(
581
+ {
582
+ chunkerConfig: {
583
+ maxTokensPerPhrase: 100,
584
+ chunkOn: "phoneme-stream",
585
+ phonemesPerChunk: 4,
586
+ },
587
+ preset: makePreset(),
588
+ ringBufferCapacity: 4096,
589
+ sampleRate: 24000,
590
+ },
591
+ { backend, sink, phonemeTokenizer: tokenizer },
592
+ { onRollback: (id) => rollbacks.push(id) },
593
+ );
594
+
595
+ // Each token is 4 chars => 4 phonemes => exactly one chunk per token.
596
+ await sched.accept(tok(0, "abcd"));
597
+ await sched.accept(tok(1, "efgh"));
598
+ await sched.accept(tok(2, "ijkl"));
599
+
600
+ // Reject token 1; chunk #1 (token range [1..1]) must roll back; #0 stays.
601
+ await sched.reject({ fromIndex: 1, toIndex: 1 });
602
+ await sched.waitIdle();
603
+
604
+ expect(rollbacks).toContain(1);
605
+ expect(rollbacks).not.toContain(0);
606
+ });
607
+
608
// Cache-hit path: "Sure." is stored in the PhraseCache up front, so streaming
// the tokens "Sure" + "." is expected to deliver the stored PCM to the sink
// while the backend call counter stays at zero.
it("uses phrase cache for precomputed common utterances (no backend call)", async () => {
  const ttsBackend = new FakeBackend();
  const audioSink = new InMemoryAudioSink();

  const seededCache = new PhraseCache();
  seededCache.put({
    text: "Sure.",
    pcm: new Float32Array([0.42, 0.42, 0.42]),
    sampleRate: 24000,
  });

  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: ttsBackend, sink: audioSink, phraseCache: seededCache },
  );

  // The two tokens concatenate to the cached text "Sure.".
  await scheduler.accept(tok(0, "Sure"));
  await scheduler.accept(tok(1, "."));
  await scheduler.waitIdle();

  expect(ttsBackend.calls).toBe(0);
  expect(audioSink.chunks).toHaveLength(1);

  const deliveredPcm = audioSink.chunks[0].pcm;
  expect(deliveredPcm).toHaveLength(3);
  // 0.42 is not exactly representable as a 32-bit float, hence the tolerance.
  for (const sample of deliveredPcm) {
    expect(sample).toBeCloseTo(0.42, 5);
  }
});
638
+
639
// Streams the same short utterance twice (second time with different casing
// and a leading space) and expects a single backend call but two sink chunks.
it("opportunistically caches synthesized phrases for repeated stream text", async () => {
  const ttsBackend = new FakeBackend();
  const audioSink = new InMemoryAudioSink();
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: ttsBackend, sink: audioSink },
  );

  // Feeds `<word>` followed by "." as two consecutive tokens, then waits for
  // the scheduler to go idle before returning.
  const speakWord = async (firstIndex: number, word: string) => {
    await scheduler.accept(tok(firstIndex, word));
    await scheduler.accept(tok(firstIndex + 1, "."));
    await scheduler.waitIdle();
  };

  await speakWord(0, "Okay");
  await speakWord(2, " OKAY");

  expect(ttsBackend.calls).toBe(1);
  expect(audioSink.chunks).toHaveLength(2);
});
662
+
663
// Calls synthesizeText twice with text that differs only in casing and
// surrounding whitespace; expects one backend call and identical PCM.
it("opportunistically caches direct TEXT_TO_SPEECH calls", async () => {
  const ttsBackend = new FakeBackend();
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: ttsBackend },
  );

  const initialResult = await scheduler.synthesizeText("One moment.");
  const repeatedResult = await scheduler.synthesizeText(" one moment. ");

  expect(ttsBackend.calls).toBe(1);

  // Compare as plain arrays so the check is about sample values only.
  const initialSamples = Array.from(initialResult.pcm);
  const repeatedSamples = Array.from(repeatedResult.pcm);
  expect(repeatedSamples).toEqual(initialSamples);
});
681
+
682
// Provisional barge-in flow: a VAD "speech-active" event while the agent is
// marked as speaking pauses TTS, audio synthesized during the pause stays in
// the ring buffer, and a following "blip" event resumes and flushes it.
it("pauses TTS on a provisional barge-in, resumes on a blip (no audio lost)", async () => {
  const ttsBackend = new FakeBackend();
  const audioSink = new InMemoryAudioSink();
  const pauseEvents: number[] = [];
  const resumeEvents: number[] = [];
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: ttsBackend, sink: audioSink },
    {
      onTtsPause: () => pauseEvents.push(1),
      onTtsResume: () => resumeEvents.push(1),
    },
  );

  // Stand-in VAD source: records subscribers so the test can fan events out
  // to the barge-in controller by hand.
  type VadListener = (e: VadEvent) => void;
  const vadListeners = new Set<VadListener>();
  scheduler.bargeIn.bindVad({
    onVadEvent: (listener) => {
      vadListeners.add(listener);
      return () => vadListeners.delete(listener);
    },
  });
  scheduler.bargeIn.setAgentSpeaking(true);
  const emitVad = (event: VadEvent) => {
    vadListeners.forEach((listener) => listener(event));
  };

  // With the agent speaking, a voice hit from the VAD pauses playback.
  emitVad({
    type: "speech-active",
    timestampMs: 1,
    probability: 0.9,
    speechDurationMs: 100,
  });
  expect(scheduler.ttsPaused).toBe(true);
  expect(pauseEvents).toHaveLength(1);

  // Audio synthesized during the pause is held in the ring buffer and must
  // not reach the sink yet.
  await scheduler.accept(tok(0, "Hold"));
  await scheduler.accept(tok(1, " on"));
  await scheduler.accept(tok(2, "."));
  await scheduler.waitIdle();
  expect(audioSink.totalWritten()).toBe(0);
  expect(scheduler.ringBuffer.size()).toBeGreaterThan(0);

  // A blip is not real speech: playback resumes and the held PCM is flushed.
  emitVad({ type: "blip", timestampMs: 2, durationMs: 30, peakRms: 0.2 });
  expect(scheduler.ttsPaused).toBe(false);
  expect(resumeEvents).toHaveLength(1);
  expect(audioSink.totalWritten()).toBeGreaterThan(0);
});
737
+
738
// Hard-stop flow: a provisional pause followed by ASR-confirmed words must
// fire onCancel once, clear the paused flag, empty the ring buffer, and leave
// the sink untouched.
it("hard-stop barge-in drains the ring buffer and cancels in-flight TTS", async () => {
  const slowBackend = new FakeBackend();
  slowBackend.delay = 25;
  const audioSink = new InMemoryAudioSink();
  let cancelCount = 0;
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: slowBackend, sink: audioSink },
    { onCancel: () => cancelCount++ },
  );

  // Stand-in VAD source the barge-in controller subscribes to.
  type VadListener = (e: VadEvent) => void;
  const vadListeners = new Set<VadListener>();
  scheduler.bargeIn.bindVad({
    onVadEvent: (listener) => {
      vadListeners.add(listener);
      return () => vadListeners.delete(listener);
    },
  });
  scheduler.bargeIn.setAgentSpeaking(true);
  const emitVad = (event: VadEvent) => {
    vadListeners.forEach((listener) => listener(event));
  };

  await scheduler.accept(tok(0, "Just"));
  await scheduler.accept(tok(1, " a"));
  await scheduler.accept(tok(2, "."));

  // First the provisional pause...
  emitVad({
    type: "speech-active",
    timestampMs: 1,
    probability: 0.9,
    speechDurationMs: 100,
  });
  // ...then ASR reports actual words, which escalates to a hard stop.
  scheduler.bargeIn.onWordsDetected({
    wordCount: 2,
    partialText: "no wait",
    timestampMs: 2,
  });

  await scheduler.waitIdle();
  expect(cancelCount).toBe(1);
  expect(scheduler.ttsPaused).toBe(false);
  expect(scheduler.ringBuffer.size()).toBe(0);
  expect(audioSink.totalWritten()).toBe(0);
});
787
+
788
// With a streaming backend that is mid-stream (first chunk delivered, not yet
// finished), a hard stop must call the backend's cancel exactly once and
// empty the ring buffer immediately.
it("hard-stop calls native TTS cancel for an active streaming backend", async () => {
  const streamingBackend = new PausingStreamingBackend();
  const audioSink = new InMemoryAudioSink();
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: streamingBackend, sink: audioSink },
  );

  // Stand-in VAD source the barge-in controller subscribes to.
  type VadListener = (e: VadEvent) => void;
  const vadListeners = new Set<VadListener>();
  scheduler.bargeIn.bindVad({
    onVadEvent: (listener) => {
      vadListeners.add(listener);
      return () => vadListeners.delete(listener);
    },
  });
  scheduler.bargeIn.setAgentSpeaking(true);
  const emitVad = (event: VadEvent) => {
    vadListeners.forEach((listener) => listener(event));
  };

  // Pause first, so the streamed audio accumulates in the ring buffer.
  emitVad({
    type: "speech-active",
    timestampMs: 1,
    probability: 0.9,
    speechDurationMs: 100,
  });
  await scheduler.accept(tok(0, "Wait"));
  await scheduler.accept(tok(1, "."));
  await streamingBackend.afterFirstChunk;
  expect(scheduler.ringBuffer.size()).toBeGreaterThan(0);

  // Confirmed words while the stream is still open trigger the hard stop.
  scheduler.bargeIn.onWordsDetected({
    wordCount: 1,
    partialText: "stop",
    timestampMs: 2,
  });
  expect(streamingBackend.cancelCalls).toBe(1);
  expect(scheduler.ringBuffer.size()).toBe(0);

  streamingBackend.finish();
  await scheduler.waitIdle();
  // The sink is deliberately not asserted on: prefix-preserving rollback may
  // have replayed audio for already-committed tokens into it. The contract
  // checked here is that cancel was signalled and the ring buffer is clear.
  expect(scheduler.ringBuffer.size()).toBe(0);
});
838
+
839
// Rejecting a token that belongs to a phrase whose stream is still open must
// roll that phrase back and call the backend's cancel before the stream ends;
// no second stream is started afterwards.
it("rejecting an active streaming phrase calls native cancel before the stream finishes", async () => {
  const streamingBackend = new PausingStreamingBackend();
  const audioSink = new InMemoryAudioSink();
  const rolledBackIds: number[] = [];
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: streamingBackend, sink: audioSink },
    { onRollback: (id) => rolledBackIds.push(id) },
  );

  await scheduler.accept(tok(0, "Wait"));
  await scheduler.accept(tok(1, "."));
  await streamingBackend.afterFirstChunk;

  // After the first streamed chunk the rollback queue tracks exactly one
  // phrase covering tokens 0..1, already in the "ringbuffered" state.
  const expectedSnapshot = [
    {
      phrase: {
        id: 0,
        text: "Wait.",
        fromIndex: 0,
        toIndex: 1,
        terminator: "punctuation",
      },
      state: "ringbuffered",
    },
  ];
  expect(scheduler.rollback.snapshot()).toEqual(expectedSnapshot);

  // Token 1 lies inside phrase 0, so rejecting it rolls back phrase 0.
  await scheduler.reject({ fromIndex: 1, toIndex: 1 });
  expect(rolledBackIds).toEqual([0]);
  expect(streamingBackend.cancelCalls).toBe(1);

  streamingBackend.finish();
  await scheduler.waitIdle();
  expect(streamingBackend.streamCalls).toBe(1);
});
879
+
880
// cancelPendingTts() must discard queued/in-flight audio (ring buffer empty,
// nothing written to the sink) without invoking the onCancel callback.
it("cancelPendingTts drops not-yet-spoken audio without signalling a barge-in", async () => {
  const slowBackend = new FakeBackend();
  slowBackend.delay = 20;
  const audioSink = new InMemoryAudioSink();
  let cancelCount = 0;
  const scheduler = new VoiceScheduler(
    {
      chunkerConfig: { maxTokensPerPhrase: 10 },
      preset: makePreset(),
      ringBufferCapacity: 4096,
      sampleRate: 24000,
    },
    { backend: slowBackend, sink: audioSink },
    { onCancel: () => cancelCount++ },
  );

  await scheduler.accept(tok(0, "Speculative"));
  await scheduler.accept(tok(1, "."));

  // Cancel before waiting for idle, i.e. before the phrase has been awaited
  // to completion.
  scheduler.cancelPendingTts();
  await scheduler.waitIdle();

  expect(scheduler.ringBuffer.size()).toBe(0);
  expect(audioSink.totalWritten()).toBe(0);
  // onCancel never fired, so no barge-in was signalled.
  expect(cancelCount).toBe(0);
});
904
+ });
905
+
906
// Round-trip and validation tests for the voice preset binary format
// (writeVoicePresetFile / readVoicePresetFile).
describe("VoicePresetFormat", () => {
  it("round-trips a synthetic preset (embedding + phrase cache seed)", () => {
    const embedding = new Float32Array([0.1, -0.2, 0.3, 0.4]);
    const phrases = [
      {
        text: "sure.",
        sampleRate: 24000,
        pcm: new Float32Array([0.1, 0.2, 0.3]),
      },
      {
        text: "one moment.",
        sampleRate: 24000,
        pcm: new Float32Array([0.4, 0.5, 0.6, 0.7]),
      },
    ];

    const blob = writeVoicePresetFile({ embedding, phrases });
    const parsed = readVoicePresetFile(blob);

    expect(parsed.version).toBe(1);

    // Values are compared with a tolerance because the inputs are float32.
    expect(parsed.embedding.length).toBe(embedding.length);
    for (let i = 0; i < embedding.length; i++) {
      expect(parsed.embedding[i]).toBeCloseTo(embedding[i], 5);
    }

    expect(parsed.phrases).toHaveLength(2);
    expect(parsed.phrases[0].text).toBe("sure.");
    expect(parsed.phrases[0].sampleRate).toBe(24000);
    expect(parsed.phrases[0].pcm.length).toBe(3);
    expect(parsed.phrases[0].pcm[0]).toBeCloseTo(0.1, 5);
    expect(parsed.phrases[0].pcm[1]).toBeCloseTo(0.2, 5);
    expect(parsed.phrases[0].pcm[2]).toBeCloseTo(0.3, 5);
    expect(parsed.phrases[1].text).toBe("one moment.");
    expect(parsed.phrases[1].pcm.length).toBe(4);
  });

  it("round-trips an empty phrase cache seed (N=0)", () => {
    const embedding = new Float32Array([1, 2, 3]);
    const blob = writeVoicePresetFile({ embedding, phrases: [] });
    const parsed = readVoicePresetFile(blob);
    expect(parsed.phrases).toHaveLength(0);
    expect(parsed.embedding.length).toBe(3);
    expect(parsed.embedding[0]).toBeCloseTo(1, 5);
    expect(parsed.embedding[1]).toBeCloseTo(2, 5);
    expect(parsed.embedding[2]).toBeCloseTo(3, 5);
  });

  it("rejects bad magic with VoicePresetFormatError", () => {
    // 24 zero bytes: enough for the reader to look at a magic value, and
    // all-zero is expected not to match it.
    const bytes = new Uint8Array(24);
    expect(() => readVoicePresetFile(bytes)).toThrow(VoicePresetFormatError);
  });

  it("rejects truncated header", () => {
    const bytes = new Uint8Array(8);
    expect(() => readVoicePresetFile(bytes)).toThrow(VoicePresetFormatError);
  });

  it("rejects unsupported version", () => {
    const blob = writeVoicePresetFile({
      embedding: new Float32Array([0]),
      phrases: [],
    });
    // Overwrite the little-endian uint32 at byte 4 (presumably the version
    // field — confirm against the format spec) with an unsupported value.
    // The DataView is scoped to the blob's own window: `blob.buffer` alone
    // would address the whole backing ArrayBuffer and patch the wrong bytes
    // if the writer ever returns a view with a non-zero byteOffset.
    const header = new DataView(blob.buffer, blob.byteOffset, blob.byteLength);
    header.setUint32(4, 999, true);
    expect(() => readVoicePresetFile(blob)).toThrow(VoicePresetFormatError);
  });
});
969
+
970
// PhraseCache.seed: bulk-loading entries and looking them up with text that
// differs in letter case from the seeded text.
describe("PhraseCache.seed", () => {
  it("pre-populates the cache from voice-preset seed entries", () => {
    const seededCache = new PhraseCache();
    seededCache.seed([
      { text: "sure.", pcm: new Float32Array([0.1]), sampleRate: 24000 },
      { text: "one moment.", pcm: new Float32Array([0.2]), sampleRate: 24000 },
    ]);

    expect(seededCache.size()).toBe(2);
    // Lookups below use different casing than the seeded text on purpose.
    expect(seededCache.has("Sure.")).toBe(true);
    expect(seededCache.get("ONE MOMENT.")?.pcm[0]).toBeCloseTo(0.2, 5);
  });
});
990
+
991
// chunkTokens in its default punctuation mode versus "phoneme-stream" mode,
// plus a direct check of the rule-based English phoneme tokenizer.
describe("PhraseChunker IPA mode", () => {
  it("punctuation mode (default) is unchanged when no tokenizer is passed", () => {
    const sentence: TextToken[] = [tok(0, "Hello"), tok(1, " world"), tok(2, ".")];

    const phrases = chunkTokens(sentence, { maxTokensPerPhrase: 100 });

    expect(phrases).toHaveLength(1);
    expect(phrases[0].terminator).toBe("punctuation");
    expect(phrases[0].text).toBe("Hello world.");
  });

  it("phoneme-stream mode emits sub-phrase chunks at phoneme boundaries", () => {
    const phonemeTokenizer = new RuleBasedEnglishPhonemeTokenizer();
    // Approximate phoneme counts per token: "abcde" -> 5, "fgh" -> 3, "ij" -> 2
    // (running totals 5, 8, 10). With phonemesPerChunk = 4:
    //   - token 0 alone reaches 5 >= 4, so it closes chunk #0;
    //   - tokens 1 and 2 together reach 3 + 2 = 5 >= 4, closing chunk #1
    //     after token 2.
    const input: TextToken[] = [tok(0, "abcde"), tok(1, "fgh"), tok(2, "ij")];

    const phrases = chunkTokens(
      input,
      {
        maxTokensPerPhrase: 100,
        chunkOn: "phoneme-stream",
        phonemesPerChunk: 4,
      },
      0,
      phonemeTokenizer,
    );

    expect(phrases).toHaveLength(2);

    expect(phrases[0].terminator).toBe("phoneme-stream");
    expect(phrases[0].fromIndex).toBe(0);
    expect(phrases[0].toIndex).toBe(0);
    expect(phrases[0].text).toBe("abcde");

    expect(phrases[1].fromIndex).toBe(1);
    expect(phrases[1].toIndex).toBe(2);
    expect(phrases[1].text).toBe("fghij");
  });

  it("phoneme-stream mode still respects punctuation as a hard boundary", () => {
    const phonemeTokenizer = new RuleBasedEnglishPhonemeTokenizer();
    // phonemesPerChunk is set high (16), so only the "." can end the phrase.
    const input: TextToken[] = [tok(0, "hi"), tok(1, ".")];

    const phrases = chunkTokens(
      input,
      {
        maxTokensPerPhrase: 100,
        chunkOn: "phoneme-stream",
        phonemesPerChunk: 16,
      },
      0,
      phonemeTokenizer,
    );

    expect(phrases).toHaveLength(1);
    expect(phrases[0].terminator).toBe("punctuation");
  });

  it("throws if phoneme-stream mode is selected without a tokenizer", () => {
    const chunkWithoutTokenizer = () =>
      chunkTokens(
        [tok(0, "x")],
        { maxTokensPerPhrase: 100, chunkOn: "phoneme-stream" },
        0,
        null,
      );

    expect(chunkWithoutTokenizer).toThrow();
  });

  it("RuleBasedEnglishPhonemeTokenizer emits approximate IPA", () => {
    const phonemeTokenizer = new RuleBasedEnglishPhonemeTokenizer();

    expect(phonemeTokenizer.quality).toBe("approximate");
    expect(phonemeTokenizer.name).toBe("RuleBasedEnglishPhonemeTokenizer");

    // Every emitted phoneme carries the source token index passed in (7).
    const expectedPhonemes = [
      { ipa: "h", sourceTokenIndex: 7 },
      { ipa: "ə", sourceTokenIndex: 7 },
      { ipa: "l", sourceTokenIndex: 7 },
      { ipa: "oʊ", sourceTokenIndex: 7 },
    ];
    expect(phonemeTokenizer.tokenize("hello", 7)).toEqual(expectedPhonemes);
  });
});