@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,699 @@
1
+ export interface TextToken {
2
+ index: number;
3
+ text: string;
4
+ /**
5
+ * Text-model vocabulary token id, when the producer knows it. ASR
6
+ * (fused Qwen3-ASR) and the text backbone share the Qwen2 BPE 151 936
7
+ * vocab + merges (AGENTS.md §1), so an ASR-emitted token id is the same
8
+ * id the text model would assign — a downstream in-process handoff can
9
+ * inject `id` directly into the text KV cache without detokenize →
10
+ * retokenize. Absent for producers that only have surface text (the
11
+ * word-chunk approximation in `splitTranscriptToTokens`).
12
+ */
13
+ id?: number;
14
+ }
15
+
16
+ export interface AcceptedToken extends TextToken {
17
+ acceptedAt: number;
18
+ }
19
+
20
+ export interface RejectedTokenRange {
21
+ fromIndex: number;
22
+ toIndex: number;
23
+ }
24
+
25
+ export interface Phrase {
26
+ id: number;
27
+ text: string;
28
+ fromIndex: number;
29
+ toIndex: number;
30
+ terminator: "punctuation" | "max-cap" | "phoneme-stream";
31
+ }
32
+
33
+ export interface AudioChunk {
34
+ phraseId: number;
35
+ fromIndex: number;
36
+ toIndex: number;
37
+ pcm: Float32Array;
38
+ sampleRate: number;
39
+ }
40
+
41
+ /**
42
+ * Reference-audio-token payload mirrored on `SpeakerPreset` for v2 presets.
43
+ * Empty (K=0, refT=0, tokens.length=0) when the preset has no reference
44
+ * audio attached (instruct-only voice or legacy v1 file).
45
+ */
46
+ export interface SpeakerPresetRefAudioTokens {
47
+ K: number;
48
+ refT: number;
49
+ tokens: Int32Array;
50
+ }
51
+
52
+ export interface SpeakerPreset {
53
+ voiceId: string;
54
+ embedding: Float32Array;
55
+ bytes: Uint8Array;
56
+ /**
57
+ * Preset file format version. `1` for legacy Kokoro-style presets; `2`
58
+ * for the OmniVoice freeze format that also carries `refAudioTokens`,
59
+ * `refText`, and `instruct`. Defaulted to `1` for older readers that
60
+ * haven't been rebuilt yet.
61
+ */
62
+ version?: number;
63
+ /**
64
+ * OmniVoice reference-audio-token packet (`[K, refT]` int32). Empty for
65
+ * v1 files and v2 files that intentionally ship instruct-only voices.
66
+ * The FFI bridge passes the tokens through to `params.ref_audio_tokens`
67
+ * + `params.ref_T`.
68
+ */
69
+ refAudioTokens?: SpeakerPresetRefAudioTokens;
70
+ /**
71
+ * UTF-8 transcript of the reference clip that produced `refAudioTokens`.
72
+ * The FFI bridge passes this through to `params.ref_text`. Empty when
73
+ * the preset is instruct-only or v1.
74
+ */
75
+ refText?: string;
76
+ /**
77
+ * Resolved VoiceDesign instruct string (e.g. `"female, young adult,
78
+ * american accent, moderate pitch"`). The FFI bridge passes this
79
+ * through to `params.instruct` instead of the historical "use the
80
+ * voiceId as the instruct string" misreading.
81
+ */
82
+ instruct?: string;
83
+ /**
84
+ * Free-form metadata attached at freeze time (codec sha256, corpus hash,
85
+ * source bundle id, etc.). The runtime never relies on this for
86
+ * correctness.
87
+ */
88
+ metadata?: Record<string, unknown>;
89
+ }
90
+
91
+ export interface AudioSink {
92
+ write(pcm: Float32Array, sampleRate: number): void;
93
+ drain(): void;
94
+ bufferedSamples(): number;
95
+ }
96
+
97
+ export interface OmniVoiceBackend {
98
+ synthesize(args: {
99
+ phrase: Phrase;
100
+ preset: SpeakerPreset;
101
+ cancelSignal: { cancelled: boolean };
102
+ onKernelTick?: () => void;
103
+ }): Promise<AudioChunk>;
104
+ }
105
+
106
+ /**
107
+ * One PCM segment delivered by a streaming OmniVoice runtime. This is the
108
+ * scheduler-facing TypeScript contract for the native streaming ABI extension:
109
+ * the current v1/batch ABI remains valid, and backends that implement this
110
+ * seam can additionally surface first-audio before a full phrase finishes.
111
+ */
112
+ export interface TtsPcmChunk {
113
+ pcm: Float32Array;
114
+ sampleRate: number;
115
+ isFinal: boolean;
116
+ }
117
+
118
+ export interface StreamingTtsBackend {
119
+ synthesizeStream(args: {
120
+ phrase: Phrase;
121
+ preset: SpeakerPreset;
122
+ cancelSignal: { cancelled: boolean };
123
+ onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
124
+ onKernelTick?: () => void;
125
+ }): Promise<{ cancelled: boolean }>;
126
+ }
127
+
128
+ /** Opaque native handle for a streaming ASR session in the v2 ABI shape. */
129
+ export type StreamingAsrHandle = bigint;
130
+
131
+ /**
132
+ * TS-only v2 streaming ABI contract. Implementations can satisfy this beside
133
+ * the existing synchronous v1 methods; callers should test the support flags
134
+ * rather than probe-and-catch. Native bindings may carry context handles on
135
+ * top of this shape; the scheduler-facing stream semantics stay the same.
136
+ */
137
+ export interface VoiceStreamingAbiV2 {
138
+ ttsStreamSupported(): boolean;
139
+ ttsSynthesizeStream(args: {
140
+ text: string;
141
+ speakerPresetId: string | null;
142
+ onChunk: (chunk: {
143
+ pcm: Float32Array;
144
+ isFinal: boolean;
145
+ }) => boolean | undefined;
146
+ }): { cancelled: boolean };
147
+ cancelTts(): void;
148
+ asrStreamSupported(): boolean;
149
+ asrStreamOpen(args: { sampleRateHz: number }): StreamingAsrHandle;
150
+ asrStreamFeed(args: { stream: StreamingAsrHandle; pcm: Float32Array }): void;
151
+ asrStreamPartial(args: {
152
+ stream: StreamingAsrHandle;
153
+ maxTextBytes?: number;
154
+ maxTokens?: number;
155
+ }): { partial: string; tokens?: number[] };
156
+ asrStreamFinish(args: {
157
+ stream: StreamingAsrHandle;
158
+ maxTextBytes?: number;
159
+ maxTokens?: number;
160
+ }): { partial: string; tokens?: number[] };
161
+ asrStreamClose(stream: StreamingAsrHandle): void;
162
+ }
163
+
164
+ export interface TranscriptionAudio {
165
+ pcm: Float32Array;
166
+ sampleRate: number;
167
+ }
168
+
169
+ export type VoiceInputKind =
170
+ | "local_mic"
171
+ | "discord"
172
+ | "telegram"
173
+ | "signal"
174
+ | "whatsapp"
175
+ | "phone"
176
+ | "browser"
177
+ | "file"
178
+ | "unknown";
179
+
180
+ /**
181
+ * Where speech audio entered the voice loop. Keep this structural so local
182
+ * mic, Discord, phone, and connector captures can share the same
183
+ * turn-taking and attribution path without branching on prompt text.
184
+ */
185
+ export interface VoiceInputSource {
186
+ kind: VoiceInputKind;
187
+ /** Connector account, device, guild/channel, call, or upload id. */
188
+ sourceId?: string;
189
+ roomId?: string;
190
+ conversationId?: string;
191
+ messageId?: string;
192
+ deviceId?: string;
193
+ connectorAccountId?: string;
194
+ channelId?: string;
195
+ guildId?: string;
196
+ callId?: string;
197
+ participantId?: string;
198
+ metadata?: Record<string, unknown>;
199
+ }
200
+
201
+ /**
202
+ * Speaker attribution for diarized speech. `imprintClusterId` is evidence,
203
+ * not identity: callers that want to attach this to a LifeOps person must
204
+ * submit a normal `EntityStore.observeIdentity` observation with this
205
+ * cluster/observation id in its evidence list. Do not use voice imprints as
206
+ * a parallel identity graph or as authorization for voice synthesis.
207
+ */
208
+ export interface VoiceSpeaker {
209
+ id: string;
210
+ label?: string;
211
+ displayName?: string;
212
+ source?: VoiceInputSource;
213
+ imprintClusterId?: string;
214
+ imprintObservationId?: string;
215
+ entityId?: string;
216
+ confidence?: number;
217
+ isLocalUser?: boolean;
218
+ metadata?: Record<string, unknown>;
219
+ }
220
+
221
+ /** One diarized span within a transcript snapshot or finalized voice turn. */
222
+ export interface VoiceSegment {
223
+ id?: string;
224
+ text: string;
225
+ startMs: number;
226
+ endMs: number;
227
+ speaker?: VoiceSpeaker;
228
+ speakerId?: string;
229
+ source?: VoiceInputSource;
230
+ confidence?: number;
231
+ tokens?: number[];
232
+ metadata?: Record<string, unknown>;
233
+ }
234
+
235
+ export interface VoiceDiarizationMetadata {
236
+ provider: "local" | "connector" | "cloud" | "unknown";
237
+ model?: string;
238
+ version?: string;
239
+ confidence?: number;
240
+ metadata?: Record<string, unknown>;
241
+ }
242
+
243
+ export interface VoiceTurnMetadata {
244
+ turnId?: string;
245
+ source?: VoiceInputSource;
246
+ primarySpeaker?: VoiceSpeaker;
247
+ segments?: VoiceSegment[];
248
+ startedAtMs?: number;
249
+ endedAtMs?: number;
250
+ diarization?: VoiceDiarizationMetadata;
251
+ metadata?: Record<string, unknown>;
252
+ }
253
+
254
+ /* -------------------------------------------------------------------- *
255
+ * Streaming ASR — frame-fed transcription with incremental partials.
256
+ *
257
+ * Owned jointly by the transcriber adapters (`voice/transcriber.ts`), the
258
+ * VAD gating + barge-in word-confirm (`voice/vad.ts`, `voice/barge-in.ts`),
259
+ * the turn controller / speculative-on-pause path, and the overlapped
260
+ * `VoicePipeline` (`voice/pipeline.ts`). The `StreamingTranscriber` below
261
+ * is the single ASR contract; the two fused adapters (fused Qwen3-ASR
262
+ * streaming and fused batch, both via libelizainference) implement it in
263
+ * `voice/transcriber.ts`. It consumes the canonical `PcmFrame` (defined
264
+ * below in the audio front-end section) off a `MicSource` and is gated by
265
+ * the `VadEvent` stream. The `VoicePipeline` drives the same contract as a
266
+ * batch (feed the whole utterance buffer, `flush()`, split the final
267
+ * transcript into contiguous text tokens) — there is no separate batch ASR
268
+ * interface.
269
+ * -------------------------------------------------------------------- */
270
+
271
+ /** A running or final transcript snapshot from a `StreamingTranscriber`. */
272
+ export interface TranscriptUpdate {
273
+ /** The full running transcript (not a delta) at this point. */
274
+ partial: string;
275
+ /** True for the snapshot emitted by `flush()` / on `speech-end`. */
276
+ isFinal: boolean;
277
+ /** Channel/device/call metadata for attribution and storage. */
278
+ source?: VoiceInputSource;
279
+ /** Best speaker attribution for single-speaker snapshots. */
280
+ speaker?: VoiceSpeaker;
281
+ /** Diarized spans for multi-speaker snapshots, when available. */
282
+ segments?: VoiceSegment[];
283
+ /** Turn-level metadata carried through to generation and storage. */
284
+ turn?: VoiceTurnMetadata;
285
+ /**
286
+ * Text-model token ids for `partial`, when the backend can supply them
287
+ * cheaply (fused Qwen3-ASR shares the text vocabulary). Absent when the
288
+ * decoder reports surface text only (re-tokenization is the LLM stage's
289
+ * job there).
290
+ */
291
+ tokens?: number[];
292
+ /**
293
+ * Voice-side emotion attribution attached to `isFinal` snapshots only.
294
+ * Running partials never carry this — the acoustic classifier wants a
295
+ * stable utterance window and the lexicon read on partial text is noise.
296
+ * Produced by `attributeVoiceEmotion()` after fusing the acoustic
297
+ * classifier output (`VoiceEmotionClassifier`) with text-side evidence;
298
+ * the fusion rule lives in `emotion-attribution.ts` so no consumer
299
+ * re-implements it. See R3-emotion §3 + §5.
300
+ */
301
+ voiceEmotion?: import("./emotion-attribution").VoiceEmotionAttribution;
302
+ }
303
+
304
+ /** Events a `StreamingTranscriber` emits while consuming PCM frames. */
305
+ export type TranscriberEvent =
306
+ | { kind: "partial"; update: TranscriptUpdate }
307
+ | { kind: "final"; update: TranscriptUpdate }
308
+ /**
309
+ * Fired the first instant ≥1 real word is recognized in the current
310
+ * speech segment. Wired to W1's barge-in word-confirm gate
311
+ * (`onWordsDetected`) so the agent hard-stops TTS + aborts in-flight
312
+ * LLM/drafter generation only on real speech, not a blip.
313
+ */
314
+ | { kind: "words"; words: string[] };
315
+
316
+ export type TranscriberEventListener = (event: TranscriberEvent) => void;
317
+
318
+ /**
319
+ * Live transcription. `feed()` is called per PCM frame off a `MicSource`.
320
+ * The adapter runs windowed decode passes internally and emits `partial`
321
+ * events as the running transcript grows; `flush()` force-finalizes (call
322
+ * it when the VAD reports `speech-end`). Implementations gate on the VAD
323
+ * event stream — they only decode while the VAD is in `speech-active`.
324
+ *
325
+ * No silent degrade: a transcriber whose backend is unavailable throws on
326
+ * construction (or on first `feed`), it does not quietly produce empty
327
+ * transcripts.
328
+ */
329
+ export interface StreamingTranscriber {
330
+ /** Feed one PCM frame. Frames received while VAD is not active are buffered/ignored per the VAD-gating policy. */
331
+ feed(frame: PcmFrame): void;
332
+ /**
333
+ * Force-finalize: drain any buffered audio, run a final decode pass,
334
+ * emit the `final` event, and resolve with the final transcript. Safe
335
+ * to call when no audio is buffered (resolves with an empty final).
336
+ * After `flush()` the transcriber is reset and ready for the next
337
+ * speech segment.
338
+ */
339
+ flush(): Promise<TranscriptUpdate>;
340
+ /** Subscribe to transcriber events. Returns an unsubscribe fn. */
341
+ on(listener: TranscriberEventListener): () => void;
342
+ /** Release any held native resources (FFI stream handle, temp files). Idempotent. */
343
+ dispose(): void;
344
+ }
345
+
346
+ export interface PhraseChunkerConfig {
347
+ /**
348
+ * Hard word cap before a phrase is force-flushed even without a
349
+ * `, . ! ? ; :` boundary. Defaults to 30 (the brief's A6 "first 30 words").
350
+ */
351
+ maxTokensPerPhrase?: number;
352
+ /**
353
+ * Characters that close a phrase. Default `, . ! ? ; :` — punctuation
354
+ * boundaries let the first clause reach TTS without waiting for a
355
+ * sentence-final mark.
356
+ */
357
+ sentenceTerminators?: ReadonlySet<string>;
358
+ /**
359
+ * Where the chunker emits a phrase boundary.
360
+ * 'punctuation' — default. Wait for `, . ! ? ; :` or the max-token cap.
361
+ * 'phoneme-stream' — additionally emit a sub-phrase chunk every
362
+ * `phonemesPerChunk` phonemes. Cuts first-audio
363
+ * latency by handing partial phrases to TTS at
364
+ * phoneme boundaries.
365
+ */
366
+ chunkOn?: "punctuation" | "phoneme-stream";
367
+ /** Phonemes per chunk in `phoneme-stream` mode. Default 8. */
368
+ phonemesPerChunk?: number;
369
+ /**
370
+ * Maximum milliseconds a phrase may sit in the chunker before the
371
+ * scheduler force-flushes it even without punctuation / phoneme / cap
372
+ * boundaries. Default 700 ms. Set to 0 to disable.
373
+ */
374
+ maxAccumulationMs?: number;
375
+ /**
376
+ * Shorter budget applied ONLY to the first phrase of each reply, so first
377
+ * audio (TTFA) plays sooner on punctuation-sparse openings while later
378
+ * phrases keep `maxAccumulationMs` (no fragmentation). When omitted,
379
+ * derives from `maxAccumulationMs` (half, capped at 350 ms) and honors the
380
+ * `ELIZA_PHRASE_FLUSH_FIRST_MS` env override. Clamped to `maxAccumulationMs`.
381
+ */
382
+ firstPhraseMaxAccumulationMs?: number;
383
+ }
384
+
385
+ export interface VerifierStreamEvent {
386
+ kind: "accept" | "reject";
387
+ tokens: TextToken[];
388
+ /**
389
+ * Optional per-event metadata. Today only the very first `accept` of a
390
+ * streaming completion carries `firstTokenMs` (L5 — time from the fetch
391
+ * being issued to the first SSE chunk arriving). Other consumers MAY
392
+ * ignore this field; producers MUST omit it on non-first events.
393
+ */
394
+ meta?: {
395
+ /** Milliseconds from request issue (`performance.now()`) to first chunk. */
396
+ firstTokenMs?: number;
397
+ };
398
+ }
399
+
400
+ // ---------------------------------------------------------------------------
401
+ // Audio front-end contract (mic capture · VAD · barge-in).
402
+ //
403
+ // Shared by W1 (this module), W2 (`StreamingTranscriber`), and W9 (the voice
404
+ // turn controller / scheduler). Two-tier design:
405
+ //
406
+ // 1. The cheap always-on RMS energy gate is the *fast* path. It only
407
+ // decides "is there acoustic activity right now". A rising edge wakes
408
+ // the response pipeline (KV-prefill, drafter preload, first-filler
409
+ // pre-generation) speculatively.
410
+ // 2. The fused Silero VAD (via the `libelizainference` native VAD ABI) is
411
+ // the *authoritative* speech/no-speech signal. It gates ASR (skip silent
412
+ // frames) and drives turn-taking.
413
+ //
414
+ // Both run on every mic frame. The RMS gate never substitutes for Silero —
415
+ // if the native VAD runtime is unavailable that is a hard "VAD unavailable"
416
+ // error, never a silent downgrade (AGENTS.md §3).
417
+ // ---------------------------------------------------------------------------
418
+
419
+ /** A fixed-size block of mono PCM samples in [-1, 1] at a known sample rate. */
420
+ export interface PcmFrame {
421
+ pcm: Float32Array;
422
+ sampleRate: number;
423
+ /**
424
+ * Monotonic timestamp (ms, `performance.now()` domain) of the *first*
425
+ * sample in this frame. Used to age VAD events and barge-in latency.
426
+ */
427
+ timestampMs: number;
428
+ }
429
+
430
+ /**
431
+ * Event emitted by `VadDetector` on the authoritative (Silero) timeline.
432
+ *
433
+ * - `speech-start` — speech onset (a run of speech frames crossed the
434
+ * onset threshold). Carries the probability of the
435
+ * triggering frame.
436
+ * - `speech-active` — a periodic heartbeat while speech is ongoing. The
437
+ * barge-in controller uses this to pause TTS.
438
+ * - `speech-pause` — speech has been quiet for `pauseDurationMs` ms
439
+ * but not long enough to count as end-of-utterance.
440
+ * The turn controller uses this to kick a speculative
441
+ * response off the partial transcript.
442
+ * - `speech-end` — end of utterance (silence held past the hangover
443
+ * window). Carries the total speech duration.
444
+ * - `blip` — a short burst of energy that the Silero VAD rejected
445
+ * (or that was too short to be speech). The barge-in
446
+ * controller treats this as "resume TTS".
447
+ */
448
+ export type VadEvent =
449
+ | { type: "speech-start"; timestampMs: number; probability: number }
450
+ | {
451
+ type: "speech-active";
452
+ timestampMs: number;
453
+ probability: number;
454
+ speechDurationMs: number;
455
+ }
456
+ | { type: "speech-pause"; timestampMs: number; pauseDurationMs: number }
457
+ | { type: "speech-end"; timestampMs: number; speechDurationMs: number }
458
+ | { type: "blip"; timestampMs: number; durationMs: number; peakRms: number };
459
+
460
+ /** Cheap RMS energy gate event — the fast pre-warm path. Distinct timeline
461
+ * from `VadEvent`; this fires with sub-frame latency and never blocks on a
462
+ * model forward pass. */
463
+ export type EnergyGateEvent =
464
+ | { type: "energy-rise"; timestampMs: number; rms: number }
465
+ | { type: "energy-fall"; timestampMs: number; quietMs: number };
466
+
467
+ export type VadEventListener = (event: VadEvent) => void;
468
+ export type EnergyGateListener = (event: EnergyGateEvent) => void;
469
+
470
+ /**
471
+ * Subscribable VAD event stream. `VadDetector` (`voice/vad.ts`) is the
472
+ * concrete implementation; the streaming transcriber and the barge-in
473
+ * controller take this structural view so they don't pull in the optional
474
+ * `onnxruntime-node` surface.
475
+ */
476
+ export interface VadEventSource {
477
+ onVadEvent(listener: VadEventListener): () => void;
478
+ }
479
+
480
+ /**
481
+ * Source of mic PCM. The desktop/Electrobun impl in `mic-source.ts` is the
482
+ * first concrete implementation; Discord / Telegram / mobile connectors
483
+ * implement the same interface so the rest of the voice loop is source-
484
+ * agnostic. A `MicSource` produces fixed-size mono frames at a fixed sample
485
+ * rate and tees them to any number of consumers (the VAD, the ring buffer
486
+ * the ASR reads from, instrumentation taps).
487
+ */
488
+ export interface MicSource {
489
+ /** Nominal sample rate of every emitted frame (Hz). */
490
+ readonly sampleRate: number;
491
+ /** Samples per emitted frame. */
492
+ readonly frameSamples: number;
493
+ /** True once `start()` has resolved and frames are flowing. */
494
+ readonly running: boolean;
495
+ /** Begin capture. Resolves when the underlying device is producing audio.
496
+ * Throws (never silently no-ops) when no mic backend is available. */
497
+ start(): Promise<void>;
498
+ /** Stop capture and release the device. Idempotent. */
499
+ stop(): Promise<void>;
500
+ /** Subscribe to PCM frames. Returns an unsubscribe function. `PcmFrame` is declared outside this section; its layout is not visible here. */
501
+ onFrame(listener: (frame: PcmFrame) => void): () => void;
502
+ /** Subscribe to fatal capture errors (device lost, process died). The
503
+ * source is no longer `running` after one of these. The return value has the same `() => void` shape as `onFrame`'s — presumably an unsubscribe function too; confirm. */
504
+ onError(listener: (error: Error) => void): () => void;
505
+ }
506
+
507
+ /**
508
+ * Cancellation token threaded from the barge-in controller down through the
509
+ * voice scheduler (TTS) *and* the engine layer (in-flight LLM / MTP
510
+ * drafter generation). `cancelled` is a plain boolean so the synthesis loop
511
+ * and the SSE-consuming generate loop can both poll it cheaply at a kernel
512
+ * boundary; `reason` records *why* for diagnostics; `signal` is the standard
513
+ * `AbortSignal` the engine's HTTP/stream layer aborts on.
514
+ *
515
+ * (W1 owns the controller; W9 threads `signal` into `dispatcher.generate`.)
516
+ */
517
+ export interface BargeInCancelToken {
518
+ cancelled: boolean; // mutable (not `readonly`): the flag pollers read
519
+ reason: "barge-in-words" | "manual" | null; // `null` presumably means "no cancellation recorded yet" — confirm with the controller
520
+ readonly signal: AbortSignal; // only the reference is read-only; whether it is kept in sync with `cancelled` is not shown here — confirm
521
+ }
522
+
523
+ /** Signal emitted by `BargeInController` to the scheduler / engine. Discriminated on `type`; only `hard-stop` carries a `BargeInCancelToken`. */
524
+ export type BargeInSignal =
525
+ | { type: "pause-tts"; timestampMs: number }
526
+ | { type: "resume-tts"; timestampMs: number }
527
+ | { type: "hard-stop"; timestampMs: number; token: BargeInCancelToken };
528
+
529
+ /** Callback that receives one `BargeInSignal`; its return value is ignored. */ export type BargeInSignalListener = (signal: BargeInSignal) => void;
530
+
531
+ /**
532
+ * Contract through which the ASR layer (W2's `StreamingTranscriber`) calls
533
+ * into the barge-in controller. When the transcriber has parsed at least one
534
+ * real word from the user's barge-in audio, it calls `onWordsDetected` with
535
+ * the running word count; the controller promotes a `pause-tts` into a
536
+ * `hard-stop`. This is the *authoritative* blip-vs-words gate — the energy-
537
+ * duration heuristic is only a fast provisional guess until ASR confirms.
538
+ */
539
+ export interface WordsDetectedSink {
540
+ onWordsDetected(args: {
541
+ /** Number of parsed words observed so far in this barge-in segment. */
542
+ wordCount: number;
543
+ /** Best partial transcript so far (may be empty). */
544
+ partialText: string;
545
+ timestampMs: number; // time of this observation in ms; the clock/epoch is not visible here — confirm with the transcriber
546
+ }): void;
547
+ }
548
+
549
+ /** Scheduler configuration: phrase chunking, speaker preset, buffering, and TTS dispatch options. */ export interface SchedulerConfig {
550
+ chunkerConfig: PhraseChunkerConfig; // `PhraseChunkerConfig` is declared outside this section
551
+ preset: SpeakerPreset; // `SpeakerPreset` is declared outside this section
552
+ ringBufferCapacity: number; // unit (samples vs. frames) is not visible here — confirm against the ring buffer
553
+ sampleRate: number; // presumably Hz, as in `MicSource.sampleRate` — confirm
554
+ /**
555
+ * Max concurrent TTS dispatches. When this many phrases are in flight,
556
+ * `accept()` awaits the oldest before dispatching the next, propagating
557
+ * backpressure upstream to the verifier loop. Default 4 — small enough
558
+ * to bound memory under runaway producers without serialising the
559
+ * common case (text gen leads TTS by a phrase or two).
560
+ */
561
+ maxInFlightPhrases?: number;
562
+ /**
563
+ * Enable the streaming-TTS path (`synthesizeStream`) for phrase
564
+ * synthesis. When `true` (default), the scheduler uses the chunk-by-chunk
565
+ * streaming ABI when the backend supports it, delivering first audio
566
+ * before the full phrase finishes synthesizing and enabling per-chunk
567
+ * prefix-preserving barge-in rollback.
568
+ *
569
+ * Previously this was implicitly gated by `ttsStreamSupported()` from the
570
+ * native FFI layer. On macOS, a `ggml_conv_transpose_1d` stall in the
571
+ * DAC codec region caused the Metal path to hang — that stall is now
572
+ * fixed in the llama.cpp merge (native Metal kernels for
573
+ * `ggml_conv_transpose_1d`; the CPU fallback causing the hang is gone).
574
+ * The flag is therefore `true` by default. Set to `false` only when
575
+ * testing against a non-streaming build or reproducing the pre-fix
576
+ * behaviour.
577
+ */
578
+ streamingTtsActive?: boolean;
579
+ }
580
+
581
+ /** Per-phrase snapshot carried in the `phrase` field of `VoiceSchedulerTelemetryEvent` variants. */ export interface VoiceSchedulerPhraseTelemetry {
582
+ id: number;
583
+ text: string;
584
+ fromIndex: number; // presumably the start of the phrase's token index range — confirm against `Phrase`
585
+ toIndex: number; // presumably the end of that range; inclusive vs. exclusive is not visible here — confirm
586
+ terminator: Phrase["terminator"]; // same type as the `terminator` property of `Phrase` (declared outside this section)
587
+ tokenCount: number;
588
+ textBytes: number; // presumably the encoded byte length of `text`; encoding not shown — confirm
589
+ }
590
+
591
+ /** Origin tag used as the `source` field of the `tts-first-audio` and `audio-committed` telemetry events. */ export type VoiceAudioSource = "cache" | "synthesis";
592
+
593
+ /** Reason attached to `tts-cancel` telemetry events (the `reason` field of that variant). */ export type VoiceTtsCancelReason =
594
+ | "barge-in"
595
+ | "rollback"
596
+ | "pending-tts"
597
+ | "synthesis-cancelled";
598
+
599
+ /** Telemetry event union for the voice scheduler, discriminated on `type`; every variant carries `atMs`. */ export type VoiceSchedulerTelemetryEvent =
600
+ | {
601
+ type: "phrase-dispatch";
602
+ atMs: number; // event time in ms; the clock/epoch is not visible here — confirm with the emitter
603
+ phrase: VoiceSchedulerPhraseTelemetry;
604
+ inFlightPhrases: number;
605
+ }
606
+ | {
607
+ type: "phrase-cache-hit" | "phrase-cache-miss"; // two tags share one payload shape
608
+ atMs: number;
609
+ phrase: VoiceSchedulerPhraseTelemetry;
610
+ }
611
+ | {
612
+ type: "tts-start";
613
+ atMs: number;
614
+ phrase: VoiceSchedulerPhraseTelemetry;
615
+ inFlightPhrases: number;
616
+ }
617
+ | {
618
+ type: "tts-first-audio";
619
+ atMs: number;
620
+ phrase: VoiceSchedulerPhraseTelemetry;
621
+ source: VoiceAudioSource;
622
+ samples: number;
623
+ sampleRate: number;
624
+ }
625
+ | {
626
+ type: "audio-committed"; // same fields as `tts-first-audio` plus flush/pause/buffer counters
627
+ atMs: number;
628
+ phrase: VoiceSchedulerPhraseTelemetry;
629
+ source: VoiceAudioSource;
630
+ samples: number;
631
+ sampleRate: number;
632
+ flushedSamples: number;
633
+ paused: boolean;
634
+ ringBufferSamples: number;
635
+ sinkBufferedSamples: number;
636
+ }
637
+ | {
638
+ type: "tts-cancel";
639
+ atMs: number;
640
+ phrase: VoiceSchedulerPhraseTelemetry;
641
+ reason: VoiceTtsCancelReason;
642
+ }
643
+ | {
644
+ type: "rollback"; // carries a `phraseId` rather than a full `phrase` snapshot
645
+ atMs: number;
646
+ phraseId: number; // presumably matches `VoiceSchedulerPhraseTelemetry.id` — confirm
647
+ range: RejectedTokenRange; // `RejectedTokenRange` is declared outside this section
648
+ reason: "rejected-tokens"; // single literal: the only rollback reason this type allows
649
+ }
650
+ | {
651
+ type: "barge-in";
652
+ atMs: number;
653
+ ringBufferSamplesDrained: number;
654
+ sinkBufferedSamplesDrained: number;
655
+ inFlightPhrasesCancelled: number;
656
+ wasPaused: boolean;
657
+ }
658
+ | {
659
+ /**
660
+ * Fired when the prefix-preserving rollback queue partitions
661
+ * in-flight audio chunks on barge-in. `retainedChunks` are replayed
662
+ * into the sink; `droppedChunks` are discarded. Present only when
663
+ * `PrefixPreservingQueue` is active (at least one chunk was tagged).
664
+ */
665
+ type: "barge-in-prefix-rollback";
666
+ atMs: number;
667
+ divergencePoint: number; // unit (token index vs. sample offset) is not visible here — confirm with the queue
668
+ retainedChunks: number;
669
+ droppedChunks: number;
670
+ straddledChunks: number; // presumably chunks spanning the divergence point — confirm how they are handled
671
+ retainedDurationMs: number;
672
+ droppedDurationMs: number;
673
+ };
674
+
675
+ /** Callback that receives one `VoiceSchedulerTelemetryEvent`; its return value is ignored. */ export type VoiceSchedulerTelemetryListener = (
676
+ event: VoiceSchedulerTelemetryEvent,
677
+ ) => void;
678
+
679
+ // ---------------------------------------------------------------------------
680
+ // Shared interfaces extracted here to break circular dependencies between
681
+ // vad.ts and its consumers, and wake-word.ts ↔ wake-word-ggml.ts.
682
+ // ---------------------------------------------------------------------------
683
+
684
+ /** Minimal VAD model contract consumed by the fused `GgmlSileroVad` and the
685
+ * optional injected qwen-toolkit adapter. */
686
+ export interface VadLike {
687
+ readonly windowSamples: number; // presumably the sample count `process` expects per window — confirm
688
+ readonly sampleRate: number; // presumably Hz — confirm
689
+ process(window: Float32Array): Promise<number>; // async; resolves to one number per window — presumably a speech score/probability, range not shown — confirm
690
+ reset(): void; // synchronous; presumably clears state carried between windows — confirm
691
+ }
692
+
693
+ /** Minimal wake-word model contract consumed by OpenWakeWordGgmlModel. */
694
+ export interface WakeWordModel {
695
+ readonly frameSamples: number; // presumably the sample count `scoreFrame` expects per frame — confirm
696
+ readonly sampleRate: number; // presumably Hz — confirm
697
+ scoreFrame(frame: Float32Array): Promise<number>; // async; resolves to one number per frame — presumably a wake-word score, range not shown — confirm
698
+ reset(): void; // synchronous; presumably clears state carried between frames — confirm
699
+ }