@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,413 @@
1
+ /**
2
+ * Voice Workbench corpus generator (#8785).
3
+ *
4
+ * Turns a declarative {@link VoiceScenario} into one labeled audio stream + a
5
+ * ground-truth JSON the headless runner scores against. Two synthesis paths,
6
+ * one shape:
7
+ *
8
+ * - Synthetic (default, deterministic, NO native model): formant-resonator
9
+ * speech-like PCM (`__test-helpers__/synthetic-speech.ts`) the Silero VAD
10
+ * reads as speech. Reproducible in CI with no artifacts — it exercises the
11
+ * pipeline plumbing + the scorers/labels, not diarization/WER *accuracy*.
12
+ * - Real TTS (gated): an injected {@link CorpusTtsSynthesizer} (Kokoro /
13
+ * OmniVoice via the TTS route) produces natural speech. Real diarization
14
+ * DER and transcription WER benchmarking need this path.
15
+ *
16
+ * `generateVoiceCorpus` is pure (no I/O) so it is unit-testable without disk;
17
+ * `writeVoiceCorpus` / `readVoiceCorpus` handle the versioned on-disk corpus.
18
+ * A turn's labels (speaker, transcript, respond decision, entity) come straight
19
+ * from the scenario, so the ground truth is reproducible regardless of path.
20
+ */
21
+
22
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
23
+ import path from "node:path";
24
+ import {
25
+ AGENT_VOICE_TIMBRE,
26
+ makeSpeechWithSilenceFixture,
27
+ type SpeakerTimbre,
28
+ speakerTimbreForIndex,
29
+ } from "./__test-helpers__/synthetic-speech";
30
+ import {
31
+ type AugmentationSpec,
32
+ augmentPcm,
33
+ specIsClean,
34
+ } from "./corpus-augment";
35
+ import {
36
+ resolveTurnEnvironment,
37
+ turnReferenceTranscript,
38
+ turnSpeakerLabel,
39
+ type VoiceEnvironment,
40
+ type VoiceScenario,
41
+ validateVoiceScenario,
42
+ } from "./voice-scenario";
43
+ import { encodeMonoPcm16Wav } from "./wav-codec";
44
+
/** Sample rate (Hz) used when the caller does not pass `sampleRate`. */
const DEFAULT_SAMPLE_RATE = 16_000;
/** Natural speaking rate used to size synthetic speech from text length. */
const DEFAULT_CHARS_PER_SECOND = 13;
/** Inter-turn silence (seconds) used when `interTurnSilenceSec` is not given. */
const DEFAULT_INTER_TURN_SILENCE_SEC = 0.4;
/** Presumably the floor (seconds) for one synthetic turn — confirm at the use site. */
const MIN_SPEECH_SEC = 0.4;
/** Cap a single synthetic turn (long-form monologue still lands well under). */
const MAX_SPEECH_SEC = 45;
/** Presumably silence (seconds) placed before synthetic speech — confirm at the use site. */
const SYNTHETIC_LEAD_SILENCE_SEC = 0.15;
/** Presumably silence (seconds) placed after synthetic speech — confirm at the use site. */
const SYNTHETIC_TAIL_SILENCE_SEC = 0.15;
54
+
/**
 * Ground-truth label for one turn of the generated stream. All `*Sample`
 * fields are sample offsets, giving sample-accurate timing.
 */
export interface CorpusTurnLabel {
  /** Ordinal of the turn (presumably its position in the scenario's turn list). */
  index: number;
  /** Participant who spoke — the diarization ground-truth label. */
  speaker: string;
  /** elizaOS entity id resolved for the speaker; set only when the scenario binds one. */
  entityId?: string;
  /** Offset of the first voiced sample of the turn (any lead silence comes before it). */
  speechStartSample: number;
  /** Offset one past the last voiced sample (trailing pauses come after it). */
  speechEndSample: number;
  /** Offset where this turn's whole segment begins in the stream. */
  segmentStartSample: number;
  /** Offset one past this turn's whole segment, trailing pauses included. */
  segmentEndSample: number;
  /** Text that WER scoring compares the recognized transcript against. */
  referenceTranscript: string;
  /** Ground truth: whether the agent should respond to this turn. */
  expectRespond: boolean;
  /** Ground truth: whether this segment is a real end-of-turn boundary. */
  expectEndOfTurn?: boolean;
  /** Entity expected to be inferred/recognized, when the scenario asserts one. */
  expectedEntity?: string;
  /** Voice id used on the real-TTS path for this turn, when set. */
  ttsVoiceId?: string;
  /** True for formant-synthesized audio; false when real TTS produced the turn. */
  synthetic: boolean;
  /** Acoustic degradation applied to this turn's audio, if any. */
  environment?: VoiceEnvironment;
  /** True when the "turn" is the agent's own TTS echoed back rather than a user turn. */
  isAgentEcho?: boolean;
  /** Ground truth: the speaker is the device owner / primary enrolled voice. */
  isOwner?: boolean;
  /** What the agent says in reply to this turn (drives the echo gate downstream). */
  agentReplyText?: string;
}
91
+
/**
 * Schema version stamped into the on-disk corpus ground truth. Increase it
 * whenever the labeled-corpus shape changes incompatibly:
 * `readVoiceCorpusGroundTruth` treats a corpus written by a different version
 * as absent (→ `skipped`, never a stale `pass`).
 */
export const CORPUS_SCHEMA_VERSION = 1;
98
+
/** Ground-truth record for a whole generated corpus: stream facts plus per-turn labels. */
export interface CorpusGroundTruth {
  /** Labeled-corpus schema version (see {@link CORPUS_SCHEMA_VERSION}). */
  schemaVersion: number;
  /** Id of the scenario this corpus was generated from. */
  scenarioId: string;
  /** Scenario classes, typed exactly as on {@link VoiceScenario}. */
  classes: VoiceScenario["classes"];
  /** Sample rate of the audio stream. */
  sampleRate: number;
  /** Length of the audio stream in samples. */
  totalSamples: number;
  /** Length of the audio stream in seconds. */
  durationSec: number;
  /** Scenario participants, each with its optional entity binding, owner flag and TTS voice. */
  participants: {
    label: string;
    entityId?: string;
    isOwner?: boolean;
    ttsVoiceId?: string;
  }[];
  /** Agents declared for the scenario, when any. */
  agents?: string[];
  /** Entity ids the agent answers without a wake word (owner + enrolled). */
  knownSpeakerEntityIds?: string[];
  /** One label per turn. */
  turns: CorpusTurnLabel[];
  /** True only when EVERY turn was synthetic (no real TTS used anywhere). */
  synthetic: boolean;
}
120
+
/** Result of corpus generation: the audio stream together with its ground truth. */
export interface GeneratedVoiceCorpus {
  /** Audio samples of the generated stream. */
  pcm: Float32Array;
  /** Sample rate of `pcm`. */
  sampleRate: number;
  /** Labels the audio is scored against. */
  groundTruth: CorpusGroundTruth;
}
126
+
/**
 * Pluggable real-TTS backend. It is gated: when no synthesizer is supplied the
 * generator falls back to deterministic synthetic speech. Implementations wrap
 * the TTS route / Kokoro engine and must return mono PCM at the sample rate
 * they are asked for.
 */
export interface CorpusTtsSynthesizer {
  /** Render one turn's `text` to PCM; resolves with the audio samples. */
  synthesize(args: {
    text: string;
    voiceId?: string;
    speakerLabel: string;
    turnIndex: number;
    isAgentEcho: boolean;
    sampleRate: number;
  }): Promise<Float32Array>;
}
142
+
/** Optional knobs for corpus generation; every field has a default. */
export interface GenerateVoiceCorpusOptions {
  /** Output sample rate. */
  sampleRate?: number;
  /** Real-TTS synthesizer that produces natural speech; omit it for synthetic speech. */
  synthesizer?: CorpusTtsSynthesizer;
  /** Seconds of silence spliced after a turn that declares no explicit pauses. */
  interTurnSilenceSec?: number;
  /** Synthetic-speech sizing: characters of text per second of audio. */
  charsPerSecond?: number;
}
152
+
/**
 * Derive a stable per-speaker synthesis seed from a label using a 32-bit
 * FNV-1a style hash over the label's UTF-16 code units.
 *
 * @param label Speaker label to hash.
 * @returns The hash as an unsigned 32-bit integer; the same label always
 *   yields the same seed.
 */
function labelSeed(label: string): number {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let hash = FNV_OFFSET_BASIS;
  let pos = 0;
  while (pos < label.length) {
    // xor the next code unit in, then multiply with 32-bit wraparound.
    hash = Math.imul(hash ^ label.charCodeAt(pos), FNV_PRIME);
    pos += 1;
  }
  // Reinterpret the signed 32-bit result as unsigned.
  return hash >>> 0;
}
162
+
/**
 * Convert a duration in milliseconds to a whole number of samples.
 *
 * @param ms Duration in milliseconds.
 * @param sampleRate Samples per second.
 * @returns The sample count rounded to the nearest integer and clamped so it
 *   is never negative.
 */
function silenceSamples(ms: number, sampleRate: number): number {
  const seconds = ms / 1000;
  const rounded = Math.round(seconds * sampleRate);
  return Math.max(0, rounded);
}
166
+
/**
 * Build the continuous competing-talker stream used for `backgroundTalkersDb`.
 * The audio is formant-synth speech (no models), so it is deterministic for a
 * given seed. `augmentPcm` mixes + loops it under the real turn at the
 * requested level.
 *
 * @param sampleRate Sample rate passed through to the speech fixture.
 * @param lengthSamples Desired length in samples; durations shorter than 0.3 s
 *   are raised to 0.3 s of speech.
 * @param seed Seed passed through to the speech fixture.
 * @returns The fixture's PCM, requested with no lead or tail silence.
 */
function synthesizeBabble(
  sampleRate: number,
  lengthSamples: number,
  seed: number,
): Float32Array {
  const requestedSec = lengthSamples / sampleRate;
  const { pcm } = makeSpeechWithSilenceFixture({
    sampleRate,
    leadSilenceSec: 0,
    speechSec: Math.max(0.3, requestedSec),
    tailSilenceSec: 0,
    seed,
  });
  return pcm;
}
186
+
187
/**
 * Render a scenario into one labeled audio stream plus its ground truth.
 *
 * Turns are processed strictly in order. Each turn is voiced by
 * `options.synthesizer` when one is supplied, otherwise by the deterministic
 * synthetic-speech fixture. A trailing pause is appended, and the turn's
 * resolved acoustic environment (when present and not clean) is applied to
 * speech + pause together. The per-turn segments are then concatenated and
 * every turn is labeled with its sample offsets in the final stream.
 *
 * @param scenario - Scenario to render; it must pass `validateVoiceScenario`.
 * @param options - Sample rate, optional real-TTS synthesizer, sizing knobs.
 * @returns The concatenated PCM, its sample rate, and the ground truth.
 * @throws Error when the scenario is invalid (fail loud: a malformed corpus
 *   must not silently produce garbage), or when a turn has no non-blank
 *   `text` (audioRef-only turns are supplied via the on-disk corpus and are
 *   not generated here).
 */
export async function generateVoiceCorpus(
  scenario: VoiceScenario,
  options: GenerateVoiceCorpusOptions = {},
): Promise<GeneratedVoiceCorpus> {
  const validation = validateVoiceScenario(scenario);
  if (!validation.valid) {
    const reasons = validation.errors.join("; ");
    throw new Error(
      `[voice-corpus] invalid scenario "${scenario.id}": ${reasons}`,
    );
  }

  const sampleRate = options.sampleRate ?? DEFAULT_SAMPLE_RATE;
  const charsPerSecond = options.charsPerSecond ?? DEFAULT_CHARS_PER_SECOND;
  const interTurnSilenceMs =
    (options.interTurnSilenceSec ?? DEFAULT_INTER_TURN_SILENCE_SEC) * 1000;
  const interTurnSilence = silenceSamples(interTurnSilenceMs, sampleRate);
  const { synthesizer } = options;

  // Index participants by label, and give each one a distinct voice colour
  // spread evenly across the timbre range, so a blind acoustic diarizer can
  // tell co-present speakers apart from the audio alone (#9427).
  const participantByLabel = new Map<
    string,
    VoiceScenario["participants"][number]
  >();
  const timbreByLabel = new Map<string, SpeakerTimbre>();
  scenario.participants.forEach((member, position) => {
    participantByLabel.set(member.label, member);
    timbreByLabel.set(
      member.label,
      speakerTimbreForIndex(position, scenario.participants.length),
    );
  });

  const segments: Float32Array[] = [];
  const labels: CorpusTurnLabel[] = [];
  let cursor = 0; // running length of the stream, in samples
  let anyReal = false; // flips once any turn was voiced by the synthesizer

  for (let turnIndex = 0; turnIndex < scenario.turns.length; turnIndex += 1) {
    const turn = scenario.turns[turnIndex];
    const text = turn.text?.trim();
    if (!text) {
      throw new Error(
        `[voice-corpus] turn[${turnIndex}] of "${scenario.id}" has no text to synthesize (audioRef-only turns are supplied via the on-disk corpus, not generated)`,
      );
    }
    const participant = participantByLabel.get(turn.speaker);
    const ttsVoiceId = turn.ttsVoiceId ?? participant?.ttsVoiceId;
    const segmentStartSample = cursor;

    // --- 1. Voice the turn --------------------------------------------------
    let speech: Float32Array;
    let speechStartOffset = 0;
    let speechEndOffset: number;
    if (synthesizer) {
      // Real TTS: the whole returned buffer counts as speech.
      speech = await synthesizer.synthesize({
        text,
        voiceId: ttsVoiceId,
        speakerLabel: turn.speaker,
        turnIndex,
        isAgentEcho: turn.isAgentEcho === true,
        sampleRate,
      });
      speechEndOffset = speech.length;
      anyReal = true;
    } else {
      // Synthetic: size the utterance from the text length, clamped to the
      // module's min/max speech duration.
      const unclampedSec = Math.max(
        MIN_SPEECH_SEC,
        text.length / charsPerSecond,
      );
      const speechSec = Math.min(MAX_SPEECH_SEC, unclampedSec);
      // An agent-echo turn is the agent's OWN TTS bleeding back through the
      // mic, so it carries the agent's voice and seed, not the labelled
      // speaker's. Real speaker turns get their per-speaker timbre (#9427).
      const timbre = turn.isAgentEcho
        ? AGENT_VOICE_TIMBRE
        : (timbreByLabel.get(turn.speaker) ?? AGENT_VOICE_TIMBRE);
      const seedLabel = turn.isAgentEcho ? "__agent__" : turn.speaker;
      const fixture = makeSpeechWithSilenceFixture({
        sampleRate,
        leadSilenceSec: SYNTHETIC_LEAD_SILENCE_SEC,
        speechSec,
        tailSilenceSec: SYNTHETIC_TAIL_SILENCE_SEC,
        seed: labelSeed(seedLabel),
        timbre,
      });
      speech = fixture.pcm;
      speechStartOffset = fixture.speechStartSample;
      speechEndOffset = fixture.speechEndSample;
    }
    const synthetic = !synthesizer;

    // --- 2. Trailing pause --------------------------------------------------
    // Explicit per-turn gaps win; otherwise the default inter-turn gap is
    // used, except after the final turn, which gets none.
    let pauseTotal = 0;
    if (turn.pausesMs && turn.pausesMs.length > 0) {
      turn.pausesMs.forEach((ms) => {
        pauseTotal += silenceSamples(ms, sampleRate);
      });
    } else if (turnIndex < scenario.turns.length - 1) {
      pauseTotal = interTurnSilence;
    }

    // --- 3. Assemble and degrade the segment --------------------------------
    // Speech and pause are joined BEFORE augmentation so the acoustic
    // degradation (reverb ringing into the gap, a noise floor in the
    // "silence") covers the pause as well as the speech.
    let segment: Float32Array = new Float32Array(speech.length + pauseTotal);
    segment.set(speech, 0);

    const env = resolveTurnEnvironment(scenario, turn);
    let appliedEnv: VoiceEnvironment | undefined;
    if (env && !specIsClean(env)) {
      // Without an explicit seed, derive one from the scenario id and the
      // turn index so each turn's augmentation is reproducible.
      const seed =
        env.seed ??
        ((labelSeed(scenario.id) ^ (turnIndex * 0x9e3779b1)) >>> 0);
      const resolvedEnv: AugmentationSpec = { ...env, seed };
      const babble =
        resolvedEnv.backgroundTalkersDb === undefined
          ? undefined
          : synthesizeBabble(sampleRate, segment.length, (seed ^ 0x1234) >>> 0);
      segment = augmentPcm(
        segment,
        sampleRate,
        resolvedEnv,
        babble ? { babble } : {},
      );
      appliedEnv = resolvedEnv;
    }

    segments.push(segment);
    cursor += segment.length;

    // --- 4. Label the turn --------------------------------------------------
    // Key order below is the order the label is serialized in; keep it stable.
    const speaker = turnSpeakerLabel(turn);
    const referenceTranscript = turnReferenceTranscript(turn);
    const label: CorpusTurnLabel = {
      index: turnIndex,
      speaker,
      ...(participant?.entityId ? { entityId: participant.entityId } : {}),
      speechStartSample: segmentStartSample + speechStartOffset,
      speechEndSample: segmentStartSample + speechEndOffset,
      segmentStartSample,
      segmentEndSample: cursor,
      referenceTranscript,
      // An agent echo must never be answered, whatever the turn declares.
      expectRespond: turn.isAgentEcho ? false : turn.expectRespond,
      expectEndOfTurn: turn.expectEndOfTurn ?? true,
      ...(turn.expectedEntity ? { expectedEntity: turn.expectedEntity } : {}),
      ...(ttsVoiceId ? { ttsVoiceId } : {}),
      synthetic,
      ...(appliedEnv ? { environment: appliedEnv } : {}),
      ...(turn.isAgentEcho ? { isAgentEcho: true } : {}),
      ...(participant?.isOwner ? { isOwner: true } : {}),
      ...(turn.agentReplyText ? { agentReplyText: turn.agentReplyText } : {}),
    };
    labels.push(label);
  }

  // Concatenate the per-turn segments into one contiguous stream.
  const pcm = new Float32Array(cursor);
  segments.reduce((writeOffset, piece) => {
    pcm.set(piece, writeOffset);
    return writeOffset + piece.length;
  }, 0);

  const participants = scenario.participants.map((member) => ({
    label: member.label,
    ...(member.entityId ? { entityId: member.entityId } : {}),
    ...(member.isOwner ? { isOwner: member.isOwner } : {}),
    ...(member.ttsVoiceId ? { ttsVoiceId: member.ttsVoiceId } : {}),
  }));

  const groundTruth: CorpusGroundTruth = {
    schemaVersion: CORPUS_SCHEMA_VERSION,
    scenarioId: scenario.id,
    classes: scenario.classes,
    sampleRate,
    totalSamples: pcm.length,
    durationSec: pcm.length / sampleRate,
    participants,
    ...(scenario.agents ? { agents: scenario.agents } : {}),
    ...(scenario.knownSpeakerEntityIds
      ? { knownSpeakerEntityIds: scenario.knownSpeakerEntityIds }
      : {}),
    turns: labels,
    // The corpus counts as synthetic only if no turn used the real synthesizer.
    synthetic: !anyReal,
  };

  return { pcm, sampleRate, groundTruth };
}
370
+
371
/** Where `writeVoiceCorpus` put a corpus on disk. */
export interface VoiceCorpusPaths {
  /** The corpus directory, exactly as it was passed in. */
  dir: string;
  /** Path of the `audio.wav` file inside `dir`. */
  audioPath: string;
  /** Path of the `ground-truth.json` file inside `dir`. */
  groundTruthPath: string;
}
376
+
377
/**
 * Persist a generated corpus under `dir` as `audio.wav` (encoded by
 * `encodeMonoPcm16Wav`) and `ground-truth.json` (2-space indented, newline
 * terminated). `dir` is created, including missing parents, if needed.
 *
 * @param corpus - Corpus returned by `generateVoiceCorpus`.
 * @param dir - Target directory.
 * @returns The directory and the two file paths that were written.
 */
export function writeVoiceCorpus(
  corpus: GeneratedVoiceCorpus,
  dir: string,
): VoiceCorpusPaths {
  mkdirSync(dir, { recursive: true });

  const written: VoiceCorpusPaths = {
    dir,
    audioPath: path.join(dir, "audio.wav"),
    groundTruthPath: path.join(dir, "ground-truth.json"),
  };

  const wavBytes = encodeMonoPcm16Wav(corpus.pcm, corpus.sampleRate);
  writeFileSync(written.audioPath, wavBytes);

  const groundTruthJson = JSON.stringify(corpus.groundTruth, null, 2);
  writeFileSync(written.groundTruthPath, `${groundTruthJson}\n`);

  return written;
}
392
+
393
/**
 * Load the ground truth of a previously written corpus.
 *
 * Honesty contract: anything that is not a usable, current-schema ground
 * truth is reported as absent (`null`), so the runner reports `skipped` and
 * never a stale `pass` against drifted labels. That covers a missing
 * directory or file, a JSON payload that is not an object, and a
 * `schemaVersion` different from `CORPUS_SCHEMA_VERSION`.
 *
 * Beyond the `schemaVersion` comparison the payload's shape is not checked.
 * A file that exists but is not valid JSON makes `JSON.parse` throw.
 *
 * @param dir - Corpus directory to read from.
 * @returns The parsed ground truth, or `null` when it is absent or unusable.
 */
export function readVoiceCorpusGroundTruth(
  dir: string,
): CorpusGroundTruth | null {
  const groundTruthPath = path.join(dir, "ground-truth.json");
  if (!existsSync(groundTruthPath)) {
    return null;
  }

  const parsed: unknown = JSON.parse(readFileSync(groundTruthPath, "utf8"));
  if (parsed === null || typeof parsed !== "object") {
    return null;
  }

  const { schemaVersion } = parsed as { schemaVersion?: unknown };
  return schemaVersion === CORPUS_SCHEMA_VERSION
    ? (parsed as CorpusGroundTruth)
    : null;
}
@@ -0,0 +1,140 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ computeDiarizationErrorRate,
4
+ type DerResult,
5
+ type DiarizationSegment,
6
+ } from "./diarization-error-rate";
7
+
8
+ /**
9
+ * Greedy-branch coverage for the DER scorer (issue #9147).
10
+ *
11
+ * `bestMapping` has two arms: an exact injective permutation search for small
12
+ * speaker counts, and an O(n²) greedy fallback once `refSpeakers.length +
13
+ * hypSpeakers.length > maxExactSpeakers` (default 7) — the existing
14
+ * `diarization-error-rate.test.ts` only ever hits the exact arm. A conversation
14
+ * with more than 7 combined ref + hyp speakers (a noisy multi-party room, the
15
+ * very case `maxDer` exists to guard) takes the greedy path, so this pins:
17
+ * - greedy fires both when forced via a low `maxExactSpeakers` AND on a
18
+ * genuine 4-ref/4-hyp (=8 combined) conversation under the default,
19
+ * - the greedy mapping stays injective (no ref or hyp reused — the
20
+ * usedHyp/usedRef guard), and
21
+ * - greedy reproduces the exact arm's DER on separable inputs (where greedy
22
+ * is provably optimal), so the fallback is not silently wrong.
23
+ */
24
+
25
/** Shorthand for a diarization segment: `speaker` talks over [startMs, endMs). */
function seg(
  speaker: string,
  startMs: number,
  endMs: number,
): DiarizationSegment {
  return { speaker, startMs, endMs };
}
30
+
31
/**
 * Assert that a hyp→ref mapping is injective: no hypothesis label appears
 * twice (true of any object's keys) and, the part that matters, no reference
 * label is the target of two hypothesis labels, which is the property the
 * greedy usedRef guard enforces.
 */
function expectInjective(mapping: DerResult["mapping"]): void {
  const pairs = Object.entries(mapping);
  const hypLabels = pairs.map(([hyp]) => hyp);
  const refLabels = pairs.map(([, ref]) => ref);
  expect(new Set(hypLabels).size).toBe(hypLabels.length);
  expect(new Set(refLabels).size).toBe(refLabels.length);
}
39
+
40
describe("computeDiarizationErrorRate — greedy mapping fallback", () => {
  /**
   * Back-to-back 1000 ms turns, one per given label, starting at 0 ms. A
   * fresh array is built on every call so no test shares segment objects.
   */
  const backToBack = (...speakers: string[]): DiarizationSegment[] =>
    speakers.map((speaker, slot) =>
      seg(speaker, slot * 1000, (slot + 1) * 1000),
    );

  it("forces the greedy arm via a low maxExactSpeakers and still scores a perfect 2-speaker match", () => {
    // 2 ref + 2 hyp = 4 combined, above maxExactSpeakers 3, so the scorer has
    // to go greedy (under the default threshold this input stays exact).
    const { der, confusionMs, mapping } = computeDiarizationErrorRate(
      backToBack("alice", "bob"),
      backToBack("spk0", "spk1"),
      { maxExactSpeakers: 3 },
    );
    expect(der).toBe(0);
    expect(confusionMs).toBe(0);
    expect(mapping).toEqual({ spk0: "alice", spk1: "bob" });
    expectInjective(mapping);
  });

  it("takes the greedy path on a genuine 4-speaker / 8-combined conversation (default maxExact 7)", () => {
    // 4 ref + 4 hyp = 8, which exceeds the default of 7: greedy with no options.
    const { der, totalReferenceMs, mapping } = computeDiarizationErrorRate(
      backToBack("a", "b", "c", "d"),
      backToBack("w", "x", "y", "z"),
    );
    expect(der).toBe(0);
    expect(totalReferenceMs).toBeCloseTo(4000, -1);
    expect(Object.keys(mapping)).toHaveLength(4);
    expectInjective(mapping);
  });

  it("greedy attributes a swapped span as confusion, not missed/false-alarm, with a tie-break-invariant DER", () => {
    // Four reference speakers, but the hypothesis reuses label "w" for both
    // a's span and d's span. Whichever of the two the greedy tie-break maps
    // "w" onto, the other span is a confusion, so the DER is deterministic.
    // 4 ref + 3 hyp = 7 combined; maxExactSpeakers 5 forces the greedy arm.
    const { missedMs, falseAlarmMs, confusionMs, der, mapping } =
      computeDiarizationErrorRate(
        backToBack("a", "b", "c", "d"),
        backToBack("w", "x", "y", "w"),
        { maxExactSpeakers: 5 },
      );
    expect(missedMs).toBe(0);
    expect(falseAlarmMs).toBe(0);
    expect(confusionMs).toBeCloseTo(1000, -1); // exactly one swapped span
    expect(der).toBeCloseTo(0.25, 2); // 1000 confusion / 4000 ref
    expectInjective(mapping); // w, x and y are each mapped once
  });

  it("leaves a zero-overlap hypothesis speaker unmapped (greedy skips s<=0 pairs → false alarm)", () => {
    // h2 talks over 2000-3000 ms, a stretch with no reference speaker at all.
    const { mapping, falseAlarmMs } = computeDiarizationErrorRate(
      backToBack("a", "b"),
      backToBack("h0", "h1", "h2"),
      { maxExactSpeakers: 4 },
    );
    // h2 never co-occurs with any ref speaker, so it gets no mapping entry.
    expect(mapping.h2).toBeUndefined();
    expect(falseAlarmMs).toBeCloseTo(1000, -1);
    expectInjective(mapping);
  });

  it("greedy reproduces the exact arm's DER on a separable 3-speaker case", () => {
    // c's span is collapsed onto "p", which is a confusion under either arm.
    const score = (maxExactSpeakers: number) =>
      computeDiarizationErrorRate(
        backToBack("a", "b", "c"),
        backToBack("p", "q", "p"),
        { maxExactSpeakers },
      );
    const exact = score(16);
    const greedy = score(0);
    expect(greedy.der).toBeCloseTo(exact.der, 5);
    expect(greedy.confusionMs).toBeCloseTo(exact.confusionMs, -1);
    expectInjective(greedy.mapping);
  });
});
@@ -0,0 +1,100 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ computeDiarizationErrorRate,
4
+ type DiarizationSegment,
5
+ diarizationWithinBudget,
6
+ } from "./diarization-error-rate";
7
+
8
+ /**
9
+ * Diarization Error Rate scorer (issue #9147). The voice scenarios carry a
10
+ * `maxDer` threshold and an `expectedSpeakerLabel` per turn, but nothing
11
+ * computed DER — so a wrong speaker attribution or a missed overlapping talker
12
+ * passed silently. This pins the four DER components (missed / false-alarm /
13
+ * confusion / correct) and the key property that DER is invariant to how the
14
+ * diarizer NAMES its speakers (it's the partition that matters, not the labels).
15
+ */
16
+
17
/** Build a diarization segment: `speaker` is active from `startMs` to `endMs`. */
function seg(
  speaker: string,
  startMs: number,
  endMs: number,
): DiarizationSegment {
  const segment: DiarizationSegment = { speaker, startMs, endMs };
  return segment;
}
26
+
27
describe("computeDiarizationErrorRate", () => {
  it("is 0 for a perfect match (even with different speaker label names)", () => {
    // The hypothesis cuts the timeline exactly like the reference; only the
    // speaker names (spk0/spk1) differ.
    const { der, confusionMs, mapping } = computeDiarizationErrorRate(
      [seg("alice", 0, 1000), seg("bob", 1000, 2000)],
      [seg("spk0", 0, 1000), seg("spk1", 1000, 2000)],
    );
    expect(der).toBe(0);
    expect(confusionMs).toBe(0);
    // The optimal mapping pairs up the equivalent speakers.
    expect(mapping).toEqual({ spk0: "alice", spk1: "bob" });
  });

  it("counts missed speech when the system misses a speaker", () => {
    // bob's 1000 ms never shows up in the hypothesis.
    const { missedMs, der } = computeDiarizationErrorRate(
      [seg("alice", 0, 1000), seg("bob", 1000, 2000)],
      [seg("spk0", 0, 1000)],
    );
    expect(missedMs).toBeCloseTo(1000, -1);
    expect(der).toBeCloseTo(0.5, 1); // 1000 missed / 2000 ref
  });

  it("counts false alarm when the system hallucinates speech", () => {
    // spk1 speaks over 1000-2000 ms, where the reference has nobody talking.
    const { falseAlarmMs, totalReferenceMs } = computeDiarizationErrorRate(
      [seg("alice", 0, 1000)],
      [seg("spk0", 0, 1000), seg("spk1", 1000, 2000)],
    );
    expect(falseAlarmMs).toBeCloseTo(1000, -1);
    expect(totalReferenceMs).toBeCloseTo(1000, -1);
  });

  it("counts confusion when the same span is attributed to a swapped speaker", () => {
    // Three distinct reference speakers; the hypothesis folds the third onto
    // the first speaker's id, so the third span is a confusion (wrong
    // speaker) rather than missed speech or a false alarm.
    const { missedMs, falseAlarmMs, confusionMs, der } =
      computeDiarizationErrorRate(
        [seg("a", 0, 1000), seg("b", 1000, 2000), seg("c", 2000, 3000)],
        [seg("x", 0, 1000), seg("y", 1000, 2000), seg("x", 2000, 3000)],
      );
    expect(missedMs).toBe(0);
    expect(falseAlarmMs).toBe(0);
    expect(confusionMs).toBeCloseTo(1000, -1); // c's span mapped to x≠c
    expect(der).toBeCloseTo(1 / 3, 2);
  });

  it("handles overlapping speech (both speakers active in one span)", () => {
    // alice 0-2000 and bob 1000-2000: 1000 ms with two active ref speakers.
    const { totalReferenceMs, der } = computeDiarizationErrorRate(
      [seg("alice", 0, 2000), seg("bob", 1000, 2000)],
      [seg("spk0", 0, 2000), seg("spk1", 1000, 2000)],
    );
    // Reference speaker-time = 2000 (alice) + 1000 (bob's overlap) = 3000 ms.
    expect(totalReferenceMs).toBeCloseTo(3000, -1);
    expect(der).toBe(0); // perfectly diarized overlap
  });

  it("penalizes a missed overlapping talker", () => {
    // bob's overlapping 1000 ms is absent from the hypothesis.
    const { missedMs, der } = computeDiarizationErrorRate(
      [seg("alice", 0, 2000), seg("bob", 1000, 2000)],
      [seg("spk0", 0, 2000)],
    );
    expect(missedMs).toBeCloseTo(1000, -1);
    expect(der).toBeCloseTo(1000 / 3000, 2);
  });
});
93
+
94
describe("diarizationWithinBudget", () => {
  it("gates a hypothesis against the scenario maxDer", () => {
    // [der, maxDer, expected verdict]: under budget, over budget, and the
    // boundary case where a perfect score meets a zero budget.
    const cases: Array<[number, number, boolean]> = [
      [0.1, 0.15, true],
      [0.2, 0.15, false],
      [0, 0, true],
    ];
    for (const [der, maxDer, withinBudget] of cases) {
      expect(diarizationWithinBudget({ der }, maxDer)).toBe(withinBudget);
    }
  });
});