@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,393 @@
1
+ /**
2
+ * Samantha-preset on-the-fly regeneration via the fused OmniVoice FFI.
3
+ *
4
+ * Path A from W3-11: when the bundle ships the I-wave zero-fill placeholder
5
+ * for `cache/voice-preset-default.bin`, the runtime synthesises a real
6
+ * preset by encoding the bundled Samantha reference clip through the FFI's
7
+ * `eliza_inference_encode_reference` entrypoint and writing the resulting
8
+ * `ref_audio_tokens` + canonical instruct/refText into a v2 preset blob.
9
+ *
10
+ * Determinism contract:
11
+ * - The reference clip bytes (24 kHz mono fp32 WAV) are pinned in the
12
+ * bundle.
13
+ * - The reference transcript (`SAMANTHA_REFERENCE_TRANSCRIPT`) is pinned.
14
+ * - The instruct string (`SAMANTHA_INSTRUCT`) is pinned.
15
+ * - The OmniVoice encode entrypoint does not consume randomness (the
16
+ * HuBERT semantic + RVQ codec passes are pure functions of the input
17
+ * PCM + the model weights).
18
+ * Therefore the produced preset bytes are reproducible byte-for-byte across
19
+ * boots given the same FFI library + bundle.
20
+ */
21
+
22
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
23
+ import path from "node:path";
24
+ import {
25
+ type ElizaInferenceContextHandle,
26
+ type ElizaInferenceFfi,
27
+ loadElizaInferenceFfi,
28
+ } from "./ffi-bindings";
29
+ import {
30
+ detectSamanthaPlaceholder,
31
+ SAMANTHA_INSTRUCT,
32
+ SAMANTHA_REFERENCE_TRANSCRIPT,
33
+ } from "./samantha-preset-placeholder";
34
+ import { writeVoicePresetFileV2 } from "./voice-preset-format";
35
+
36
/**
 * Result reported by `ensureSamanthaPresetReady`.
 *
 * Every variant is tagged with its own `kind`, so a caller can choose what
 * to log (and at which level) without running placeholder detection again.
 */
export type EnsureSamanthaPresetOutcome =
  /** The preset file on disk is already a real preset. */
  | { kind: "real-preset" }
  /** No preset file exists at the bundle's canonical path. */
  | { kind: "missing-bundle-preset" }
  /** A fresh preset was produced; `bytes` is the size of the written blob. */
  | { kind: "regenerated"; bytes: number; K: number; refT: number }
  /** Regeneration was not possible; `reason` classifies why, `detail` elaborates. */
  | {
      kind: "placeholder-no-regen";
      reason:
        | "missing-reference-wav"
        | "missing-ffi-library"
        | "ffi-no-encode-reference"
        | "encode-reference-failed";
      detail: string;
    };
51
+
52
/** Inputs accepted by `regenerateSamanthaPresetFromBundle`. */
export interface RegenerateOptions {
  /** Root directory of the bundle; the FFI context is created against it. */
  bundleRoot: string;
  /**
   * Absolute path the regenerated preset is destined for. Writing is the
   * caller's job: the regeneration function only returns the bytes and
   * their metadata.
   */
  presetPath: string;
  /**
   * Alternative location of the Samantha reference WAV. When omitted, the
   * bundle's `tts/omnivoice/samantha-ref.wav` is used.
   */
  referenceWav?: string;
  /**
   * Alternative reference transcript. When omitted, the pinned
   * `SAMANTHA_REFERENCE_TRANSCRIPT` is used.
   */
  referenceText?: string;
}
64
+
65
/** Output of `regenerateSamanthaPresetFromBundle`. */
export interface RegenerateResult {
  /** Serialised preset blob, ready to be written to disk by the caller. */
  bytes: Uint8Array;
  /** `K` dimension of the encoded reference-token tensor. */
  K: number;
  /** `refT` dimension of the encoded reference-token tensor. */
  refT: number;
  /** Length of the speaker-embedding section stored in the preset. */
  embeddingDim: number;
}
71
+
72
/**
 * Candidate filenames for the fused OmniVoice shared library on the current
 * platform, in probe order. A private helper in `engine-bridge.ts` resolves
 * the same names, so a bundle that loads at boot is also found when a
 * preset is regenerated.
 */
function libraryFilenames(): string[] {
  switch (process.platform) {
    case "darwin":
      return ["libelizainference.dylib"];
    case "win32":
      return ["elizainference.dll", "libelizainference.dll"];
    default:
      return ["libelizainference.so"];
  }
}
86
+
87
/**
 * Resolve the path of the OmniVoice shared library for a bundle.
 *
 * Resolution order:
 *  1. `ELIZA_INFERENCE_LIBRARY`, when it names an existing file.
 *  2. The first existing platform filename inside, in order: `<bundleRoot>/lib`,
 *     the directory of `ELIZA_INFERENCE_LIBRARY` (when set), and
 *     `ELIZA_INFERENCE_LIB_DIR` (when set).
 *  3. Otherwise the preferred filename inside the first search directory.
 *     That path may not exist; callers check with `existsSync`.
 */
function locateBundleLibrary(bundleRoot: string): string {
  const pinnedLibrary = process.env.ELIZA_INFERENCE_LIBRARY?.trim();
  if (pinnedLibrary && existsSync(pinnedLibrary)) {
    return pinnedLibrary;
  }

  const bundleLibDir = path.join(bundleRoot, "lib");
  const searchDirs: string[] = [];
  if (bundleLibDir) searchDirs.push(bundleLibDir);
  if (pinnedLibrary) searchDirs.push(path.dirname(pinnedLibrary));
  const overrideDir = process.env.ELIZA_INFERENCE_LIB_DIR?.trim();
  if (overrideDir) searchDirs.push(overrideDir);

  const names = libraryFilenames();
  for (const dir of searchDirs) {
    const hit = names
      .map((name) => path.join(dir, name))
      .find((candidate) => existsSync(candidate));
    if (hit !== undefined) return hit;
  }

  return path.join(
    searchDirs[0] ?? bundleLibDir,
    names[0] ?? "libelizainference.so",
  );
}
106
+
107
/**
 * Decode a 24 kHz mono Float32 LE WAV file into a Float32Array of PCM
 * samples. Refuses anything that is not the canonical OmniVoice reference
 * format — encoders happily accept stereo / 16-bit / 48 kHz inputs and
 * silently degrade, which is exactly the kind of fallback sludge AGENTS.md
 * §3 forbids. We require the file be in the right format up front.
 *
 * Accepted encodings are WAVE_FORMAT_IEEE_FLOAT (3) and
 * WAVE_FORMAT_EXTENSIBLE (0xFFFE) whose SubFormat is IEEE float, both with
 * 32 bits per sample.
 *
 * @param bytes Raw contents of the WAV file.
 * @returns The PCM samples, copied into a freshly allocated buffer. The
 *   sample count is bounded by the bytes actually present in the file, so a
 *   `data` chunk whose declared size overruns the file is never padded.
 * @throws Error when the file is shorter than a WAV header, has bad
 *   RIFF/WAVE magic, lacks a `fmt ` or `data` chunk, has a `fmt ` chunk
 *   shorter than 16 bytes, or is not 32-bit float / mono / 24 kHz.
 */
export function decodeMonoFloat32Wav24kHz(bytes: Uint8Array): Float32Array {
  if (bytes.byteLength < 44) {
    throw new Error(
      `[samantha-regen] reference WAV too small (${bytes.byteLength} bytes, need >= 44 for header)`,
    );
  }
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const riff = String.fromCharCode(...bytes.subarray(0, 4));
  const wave = String.fromCharCode(...bytes.subarray(8, 12));
  if (riff !== "RIFF" || wave !== "WAVE") {
    throw new Error(
      `[samantha-regen] reference WAV bad magic: RIFF=${JSON.stringify(riff)} WAVE=${JSON.stringify(wave)}`,
    );
  }

  // Walk chunks for "fmt " + "data". The walk stops at the first "data"
  // chunk, so a "fmt " chunk is only honoured when it precedes the audio.
  let cursor = 12;
  let fmtOffset = -1;
  let fmtLen = 0;
  let dataOffset = -1;
  let dataLen = 0;
  while (cursor + 8 <= bytes.byteLength) {
    const id = String.fromCharCode(...bytes.subarray(cursor, cursor + 4));
    const size = view.getUint32(cursor + 4, true);
    const payload = cursor + 8;
    if (id === "fmt ") {
      fmtOffset = payload;
      fmtLen = size;
    } else if (id === "data") {
      dataOffset = payload;
      dataLen = size;
      break;
    }
    cursor = payload + size + (size % 2); // pad byte
  }
  if (fmtOffset < 0 || dataOffset < 0) {
    throw new Error("[samantha-regen] reference WAV missing fmt or data chunk");
  }
  // The fields read below occupy the first 16 bytes of the fmt payload; a
  // shorter chunk would make us interpret the following chunk as format data.
  if (fmtLen < 16) {
    throw new Error(
      `[samantha-regen] reference WAV fmt chunk too small (${fmtLen} bytes, need >= 16)`,
    );
  }
  const audioFormat = view.getUint16(fmtOffset + 0, true);
  const channels = view.getUint16(fmtOffset + 2, true);
  const sampleRate = view.getUint32(fmtOffset + 4, true);
  const bitsPerSample = view.getUint16(fmtOffset + 14, true);

  if (audioFormat === 0xfffe && bitsPerSample === 32 && fmtLen >= 40) {
    // WAVE_FORMAT_EXTENSIBLE only says "look at SubFormat". The real sample
    // encoding is the format code that opens the SubFormat GUID at offset
    // 24; 32-bit *integer* PCM (code 1) must not be read as float.
    const subFormat = view.getUint16(fmtOffset + 24, true);
    if (subFormat !== 3) {
      throw new Error(
        `[samantha-regen] reference WAV must be 32-bit float PCM (got WAVE_FORMAT_EXTENSIBLE with subformat=${subFormat}, bps=${bitsPerSample})`,
      );
    }
  } else if (!(audioFormat === 3 && bitsPerSample === 32)) {
    throw new Error(
      `[samantha-regen] reference WAV must be 32-bit float PCM (got format=${audioFormat}, bps=${bitsPerSample})`,
    );
  }
  if (channels !== 1) {
    throw new Error(
      `[samantha-regen] reference WAV must be mono (got ${channels} channels)`,
    );
  }
  if (sampleRate !== 24_000) {
    throw new Error(
      `[samantha-regen] reference WAV must be 24 kHz (got ${sampleRate})`,
    );
  }

  // Never trust the declared size beyond what the file holds: a truncated
  // file would otherwise be zero-padded, and a streaming-style size of
  // 0xFFFFFFFF would request a ~4 GiB allocation.
  const availableBytes = bytes.byteLength - dataOffset;
  const sampleCount = Math.floor(Math.min(dataLen, availableBytes) / 4);
  // Copy into an aligned buffer (the input slice is not guaranteed
  // 4-aligned; Float32Array constructor requires alignment). `set` is used
  // rather than `slice` because `Buffer#slice` returns a view, not a copy.
  const aligned = new Uint8Array(sampleCount * 4);
  aligned.set(bytes.subarray(dataOffset, dataOffset + sampleCount * 4));
  return new Float32Array(aligned.buffer);
}
185
+
186
/**
 * Produce a real Samantha preset from the bundle's reference clip.
 *
 * Steps: locate the reference WAV and the OmniVoice shared library, load the
 * FFI, create a context and acquire the `tts` mapping, decode the clip, run
 * `encodeReference`, and serialise the tokens plus the canonical
 * instruct/refText through `writeVoicePresetFileV2`. Nothing is written to
 * disk here, and `opts.presetPath` is not read; the caller persists the
 * returned bytes.
 *
 * @param opts Bundle location and optional reference overrides.
 * @returns The preset bytes together with the encoded tensor dimensions.
 * @throws Error when the reference clip or the library is missing, when the
 *   FFI build lacks encode-reference support, when the clip is not in the
 *   required format, or when the encoder returns an empty tensor.
 */
export async function regenerateSamanthaPresetFromBundle(
  opts: RegenerateOptions,
): Promise<RegenerateResult> {
  const clipPath =
    opts.referenceWav ??
    path.join(opts.bundleRoot, "tts", "omnivoice", "samantha-ref.wav");
  if (!existsSync(clipPath)) {
    throw new Error(
      `[samantha-regen] Samantha reference WAV not found at ${clipPath}. The bundle is missing the OmniVoice samantha reference clip.`,
    );
  }

  const libraryPath = locateBundleLibrary(opts.bundleRoot);
  if (!existsSync(libraryPath)) {
    const bundleLibDir = path.join(opts.bundleRoot, "lib");
    const triedNames = libraryFilenames().join(", ");
    throw new Error(
      `[samantha-regen] OmniVoice FFI library not found under ${bundleLibDir} (tried ${triedNames}). Build via packages/app-core/scripts/build-llama-cpp-mtp.mjs (omnivoice-merged target).`,
    );
  }

  const ffi: ElizaInferenceFfi = loadElizaInferenceFfi(libraryPath);
  let handle: ElizaInferenceContextHandle | null = null;
  let holdsTtsMapping = false;

  // Teardown steps are best-effort: a failure in one must not mask the
  // primary error nor prevent the remaining steps from running.
  const bestEffort = (step: () => void): void => {
    try {
      step();
    } catch {
      // Deliberately ignored — see above.
    }
  };

  try {
    const canEncode =
      typeof ffi.encodeReferenceSupported === "function" &&
      ffi.encodeReferenceSupported();
    if (!canEncode) {
      throw new Error(
        "[samantha-regen] this OmniVoice build does not export eliza_inference_encode_reference (ABI v4 required). Rebuild with the encode-reference target.",
      );
    }
    if (typeof ffi.encodeReference !== "function") {
      throw new Error(
        "[samantha-regen] FFI binding missing encodeReference method despite encodeReferenceSupported()=true",
      );
    }

    handle = ffi.create(opts.bundleRoot);
    ffi.mmapAcquire(handle, "tts");
    holdsTtsMapping = true;

    const clipBytes = new Uint8Array(readFileSync(clipPath));
    const samples = decodeMonoFloat32Wav24kHz(clipBytes);

    const encoded = ffi.encodeReference({
      ctx: handle,
      pcm: samples,
      sampleRateHz: 24_000,
    });
    const { K, refT } = encoded;
    if (K <= 0 || refT <= 0) {
      throw new Error(
        `[samantha-regen] encode_reference returned empty tensor (K=${K}, refT=${refT})`,
      );
    }

    // Only ref_audio_tokens come out of the encode pass. The speaker
    // embedding section is left empty because OmniVoice derives the speaker
    // identity from those tokens rather than from a separate vector.
    const refText = opts.referenceText ?? SAMANTHA_REFERENCE_TRANSCRIPT;
    const instruct = SAMANTHA_INSTRUCT;

    // Key order is kept stable so the serialised metadata stays reproducible.
    const metadata: Record<string, unknown> = {
      generator: "samantha-preset-regenerator",
      generatorVersion: 1,
      referenceWavPath: path.basename(clipPath),
      referenceWavBytes: clipBytes.byteLength,
      referenceText: refText,
      instruct,
      K,
      refT,
    };

    const bytes = writeVoicePresetFileV2({
      embedding: new Float32Array(0),
      phrases: [],
      refAudioTokens: { K, refT, tokens: encoded.tokens },
      refText,
      instruct,
      metadata,
    });

    return { bytes, K, refT, embeddingDim: 0 };
  } finally {
    if (handle !== null) {
      const liveHandle = handle;
      if (holdsTtsMapping) {
        // Destroy below tears the context down regardless of this result.
        bestEffort(() => ffi.mmapEvict(liveHandle, "tts"));
      }
      // The OS reclaims the context on process exit if this fails.
      bestEffort(() => ffi.destroy(liveHandle));
    }
    bestEffort(() => ffi.close());
  }
}
309
+
310
/**
 * Pre-flight: detect a placeholder preset at the bundle's canonical path
 * and regenerate it via OmniVoice when possible. Called by the engine's
 * `ensureActiveBundleVoiceReady()` before the synchronous preset load.
 *
 * Outcomes:
 *
 *   - `real-preset`           — nothing to do; the file is a real preset.
 *   - `missing-bundle-preset` — file does not exist; the engine's existing
 *                               error path runs (loud failure).
 *   - `regenerated`           — preset bytes were generated and written.
 *   - `placeholder-no-regen`  — placeholder detected but regen could not
 *                               run (FFI missing, reference clip missing,
 *                               etc.). Returned for the caller to log; the
 *                               engine then falls through to the bundled
 *                               Kokoro default voice.
 *
 * @param bundleRoot Root directory of the active bundle.
 * @returns The outcome describing what was found and what was done.
 * @throws Whatever `mkdirSync` / `writeFileSync` raise if the regenerated
 *   preset cannot be written. Failures during regeneration itself are
 *   reported as `placeholder-no-regen`, not thrown.
 */
export async function ensureSamanthaPresetReady(
  bundleRoot: string,
): Promise<EnsureSamanthaPresetOutcome> {
  const presetPath = path.join(bundleRoot, "cache", "voice-preset-default.bin");
  const state = detectSamanthaPlaceholder(presetPath);

  if (state.kind === "missing") {
    return { kind: "missing-bundle-preset" };
  }
  if (state.kind === "real-preset") {
    return { kind: "real-preset" };
  }
  if (state.kind === "unreadable") {
    // The outcome union has no dedicated "unreadable" reason, so the closest
    // existing one is reused; `detail` carries the actual cause.
    return {
      kind: "placeholder-no-regen",
      reason: "missing-ffi-library",
      detail: state.reason,
    };
  }

  // Placeholder detected. Check the cheap preconditions first so the caller
  // gets a precise reason instead of a generic regeneration failure.
  const refWav = path.join(bundleRoot, "tts", "omnivoice", "samantha-ref.wav");
  if (!existsSync(refWav)) {
    return {
      kind: "placeholder-no-regen",
      reason: "missing-reference-wav",
      detail: refWav,
    };
  }
  const libPath = locateBundleLibrary(bundleRoot);
  if (!existsSync(libPath)) {
    return {
      kind: "placeholder-no-regen",
      reason: "missing-ffi-library",
      detail: libPath,
    };
  }

  let result: RegenerateResult;
  try {
    result = await regenerateSamanthaPresetFromBundle({
      bundleRoot,
      presetPath,
      referenceWav: refWav,
      referenceText: SAMANTHA_REFERENCE_TRANSCRIPT,
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // Distinguish the FFI-symbol-missing path from a real synth failure
    // — both are placeholder-no-regen but the operator-facing reason
    // differs. The pattern matches only the two "capability absent"
    // messages; a bare `encode_reference` would also match the runtime
    // failure "encode_reference returned empty tensor" and misreport it.
    const missingSymbol =
      /does not export eliza_inference_encode_reference|encodeReferenceSupported|ABI v4/.test(
        message,
      );
    const reason: "ffi-no-encode-reference" | "encode-reference-failed" =
      missingSymbol ? "ffi-no-encode-reference" : "encode-reference-failed";
    return { kind: "placeholder-no-regen", reason, detail: message };
  }

  mkdirSync(path.dirname(presetPath), { recursive: true });
  writeFileSync(presetPath, result.bytes);
  return {
    kind: "regenerated",
    bytes: result.bytes.byteLength,
    K: result.K,
    refT: result.refT,
  };
}
@@ -0,0 +1,146 @@
1
+ import { BargeInController } from "./barge-in";
2
+ import type { PhonemeTokenizer } from "./phoneme-tokenizer";
3
+ import { PhraseCache } from "./phrase-cache";
4
+ import { PhraseChunker } from "./phrase-chunker";
5
+ import { PrefixPreservingQueue } from "./prefix-preserving-queue";
6
+ import { PcmRingBuffer } from "./ring-buffer";
7
+ import { RollbackQueue } from "./rollback-queue";
8
+ import type { AudioChunk, AudioSink, OmniVoiceBackend, Phrase, RejectedTokenRange, SchedulerConfig, SpeakerPreset, TextToken, VoiceSchedulerTelemetryListener } from "./types";
9
+ /**
10
+ * T2 — per-phrase TTS chunk-size telemetry, emitted once per
11
+ * `synthesizePhraseStream` call when `SchedulerEvents.onChunkMetrics` is
12
+ * wired. `chunks` is the in-arrival-order distribution of streamed PCM
13
+ * chunks (size in PCM bytes assuming Float32 samples, duration in ms
14
+ * derived from samples / sampleRate). Used to debug T1-class chunk-size
15
+ * pathologies and to verify T3 time-budget effects.
16
+ */
17
+ export interface TtsPhraseChunkMetrics {
18
+ phraseId: number;
19
+ /** Order-preserving list of per-chunk sizes. Empty when no chunks landed. */
20
+ chunks: ReadonlyArray<{
21
+ chunkBytes: number;
22
+ chunkDurationMs: number;
23
+ }>;
24
+ /** Sum of chunk durations in ms. */
25
+ totalDurationMs: number;
26
+ /** Sum of chunk bytes. */
27
+ totalBytes: number;
28
+ /** Whether the phrase synthesis was cancelled mid-stream. */
29
+ cancelled: boolean;
30
+ }
31
+ export type TtsChunkMetricsListener = (metrics: TtsPhraseChunkMetrics) => void;
32
+ export interface SchedulerEvents {
33
+ onPhrase?(phrase: Phrase): void;
34
+ onRollback?(phraseId: number, range: RejectedTokenRange): void;
35
+ onAudio?(chunk: AudioChunk): void;
36
+ /**
37
+ * Barge-in hard-stop: ring buffer drained, chunker reset, in-flight TTS
38
+ * cancelled. The engine layer's `voiceStreamingArgs` separately threads
39
+ * the `BargeInCancelToken.signal` (`bargeIn.onSignal` → `hard-stop`)
40
+ * into `dispatcher.generate` so the LLM/drafter abort too.
41
+ */
42
+ onCancel?(): void;
43
+ /** Provisional barge-in: a VAD voice hit while the agent is speaking paused TTS playback. */
44
+ onTtsPause?(): void;
45
+ /** Blip resolved the provisional barge-in — TTS playback resumed. */
46
+ onTtsResume?(): void;
47
+ /** Structured scheduler telemetry for latency, cache, rollback, and barge-in metrics. */
48
+ onTelemetry?: VoiceSchedulerTelemetryListener;
49
+ /**
50
+ * T2 — per-phrase TTS chunk-size distribution. Optional; when set, the
51
+ * scheduler emits one summary per streamed phrase synthesis (success or
52
+ * cancelled). Lets test harnesses and metrics consumers verify T1/T3
53
+ * effects without scraping the audio bus.
54
+ */
55
+ onChunkMetrics?: TtsChunkMetricsListener;
56
+ }
57
+ export interface SchedulerDeps {
58
+ backend: OmniVoiceBackend;
59
+ sink?: AudioSink;
60
+ phraseCache?: PhraseCache;
61
+ /** Optional. Required only when `config.chunkerConfig.chunkOn ===
62
+ * 'phoneme-stream'`. Defaults are available from
63
+ * `createDefaultPhonemeTokenizer()`. */
64
+ phonemeTokenizer?: PhonemeTokenizer;
65
+ }
66
+ export declare class VoiceScheduler {
67
+ readonly chunker: PhraseChunker;
68
+ readonly rollback: RollbackQueue;
69
+ readonly bargeIn: BargeInController;
70
+ readonly ringBuffer: PcmRingBuffer;
71
+ readonly sink: AudioSink;
72
+ readonly preset: SpeakerPreset;
73
+ /**
74
+ * Prefix-preserving barge-in queue. When the streaming TTS path is active,
75
+ * each audio chunk is enqueued here tagged with its token range. On
76
+ * hard-stop (barge-in), `rollbackAt(divergencePoint)` partitions the
77
+ * queue: chunks at or before the divergence point are replayed into the
78
+ * sink; chunks after are dropped. This lets audio that was already
79
+ * correct play through without re-synthesizing.
80
+ */
81
+ readonly prefixQueue: PrefixPreservingQueue;
82
+ private readonly backend;
83
+ private readonly phraseCache;
84
+ private readonly events;
85
+ private readonly sampleRate;
86
+ private readonly inFlight;
87
+ private readonly maxInFlight;
88
+ private readonly streamingTtsActive;
89
+ private kernelTicks;
90
+ private nextStandalonePhraseId;
91
+ /** True while a provisional barge-in (`pause-tts`) has paused playback. */
92
+ private paused;
93
+ /**
94
+ * The last committed token index — updated whenever a phrase is dispatched
95
+ * to TTS. Used as the divergence point when a barge-in fires mid-response.
96
+ */
97
+ private lastCommittedTokenIndex;
98
+ private agentSpeakingUntilMs;
99
+ private agentSpeakingTimer;
100
+ private phraseFlushTimer;
101
+ constructor(config: SchedulerConfig, deps: SchedulerDeps, events?: SchedulerEvents);
102
+ accept(token: TextToken, acceptedAt?: number): Promise<void>;
103
+ reject(range: RejectedTokenRange): Promise<void>;
104
+ flushPending(): Promise<void>;
105
+ waitIdle(): Promise<void>;
106
+ synthesizeText(text: string, signal?: AbortSignal): Promise<AudioChunk>;
107
+ prewarmPhrases(texts: ReadonlyArray<string>, opts?: {
108
+ concurrency?: number;
109
+ }): Promise<{
110
+ warmed: number;
111
+ cached: number;
112
+ }>;
113
+ tickKernel(): void;
114
+ kernelTickCount(): number;
115
+ /**
116
+ * Mark the agent as audibly speaking for the duration of audio handed to the
117
+ * sink. This is the barge-in gate: VAD blips only pause/resume TTS while this
118
+ * flag is true, and ASR-confirmed words hard-stop playback plus generation.
119
+ */
120
+ markAgentSpeakingForAudio(samples: number, sampleRate: number): void;
121
+ /** True while a provisional barge-in has paused TTS playback. */
122
+ get ttsPaused(): boolean;
123
+ /**
124
+ * Drop not-yet-spoken TTS without signalling a barge-in: drain the ring
125
+ * buffer, reset the chunker, cancel in-flight synthesis. Used by the turn
126
+ * controller when a speculative response is invalidated (speech resumed) —
127
+ * the speculative TTS was streamed off a stale partial transcript, so it
128
+ * must go, but this is not a user barge-in (`onCancel` is NOT fired).
129
+ */
130
+ cancelPendingTts(): void;
131
+ private dispatchPhrase;
132
+ private runPhraseSynthesis;
133
+ private synthesizePhraseStream;
134
+ private isPhraseTracked;
135
+ private cancelNativeTts;
136
+ private commitAudio;
137
+ private onBargeInSignal;
138
+ private handleBargeIn;
139
+ private emitTtsCancel;
140
+ private emitTelemetry;
141
+ private armPhraseFlushTimer;
142
+ private clearPhraseFlushTimer;
143
+ private armAgentSpeakingTimer;
144
+ private clearAgentSpeaking;
145
+ }
146
+ //# sourceMappingURL=scheduler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scheduler.d.ts","sourceRoot":"","sources":["scheduler.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC/C,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EACN,qBAAqB,EAErB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAqB,aAAa,EAAE,MAAM,eAAe,CAAC;AACjE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,EAEX,UAAU,EACV,SAAS,EAET,gBAAgB,EAChB,MAAM,EACN,kBAAkB,EAClB,eAAe,EACf,aAAa,EAEb,SAAS,EAKT,+BAA+B,EAE/B,MAAM,SAAS,CAAC;AAEjB;;;;;;;GAOG;AACH,MAAM,WAAW,qBAAqB;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,6EAA6E;IAC7E,MAAM,EAAE,aAAa,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,eAAe,EAAE,MAAM,CAAC;KACxB,CAAC,CAAC;IACH,oCAAoC;IACpC,eAAe,EAAE,MAAM,CAAC;IACxB,0BAA0B;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,6DAA6D;IAC7D,SAAS,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,MAAM,uBAAuB,GAAG,CAAC,OAAO,EAAE,qBAAqB,KAAK,IAAI,CAAC;AAE/E,MAAM,WAAW,eAAe;IAC/B,QAAQ,CAAC,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,kBAAkB,GAAG,IAAI,CAAC;IAC/D,OAAO,CAAC,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;IAClC;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,IAAI,CAAC;IAClB,6FAA6F;IAC7F,UAAU,CAAC,IAAI,IAAI,CAAC;IACpB,qEAAqE;IACrE,WAAW,CAAC,IAAI,IAAI,CAAC;IACrB,yFAAyF;IACzF,WAAW,CAAC,EAAE,+BAA+B,CAAC;IAC9C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,uBAAuB,CAAC;CACzC;AAED,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,gBAAgB,CAAC;IAC1B,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B;;6CAEyC;IACzC,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACpC;AAiED,qBAAa,cAAc;IAC1B,QAAQ,CAAC,OAAO,EAAE,aAAa,CAAC;IAChC,QAAQ,CAAC,QAAQ,gBAAuB;IACxC,QAAQ,CAAC,OAAO,oBAA2B;IAC3C,QAAQ,CAAC,UAAU,EAAE,aAAa,CAAC;IACnC,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC;IAC/B;;;;;;;OAOG;IACH,QAAQ,CAAC,WAAW,wBAA+B;IACnD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;IAC3C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkB;IACzC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA+B;IACxD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ
,CAAC,kBAAkB,CAAU;IAC7C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,sBAAsB,CAAM;IACpC,2EAA2E;IAC3E,OAAO,CAAC,MAAM,CAAS;IACvB;;;OAGG;IACH,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,oBAAoB,CAAK;IACjC,OAAO,CAAC,kBAAkB,CAA8C;IACxE,OAAO,CAAC,gBAAgB,CAA8C;gBAGrE,MAAM,EAAE,eAAe,EACvB,IAAI,EAAE,aAAa,EACnB,MAAM,GAAE,eAAoB;IAoCvB,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,SAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAWhE,MAAM,CAAC,KAAK,EAAE,kBAAkB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8BhD,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAQ7B,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAKzB,cAAc,CACnB,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,CAAC;IA6FhB,cAAc,CACnB,KAAK,EAAE,aAAa,CAAC,MAAM,CAAC,EAC5B,IAAI,GAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAA;KAAO,GACjC,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IA8C9C,UAAU,IAAI,IAAI;IAIlB,eAAe,IAAI,MAAM;IAIzB;;;;OAIG;IACH,yBAAyB,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAYpE,iEAAiE;IACjE,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED;;;;;;OAMG;IACH,gBAAgB,IAAI,IAAI;YAeV,cAAc;YA4Dd,kBAAkB;YA8ClB,sBAAsB;IAkHpC,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;IA4CnB,OAAO,CAAC,eAAe;IAgCvB,OAAO,CAAC,aAAa;IAkErB,OAAO,CAAC,aAAa;IASrB,OAAO,CAAC,aAAa;IAIrB,OAAO,CAAC,mBAAmB;IAsB3B,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,qBAAqB;IAqB7B,OAAO,CAAC,kBAAkB;CAQ1B"}
@@ -0,0 +1,141 @@
1
+ /**
2
+ * T2 — TTS chunk-size telemetry tests for `VoiceScheduler`.
3
+ *
4
+ * The scheduler streams TTS PCM in chunks; T2 records the per-phrase
5
+ * distribution of chunk sizes so we can debug T1-class pathologies (one
6
+ * giant chunk = no streaming) and confirm T3-class effects (more, smaller
7
+ * phrases = more, smaller chunks). The streaming backend is a fake so
8
+ * tests stay hermetic.
9
+ */
10
+
11
+ import { describe, expect, it } from "vitest";
12
+ import { InMemoryAudioSink } from "./ring-buffer";
13
+ import { type TtsPhraseChunkMetrics, VoiceScheduler } from "./scheduler";
14
+ import type {
15
+ AudioChunk,
16
+ OmniVoiceBackend,
17
+ Phrase,
18
+ SpeakerPreset,
19
+ StreamingTtsBackend,
20
+ TextToken,
21
+ TtsPcmChunk,
22
+ } from "./types";
23
+
24
/**
 * Builds a `TextToken` fixture.
 *
 * @param index - Position of the token in the stream.
 * @param text - Token text.
 * @returns A token object carrying exactly `index` and `text`.
 */
function tok(index: number, text: string): TextToken {
  return { index, text };
}
27
+
28
/**
 * Builds the speaker preset fixture shared by every test in this file.
 *
 * @returns A preset with voice id `"default"`, a two-element embedding, and
 *   `bytes` holding a copy of the embedding's underlying buffer (so the two
 *   views do not alias the same memory).
 */
function makePreset(): SpeakerPreset {
  const embedding = new Float32Array([0.1, 0.2]);
  // `slice(0)` clones the ArrayBuffer; `bytes` is independent of `embedding`.
  const bytes = new Uint8Array(embedding.buffer.slice(0));
  return { voiceId: "default", embedding, bytes };
}
36
+
37
/**
 * Fake TTS backend that replays a fixed script of PCM buffers.
 *
 * `synthesizeStream` hands each scripted buffer to `onChunk` as a non-final
 * chunk, then always emits one empty final chunk. The non-streaming
 * `synthesize` entry point is not supported and always rejects.
 */
class ScriptedStreamingBackend
  implements OmniVoiceBackend, StreamingTtsBackend
{
  /** Sample rate stamped on every emitted chunk. */
  private static readonly SAMPLE_RATE = 24000;

  /**
   * @param chunks - PCM buffers to replay, in order, on every
   *   `synthesizeStream` call.
   */
  constructor(private readonly chunks: ReadonlyArray<Float32Array>) {}

  /** Always rejects with `Error("not used")`. */
  async synthesize(): Promise<AudioChunk> {
    throw new Error("not used");
  }

  /**
   * Replays the scripted buffers through `args.onChunk`.
   *
   * For each buffer, `onKernelTick` (when provided) fires first, then the
   * cancel flag is checked; a set flag stops the replay before that buffer is
   * emitted. The empty final chunk is emitted whether or not the replay was
   * cut short. The value returned by `onChunk` is ignored.
   *
   * @returns `cancelled` mirroring `args.cancelSignal.cancelled` at the end of
   *   the call.
   */
  async synthesizeStream(args: {
    phrase: Phrase;
    preset: SpeakerPreset;
    cancelSignal: { cancelled: boolean };
    onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
    onKernelTick?: () => void;
  }): Promise<{ cancelled: boolean }> {
    const sampleRate = ScriptedStreamingBackend.SAMPLE_RATE;
    for (const pcm of this.chunks) {
      args.onKernelTick?.();
      if (args.cancelSignal.cancelled) break;
      args.onChunk({ pcm, sampleRate, isFinal: false });
    }
    args.onChunk({
      pcm: new Float32Array(0),
      sampleRate,
      isFinal: true,
    });
    return { cancelled: args.cancelSignal.cancelled };
  }
}
64
+
65
/**
 * T2 suite: drives `VoiceScheduler` with a scripted streaming backend and
 * checks the per-phrase summaries delivered to the `onChunkMetrics` listener.
 */
describe("VoiceScheduler T2 chunk-size telemetry", () => {
  it("emits one onChunkMetrics summary per phrase with per-chunk byte and duration", async () => {
    // At 24 kHz, 240 samples = 10 ms and 480 samples = 20 ms, so the two
    // scripted chunks add up to 30 ms for the phrase.
    const backend = new ScriptedStreamingBackend([
      new Float32Array(240),
      new Float32Array(480),
    ]);
    const sink = new InMemoryAudioSink();
    const metricsLog: TtsPhraseChunkMetrics[] = [];
    const sched = new VoiceScheduler(
      {
        chunkerConfig: { maxTokensPerPhrase: 10 },
        preset: makePreset(),
        ringBufferCapacity: 4096,
        sampleRate: 24000,
      },
      { backend, sink },
      { onChunkMetrics: (m) => metricsLog.push(m) },
    );

    await sched.accept(tok(0, "Hello"));
    await sched.accept(tok(1, "."));
    await sched.waitIdle();

    expect(metricsLog).toHaveLength(1);
    const m = metricsLog[0];
    // The backend's empty final chunk is not expected in the summary: only
    // the two scripted chunks are asserted.
    expect(m.chunks).toHaveLength(2);
    // Float32 => 4 bytes / sample.
    expect(m.chunks[0]).toEqual({ chunkBytes: 240 * 4, chunkDurationMs: 10 });
    expect(m.chunks[1]).toEqual({ chunkBytes: 480 * 4, chunkDurationMs: 20 });
    expect(m.totalBytes).toBe((240 + 480) * 4);
    expect(m.totalDurationMs).toBe(30);
    expect(m.cancelled).toBe(false);
  });

  it("reports cancelled=false summary when synthesis completes", async () => {
    const backend = new ScriptedStreamingBackend([new Float32Array(120)]);
    const sink = new InMemoryAudioSink();
    const metricsLog: TtsPhraseChunkMetrics[] = [];
    const sched = new VoiceScheduler(
      {
        chunkerConfig: { maxTokensPerPhrase: 10 },
        preset: makePreset(),
        ringBufferCapacity: 4096,
        sampleRate: 24000,
      },
      { backend, sink },
      { onChunkMetrics: (m) => metricsLog.push(m) },
    );
    await sched.accept(tok(0, "Hi"));
    await sched.accept(tok(1, "."));
    await sched.waitIdle();
    expect(metricsLog).toHaveLength(1);
    expect(metricsLog[0].cancelled).toBe(false);
    expect(metricsLog[0].chunks).toHaveLength(1);
  });

  it("does not invoke onChunkMetrics when the listener is absent", async () => {
    const backend = new ScriptedStreamingBackend([new Float32Array(120)]);
    const sink = new InMemoryAudioSink();
    // No listener — should not throw or do extra work; the scheduler still
    // commits audio normally.
    const sched = new VoiceScheduler(
      {
        chunkerConfig: { maxTokensPerPhrase: 10 },
        preset: makePreset(),
        ringBufferCapacity: 4096,
        sampleRate: 24000,
      },
      { backend, sink },
    );
    await sched.accept(tok(0, "Hi"));
    await sched.accept(tok(1, "."));
    await sched.waitIdle();
    expect(sink.totalWritten()).toBeGreaterThan(0);
  });
});