@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,233 @@
1
+ /**
2
+ * In-process Kokoro-82M runtime over the fused `libelizainference` FFI
3
+ * (the `eliza_inference_kokoro_*` exports — introduced at ABI v10; the fused
4
+ * library is currently ABI v11, which adds EOT on top, so these symbols are
5
+ * present in every current build — see `ELIZA_INFERENCE_ABI_VERSION` in
6
+ * ffi-bindings.ts).
7
+ *
8
+ * This is the canonical Kokoro execution path on every platform. It replaces
9
+ * the local-TCP `KokoroGgufRuntime` (POST `/v1/audio/speech` on a running
10
+ * llama-server) for the mobile case — iOS and Google Play forbid the app
11
+ * opening a local TCP socket, so the HTTP→llama-server route cannot ship there.
12
+ * Kokoro synthesizes through the same dlopen()-ed handle as OmniVoice: the
13
+ * fused build links Eliza-1's Kokoro engine (its own GGUF reader + iSTFT
14
+ * decoder) behind `eliza_inference_kokoro_supported/load/synthesize/sample_rate`.
15
+ *
16
+ * Ownership: this runtime owns its own FFI handle + context. The context is
17
+ * created with `create(bundleRoot)` anchored at the bundle root (or the Kokoro
18
+ * model root when there is no Eliza-1 bundle), mirroring how the desktop fused
19
+ * text runtime obtains its ctx. The GGUF + the active voice `.bin` are loaded
20
+ * once via `kokoroLoad` and reloaded only when the requested voice changes.
21
+ *
22
+ * No silent fallback (AGENTS.md §3): when the loaded library does not export
23
+ * the Kokoro symbols (`kokoroSupported() === false`) or the model/voice files
24
+ * are missing, construction / first synthesis throws a structured
25
+ * `VoiceLifecycleError` rather than dropping back to the TCP route.
26
+ */
27
+
28
+ import { existsSync } from "node:fs";
29
+ import path from "node:path";
30
+ import { logger } from "@elizaos/core";
31
+ import { resolveFusedLibraryPath } from "../../desktop-fused-ffi-backend-runtime";
32
+ import {
33
+ type ElizaInferenceContextHandle,
34
+ type ElizaInferenceFfi,
35
+ loadElizaInferenceFfi,
36
+ } from "../ffi-bindings";
37
+ import { VoiceLifecycleError } from "../lifecycle";
38
+ import type { KokoroRuntime, KokoroRuntimeInputs } from "./kokoro-runtime";
39
+ import type { KokoroModelLayout } from "./types";
40
+ import { resolveKokoroVoiceOrDefault } from "./voices";
41
+
42
+ /** Kokoro v1.0 style-vector inner dimension. */
43
+ const KOKORO_STYLE_DIM = 256;
44
+
45
+ /**
46
+ * Per-synthesis output ceiling. Kokoro v1.0 emits 24 kHz fp32 PCM; 30 s of
47
+ * headroom (720 000 samples) bounds a single phrase synthesis well past the
48
+ * longest chunk the phrase chunker will hand us. The library returns the real
49
+ * sample count, which we slice to — this is only the allocation cap.
50
+ */
51
+ const MAX_OUTPUT_SAMPLES = 30 * 24_000;
52
+
53
+ export interface KokoroFfiRuntimeOptions {
54
+ /** Resolved on-disk Kokoro layout (GGUF filename + voices dir + root). */
55
+ layout: KokoroModelLayout;
56
+ /**
57
+ * Directory the FFI context anchors at (`create(bundleRoot)`). Defaults to
58
+ * the Kokoro model root, which is sufficient for the standalone Kokoro
59
+ * engine — it loads the GGUF + voice `.bin` by explicit absolute path, not
60
+ * by bundle convention.
61
+ */
62
+ bundleRoot?: string;
63
+ /**
64
+ * Inject a pre-loaded FFI handle (the desktop fused engine already owns one).
65
+ * When omitted the runtime loads its own via `resolveFusedLibraryPath`.
66
+ */
67
+ ffi?: ElizaInferenceFfi;
68
+ /**
69
+ * Inject a context to reuse. When omitted the runtime creates its own with
70
+ * `ffi.create(bundleRoot)` and destroys it on `dispose`.
71
+ */
72
+ ctx?: ElizaInferenceContextHandle;
73
+ }
74
+
75
/**
 * `KokoroRuntime` implementation that synthesizes through the
 * `ElizaInferenceFfi` bindings instead of an HTTP route.
 *
 * Ownership: the FFI handle and the context are each owned by this instance
 * only when they were not injected through the options. Owned resources are
 * released in `dispose()`, and also on every constructor failure path so a
 * failed construction never leaks a loaded library.
 */
export class KokoroFfiRuntime implements KokoroRuntime {
	readonly id = "gguf" as const;
	readonly sampleRate: number;

	private readonly layout: KokoroModelLayout;
	private readonly ffi: ElizaInferenceFfi;
	/** True when this instance loaded the library itself and must close it. */
	private readonly ownsFfi: boolean;
	private readonly ctx: ElizaInferenceContextHandle;
	/** True when this instance created the context itself and must destroy it. */
	private readonly ownsCtx: boolean;
	/** Voice id currently resident on the ctx (null until first load). */
	private loadedVoiceId: string | null = null;
	private disposed = false;

	/**
	 * @param opts Layout plus optional injected FFI handle / context.
	 * @throws VoiceLifecycleError (`kernel-missing`) when the fused library
	 *   cannot be located, or when it does not report Kokoro support.
	 */
	constructor(opts: KokoroFfiRuntimeOptions) {
		this.layout = opts.layout;
		const bundleRoot = opts.bundleRoot ?? opts.layout.root;

		const provided = opts.ffi;
		if (provided) {
			this.ffi = provided;
			this.ownsFfi = false;
		} else {
			const libPath = resolveFusedLibraryPath(bundleRoot);
			if (!libPath) {
				throw new VoiceLifecycleError(
					"kernel-missing",
					`[KokoroFfiRuntime] fused libelizainference not found for the in-process Eliza-1 Kokoro engine (anchored at ${bundleRoot}). ` +
						"Set ELIZA_INFERENCE_LIBRARY or build via packages/app-core/scripts/build-llama-cpp-mtp.mjs.",
				);
			}
			this.ffi = loadElizaInferenceFfi(libPath);
			this.ownsFfi = true;
		}

		if (
			typeof this.ffi.kokoroSupported !== "function" ||
			!this.ffi.kokoroSupported()
		) {
			// Read the version for the message before closing: nothing is
			// touched on the handle once it has been released.
			const abiVersion = this.ffi.libraryAbiVersion;
			if (this.ownsFfi) this.ffi.close();
			throw new VoiceLifecycleError(
				"kernel-missing",
				`[KokoroFfiRuntime] the loaded libelizainference (ABI v${abiVersion}) does not link the in-process Eliza-1 Kokoro engine. ` +
					"Rebuild with the Kokoro engine enabled — the mobile path must not fall back to the local-TCP /v1/audio/speech route.",
			);
		}

		if (opts.ctx !== undefined) {
			this.ctx = opts.ctx;
			this.ownsCtx = false;
		} else {
			let created: ElizaInferenceContextHandle;
			try {
				created = this.ffi.create(bundleRoot);
			} catch (err) {
				// The library was loaded above; do not leak it when context
				// creation fails.
				if (this.ownsFfi) this.ffi.close();
				throw err;
			}
			this.ctx = created;
			this.ownsCtx = true;
		}

		this.sampleRate = this.layout.sampleRate;
	}

	/**
	 * Synthesize one phoneme string and deliver it through `args.onChunk`.
	 *
	 * The waveform is produced by a single synchronous FFI call, so at most
	 * one non-final chunk is emitted, always followed by an empty chunk with
	 * `isFinal: true`.
	 *
	 * @returns `{ cancelled: true }` when the cancel signal was set before or
	 *   after the forward pass, or when `onChunk` returned `true`.
	 * @throws VoiceLifecycleError (`kernel-missing`) after `dispose()`, when
	 *   model/voice files are missing, or when a Kokoro symbol is not exported.
	 */
	async synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }> {
		if (this.disposed) {
			throw new VoiceLifecycleError(
				"kernel-missing",
				"[KokoroFfiRuntime] synthesize called after dispose",
			);
		}
		this.ensureVoiceLoaded(args.voice.id);

		if (args.cancelSignal.cancelled) {
			args.onChunk({
				pcm: new Float32Array(0),
				sampleRate: this.sampleRate,
				isFinal: true,
			});
			return { cancelled: true };
		}

		const maxSamples = args.maxSamples ?? MAX_OUTPUT_SAMPLES;
		// The Kokoro engine produces the full waveform in one synchronous
		// forward. The text it phonemizes internally is the same phoneme string
		// the llama-server `/v1/audio/speech` path sends as `input`.
		const pcm = this.kokoroSynthesize(args.phonemes.phonemes, maxSamples);

		let cancelled = false;
		if (args.cancelSignal.cancelled) {
			// Cancelled while the forward pass ran: drop the audio.
			cancelled = true;
		} else if (pcm.length > 0) {
			const want = args.onChunk({
				pcm,
				sampleRate: this.sampleRate,
				isFinal: false,
			});
			if (want === true || args.cancelSignal.cancelled) cancelled = true;
		}

		args.onChunk({
			pcm: new Float32Array(0),
			sampleRate: this.sampleRate,
			isFinal: true,
		});
		return { cancelled };
	}

	/**
	 * Release the owned context and library handle. Idempotent. The library
	 * is closed even if destroying the context throws.
	 */
	dispose(): void {
		if (this.disposed) return;
		this.disposed = true;
		try {
			if (this.ownsCtx) this.ffi.destroy(this.ctx);
		} finally {
			if (this.ownsFfi) this.ffi.close();
		}
	}

	/**
	 * Load the GGUF + the requested voice `.bin` into the ctx, reloading only
	 * when the resolved voice differs from the one already recorded as loaded.
	 *
	 * @throws VoiceLifecycleError (`kernel-missing`) when the model file or
	 *   voice file does not exist, or `kokoroLoad` is not exported.
	 */
	private ensureVoiceLoaded(requestedVoiceId: string): void {
		const voice = resolveKokoroVoiceOrDefault(requestedVoiceId);
		if (this.loadedVoiceId === voice.id) return;

		const ggufPath = path.join(this.layout.root, this.layout.modelFile);
		const voiceBinPath = path.join(this.layout.voicesDir, voice.file);
		if (!existsSync(ggufPath)) {
			throw new VoiceLifecycleError(
				"kernel-missing",
				`[KokoroFfiRuntime] Eliza-1 Kokoro model file not found at ${ggufPath}`,
			);
		}
		if (!existsSync(voiceBinPath)) {
			throw new VoiceLifecycleError(
				"kernel-missing",
				`[KokoroFfiRuntime] Eliza-1 voice preset not found at ${voiceBinPath} for voice ${voice.id}`,
			);
		}
		if (typeof this.ffi.kokoroLoad !== "function") {
			throw new VoiceLifecycleError(
				"kernel-missing",
				"[KokoroFfiRuntime] eliza_inference_kokoro_load is not exported by the loaded build",
			);
		}
		// Defensive: if the load below throws part-way through a voice swap we
		// cannot assume the previous voice is still resident, so forget it
		// first. The next call then retries the load instead of skipping it.
		this.loadedVoiceId = null;
		this.ffi.kokoroLoad({
			ctx: this.ctx,
			ggufPath,
			voiceBinPath,
			styleDim: voice.dim ?? KOKORO_STYLE_DIM,
		});
		this.loadedVoiceId = voice.id;
		logger.info(
			`[KokoroFfiRuntime] loaded Eliza-1 voice ${voice.id} from ${voiceBinPath}`,
		);
	}

	/**
	 * Thin guard around `ffi.kokoroSynthesize` that fails with a structured
	 * error when the symbol is not exported.
	 */
	private kokoroSynthesize(text: string, maxSamples: number): Float32Array {
		if (typeof this.ffi.kokoroSynthesize !== "function") {
			throw new VoiceLifecycleError(
				"kernel-missing",
				"[KokoroFfiRuntime] eliza_inference_kokoro_synthesize is not exported by the loaded build",
			);
		}
		return this.ffi.kokoroSynthesize({ ctx: this.ctx, text, maxSamples });
	}
}
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Kokoro-82M model runner.
3
+ *
4
+ * Execution paths:
5
+ *
6
+ * 1. GGUF via llama-server (default). When the host llama-server advertises
7
+ * a Kokoro-capable build and exposes `/v1/audio/speech`, we POST text in
8
+ * and stream PCM out.
9
+ *
10
+ * 2. Python subprocess — eval-loop only. Spawns `python -m kokoro_tts`.
11
+ * Never the default in production.
12
+ */
13
+ import type { KokoroPhonemeSequence, KokoroVoicePack } from "./types";
14
+ /** Pinned GGUF candidate location (carried by our llama.cpp fork). The
15
+ * runtime references this only for diagnostics; the fork-side builder
16
+ * produces the file at this path. */
17
+ export declare const KOKORO_GGUF_REL_PATH = "voice/kokoro-82m-v1_0.gguf";
18
+ /** One synthesized PCM segment delivered to the streaming backend. */
19
+ export interface KokoroRuntimeChunk {
20
+ pcm: Float32Array;
21
+ sampleRate: number;
22
+ isFinal: boolean;
23
+ }
24
+ /**
25
+ * Construction-time inputs for a runtime instance. The voice pack contains
26
+ * the style tensor reference; the runtime is responsible for resolving the
27
+ * bytes off `layout.voicesDir/<file>`.
28
+ */
29
+ export interface KokoroRuntimeInputs {
30
+ phonemes: KokoroPhonemeSequence;
31
+ voice: KokoroVoicePack;
32
+ /**
33
+ * Output sample budget. The runtime always honours the model's native
34
+ * sample rate (`layout.sampleRate`, usually 24 kHz) — this caps the
35
+ * total samples to prevent runaway generation. Defaults to 16 seconds
36
+ * at the layout sample rate (matches the longest phrase the chunker
37
+ * will emit + headroom).
38
+ */
39
+ maxSamples?: number;
40
+ /** Cancellation signal — polled at chunk boundaries. */
41
+ cancelSignal: {
42
+ cancelled: boolean;
43
+ };
44
+ /** Per-chunk callback; returning `true` cancels the rest of the run. */
45
+ onChunk: (chunk: KokoroRuntimeChunk) => boolean | undefined;
46
+ }
47
+ /** Shared runtime contract — `KokoroTtsBackend` depends on this, not the
48
+ * concrete classes. Tests inject a mock. */
49
+ export interface KokoroRuntime {
50
+ readonly id: "gguf" | "python" | "mock";
51
+ readonly sampleRate: number;
52
+ synthesize(args: KokoroRuntimeInputs): Promise<{
53
+ cancelled: boolean;
54
+ }>;
55
+ dispose(): void;
56
+ }
57
+ export interface KokoroPythonRuntimeOptions {
58
+ pythonBinary: string;
59
+ /** Resolved layout — the subprocess discovers the model under here. */
60
+ layout: {
61
+ root: string;
62
+ sampleRate: number;
63
+ };
64
+ /** Optional env passed through to the subprocess. */
65
+ env?: NodeJS.ProcessEnv;
66
+ }
67
+ /**
68
+ * Subprocess-backed runtime. Spawns `python -m kokoro_tts ...` per
69
+ * synthesis call (no warm-pool — the Python path is the *eval* path, not
70
+ * the realtime path). Production code paths never select this; the
71
+ * fine-tune evaluator wires it explicitly.
72
+ */
73
+ export declare class KokoroPythonRuntime implements KokoroRuntime {
74
+ readonly id: "python";
75
+ readonly sampleRate: number;
76
+ constructor(opts: KokoroPythonRuntimeOptions);
77
+ synthesize(_args: KokoroRuntimeInputs): Promise<{
78
+ cancelled: boolean;
79
+ }>;
80
+ dispose(): void;
81
+ }
82
+ export interface KokoroMockRuntimeOptions {
83
+ sampleRate: number;
84
+ /** Total samples emitted per synthesis call. */
85
+ totalSamples?: number;
86
+ /** Number of body chunks to split the output across. */
87
+ chunkCount?: number;
88
+ }
89
+ export declare class KokoroMockRuntime implements KokoroRuntime {
90
+ readonly id: "mock";
91
+ readonly sampleRate: number;
92
+ private readonly opts;
93
+ calls: number;
94
+ constructor(opts: KokoroMockRuntimeOptions);
95
+ synthesize(args: KokoroRuntimeInputs): Promise<{
96
+ cancelled: boolean;
97
+ }>;
98
+ dispose(): void;
99
+ }
100
+ //# sourceMappingURL=kokoro-runtime.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"kokoro-runtime.d.ts","sourceRoot":"","sources":["kokoro-runtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,qBAAqB,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAEtE;;sCAEsC;AACtC,eAAO,MAAM,oBAAoB,+BAA+B,CAAC;AAEjE,sEAAsE;AACtE,MAAM,WAAW,kBAAkB;IAClC,GAAG,EAAE,YAAY,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;CACjB;AAED;;;;GAIG;AACH,MAAM,WAAW,mBAAmB;IACnC,QAAQ,EAAE,qBAAqB,CAAC;IAChC,KAAK,EAAE,eAAe,CAAC;IACvB;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,YAAY,EAAE;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;IACrC,wEAAwE;IACxE,OAAO,EAAE,CAAC,KAAK,EAAE,kBAAkB,KAAK,OAAO,GAAG,SAAS,CAAC;CAC5D;AAED;6CAC6C;AAC7C,MAAM,WAAW,aAAa;IAC7B,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;IACxC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,UAAU,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;IACvE,OAAO,IAAI,IAAI,CAAC;CAChB;AAMD,MAAM,WAAW,0BAA0B;IAC1C,YAAY,EAAE,MAAM,CAAC;IACrB,uEAAuE;IACvE,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,CAAC;IAC7C,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC;CACxB;AAED;;;;;GAKG;AACH,qBAAa,mBAAoB,YAAW,aAAa;IACxD,QAAQ,CAAC,EAAE,EAAG,QAAQ,CAAU;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,IAAI,EAAE,0BAA0B;IAItC,UAAU,CACf,KAAK,EAAE,mBAAmB,GACxB,OAAO,CAAC;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;IASlC,OAAO,IAAI,IAAI;CAGf;AAOD,MAAM,WAAW,wBAAwB;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,gDAAgD;IAChD,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,iBAAkB,YAAW,aAAa;IACtD,QAAQ,CAAC,EAAE,EAAG,MAAM,CAAU;IAC9B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAqC;IAC1D,KAAK,SAAK;gBAEE,IAAI,EAAE,wBAAwB;IASpC,UAAU,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;IAsC5E,OAAO,IAAI,IAAI;CAGf"}
@@ -0,0 +1,170 @@
1
+ /**
2
+ * Kokoro-82M model runner.
3
+ *
4
+ * Execution paths:
5
+ *
6
+ * 1. GGUF via llama-server (default). When the host llama-server advertises
7
+ * a Kokoro-capable build and exposes `/v1/audio/speech`, we POST text in
8
+ * and stream PCM out.
9
+ *
10
+ * 2. Python subprocess — eval-loop only. Spawns `python -m kokoro_tts`.
11
+ * Never the default in production.
12
+ */
13
+
14
+ import type { KokoroPhonemeSequence, KokoroVoicePack } from "./types";
15
+
16
+ /** Pinned GGUF candidate location (carried by our llama.cpp fork). The
17
+ * runtime references this only for diagnostics; the fork-side builder
18
+ * produces the file at this path. */
19
+ export const KOKORO_GGUF_REL_PATH = "voice/kokoro-82m-v1_0.gguf";
20
+
21
+ /** One synthesized PCM segment delivered to the streaming backend. */
22
+ export interface KokoroRuntimeChunk {
23
+ pcm: Float32Array;
24
+ sampleRate: number;
25
+ isFinal: boolean;
26
+ }
27
+
28
+ /**
29
+ * Per-call inputs to `KokoroRuntime.synthesize`. The voice pack contains
30
+ * the style tensor reference; the runtime is responsible for resolving the
31
+ * bytes off `layout.voicesDir/<file>`.
32
+ */
33
+ export interface KokoroRuntimeInputs {
34
+ phonemes: KokoroPhonemeSequence;
35
+ voice: KokoroVoicePack;
36
+ /**
37
+ * Output sample budget. The runtime always honours the model's native
38
+ * sample rate (`layout.sampleRate`, usually 24 kHz) — this caps the
39
+ * total samples to prevent runaway generation. When omitted, each runtime
40
+ * applies its own default (`KokoroFfiRuntime` uses a 30 s cap at 24 kHz).
41
+ * NOTE(review): a 16 s default was documented here — confirm which runtimes use it.
42
+ */
43
+ maxSamples?: number;
44
+ /** Cancellation signal — polled at chunk boundaries. */
45
+ cancelSignal: { cancelled: boolean };
46
+ /** Per-chunk callback; returning `true` cancels the rest of the run. */
47
+ onChunk: (chunk: KokoroRuntimeChunk) => boolean | undefined;
48
+ }
49
+
50
+ /** Shared runtime contract — `KokoroTtsBackend` depends on this, not the
51
+ * concrete classes. Tests inject a mock. */
52
+ export interface KokoroRuntime {
53
+ readonly id: "gguf" | "python" | "mock";
54
+ readonly sampleRate: number;
55
+ synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }>;
56
+ dispose(): void;
57
+ }
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // Python subprocess path — eval-loop only.
61
+ // ---------------------------------------------------------------------------
62
+
63
+ export interface KokoroPythonRuntimeOptions {
64
+ pythonBinary: string;
65
+ /** Resolved layout — the subprocess discovers the model under here. */
66
+ layout: { root: string; sampleRate: number };
67
+ /** Optional env passed through to the subprocess. */
68
+ env?: NodeJS.ProcessEnv;
69
+ }
70
+
71
/**
 * Placeholder for the Python (`python -m kokoro_tts`) evaluation path.
 *
 * This class never spawns anything itself: `synthesize` always rejects so
 * the production scheduler cannot enable the Python path by accident. The
 * fine-tune evaluator is expected to drive the subprocess on its own.
 */
export class KokoroPythonRuntime implements KokoroRuntime {
	readonly id = "python" as const;
	readonly sampleRate: number;

	/** Only `layout.sampleRate` is read; the other options are not retained. */
	constructor(opts: KokoroPythonRuntimeOptions) {
		const { layout } = opts;
		this.sampleRate = layout.sampleRate;
	}

	/**
	 * Always rejects.
	 *
	 * @throws Error explaining that this runtime is eval-only.
	 */
	async synthesize(
		_args: KokoroRuntimeInputs,
	): Promise<{ cancelled: boolean }> {
		// The eval driver in `packages/training` owns the real
		// `child_process.spawn` wiring; failing loudly here keeps the
		// production runtime off this path.
		const reason =
			"[kokoro] KokoroPythonRuntime is eval-only — use it from the fine-tune driver, not the runtime scheduler";
		throw new Error(reason);
	}

	/** Nothing to release: the instance holds no long-lived state. */
	dispose(): void {}
}
100
+
101
+ // ---------------------------------------------------------------------------
102
+ // Mock runtime — synthesizes a fixed-frequency sine tone whose pitch is keyed
103
+ // to the phoneme count so tests can observe deterministic PCM without loading a model.
104
+ // ---------------------------------------------------------------------------
105
+
106
+ export interface KokoroMockRuntimeOptions {
107
+ sampleRate: number;
108
+ /** Total samples emitted per synthesis call. */
109
+ totalSamples?: number;
110
+ /** Number of body chunks to split the output across. */
111
+ chunkCount?: number;
112
+ }
113
+
114
/**
 * Deterministic in-memory runtime for tests.
 *
 * Each `synthesize` call emits a constant-frequency sine tone (amplitude 0.1)
 * at `100 + (phoneme id count % 200)` Hz, split into body chunks, followed by
 * an empty chunk with `isFinal: true`. `args.maxSamples` is not consulted;
 * the length comes from the constructor options.
 */
export class KokoroMockRuntime implements KokoroRuntime {
	readonly id = "mock" as const;
	readonly sampleRate: number;
	private readonly opts: Required<KokoroMockRuntimeOptions>;
	/** Number of `synthesize` calls made so far. */
	calls = 0;

	/**
	 * @param opts `totalSamples` defaults to 0.2 s of audio at `sampleRate`
	 *   (rounded down); `chunkCount` defaults to 4.
	 */
	constructor(opts: KokoroMockRuntimeOptions) {
		this.sampleRate = opts.sampleRate;
		this.opts = {
			sampleRate: opts.sampleRate,
			totalSamples: opts.totalSamples ?? Math.floor(opts.sampleRate * 0.2),
			chunkCount: opts.chunkCount ?? 4,
		};
	}

	/**
	 * Emit the tone chunk by chunk through `args.onChunk`.
	 *
	 * The cancel signal is checked before each chunk and again after each
	 * callback; a callback returning `true` also stops the run. The final
	 * empty chunk is delivered in every case.
	 *
	 * @returns `{ cancelled }` — true when the run stopped early.
	 */
	async synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }> {
		this.calls++;
		const { totalSamples, chunkCount } = this.opts;
		// At least one sample per chunk so the loop always advances.
		const perChunk = Math.max(1, Math.ceil(totalSamples / chunkCount));
		const freqHz = 100 + (args.phonemes.ids.length % 200);
		let cancelled = false;

		for (let off = 0; off < totalSamples; off += perChunk) {
			if (args.cancelSignal.cancelled) {
				cancelled = true;
				break;
			}
			// The last chunk may be shorter than `perChunk`.
			const n = Math.min(perChunk, totalSamples - off);
			const pcm = new Float32Array(n);
			for (let i = 0; i < n; i++) {
				// Phase is continuous across chunks: time is measured from
				// the start of the whole utterance, not the chunk.
				const t = (off + i) / this.sampleRate;
				pcm[i] = Math.sin(2 * Math.PI * freqHz * t) * 0.1;
			}
			const stopRequested = args.onChunk({
				pcm,
				sampleRate: this.sampleRate,
				isFinal: false,
			});
			if (stopRequested === true || args.cancelSignal.cancelled) {
				cancelled = true;
				break;
			}
		}

		args.onChunk({
			pcm: new Float32Array(0),
			sampleRate: this.sampleRate,
			isFinal: true,
		});
		return { cancelled };
	}

	/** Nothing to release. */
	dispose(): void {
		/* nothing */
	}
}
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Phoneme streaming for Kokoro-82M.
3
+ *
4
+ * Kokoro consumes a sequence of phoneme ids (espeak-ng IPA tokenised against a
5
+ * small fixed vocab). The scheduler emits phrases at punctuation or
6
+ * `phoneme-stream` boundaries (see `voice/phrase-chunker.ts` `chunkOn`
7
+ * option). This module is the seam between those phrase boundaries and the
8
+ * model's input tensor:
9
+ *
10
+ * text → phonemizer.phonemize() → KokoroPhonemeSequence (ids) → runtime
11
+ *
12
+ * For maximum responsiveness the runtime can call `streamPhonemes()` against
13
+ * an async text iterator (chunked draft tokens) and forward each window of
14
+ * accumulated ids as soon as a phoneme boundary fires. The default `flushAt`
15
+ * is 8 new phoneme ids per window; `flushAt: 1` emits after every phonemized
16
+ * segment, while larger values amortise the ONNX forward pass on small
17
+ * windows. This file intentionally has no dependency on the rest of the
18
+ * voice scaffold so it can be reused by the fine-tune evaluator script.
19
+ */
20
+
21
+ import type { KokoroPhonemeSequence, KokoroPhonemizer } from "./types";
22
+
/**
 * One snapshot of the phoneme stream handed to the runtime.
 */
export interface PhonemeStreamWindow {
  /**
   * Cumulative ids since stream start. The runtime can re-tokenise or carry
   * state by id; the simplest implementation forwards the full window on
   * each call.
   */
  ids: Int32Array;
  /** Cumulative phoneme string, for debugging / display. */
  phonemes: string;
  /** `true` only on the last window of the stream. */
  isFinal: boolean;
}
33
+
/**
 * Options shared by `streamPhonemes` and `phonemizePhrase`.
 */
export interface StreamPhonemesOptions {
  /** Phonemizer that turns text into a phoneme-id sequence. */
  phonemizer: KokoroPhonemizer;
  /** Language code forwarded unchanged to `phonemizer.phonemize()`. */
  lang: string;
  /**
   * Emit a window every N new phoneme ids. Default 8 (≈ first audio after a
   * short syllable cluster — matches the phrase chunker's default cap).
   */
  flushAt?: number;
}
41
+
/**
 * Phonemize an async text source and emit cumulative windows. The caller
 * consumes the iterator with `for await (const window of streamPhonemes(…))`.
 * A pull-style API keeps this independent of the scheduler's event loop —
 * the bench harness and the eval loop both reuse it without taking on a
 * scheduler dependency.
 *
 * Text is buffered until it contains whitespace; everything before the last
 * whitespace character is phonemized as one word-aligned segment and the
 * remainder is carried over to the next chunk. A non-final window is yielded
 * once at least `flushAt` new ids have accumulated since the previous one.
 * Exactly one final window (`isFinal: true`) is always yielded, even for an
 * empty source.
 *
 * NOTE(review): the whitespace character used as the split point is dropped,
 * so neither `ids` nor `phonemes` carries a word separator between segments.
 * NOTE(review): framing ids are stripped per segment (see `appendSeq`) and
 * nothing here re-adds them to the cumulative window — presumably the
 * consumer frames the ids; confirm against the runtime.
 *
 * @param textChunks Incremental text (e.g. draft tokens). Empty chunks are
 *   skipped.
 * @param opts Phonemizer, language and flush threshold. `flushAt` defaults
 *   to 8, is clamped to at least 1, and a `NaN` value falls back to 8.
 */
export async function* streamPhonemes(
  textChunks: AsyncIterable<string>,
  opts: StreamPhonemesOptions,
): AsyncIterable<PhonemeStreamWindow> {
  const requestedFlushAt = opts.flushAt ?? 8;
  // `Math.max(1, NaN)` is NaN and `x >= NaN` is always false, which would
  // silently suppress every intermediate window.
  const flushAt = Number.isNaN(requestedFlushAt)
    ? 8
    : Math.max(1, requestedFlushAt);
  const idsAcc: number[] = [];
  let phonemesAcc = "";
  let lastFlushAt = 0;
  let leftover = "";

  for await (const chunk of textChunks) {
    if (!chunk) continue;
    leftover += chunk;
    // Only phonemize when we have at least a whole word to feed to the
    // phonemizer — espeak-ng is significantly more accurate when fed
    // word-aligned input. Look back to the last whitespace (space, tab,
    // newline, …) as the split.
    const split = lastWhitespaceIndex(leftover);
    if (split === -1) continue;
    const head = leftover.slice(0, split);
    leftover = leftover.slice(split + 1);
    // A chunk that starts with whitespace produces an empty head; there is
    // nothing to phonemize yet.
    if (head.trim().length === 0) continue;
    const seq = await opts.phonemizer.phonemize(head, opts.lang);
    appendSeq(seq, idsAcc);
    phonemesAcc += seq.phonemes;
    if (idsAcc.length - lastFlushAt >= flushAt) {
      lastFlushAt = idsAcc.length;
      yield {
        // Copy so later appends do not mutate a window already handed out.
        ids: Int32Array.from(idsAcc),
        phonemes: phonemesAcc,
        isFinal: false,
      };
    }
  }

  // Flush the trailing partial word, if it contains anything to phonemize.
  if (leftover.trim().length > 0) {
    const seq = await opts.phonemizer.phonemize(leftover, opts.lang);
    appendSeq(seq, idsAcc);
    phonemesAcc += seq.phonemes;
  }
  yield {
    ids: Int32Array.from(idsAcc),
    phonemes: phonemesAcc,
    isFinal: true,
  };
}

/** Index of the last whitespace character in `text`, or -1 when none. */
function lastWhitespaceIndex(text: string): number {
  const whitespace = /\s/;
  for (let i = text.length - 1; i >= 0; i--) {
    if (whitespace.test(text.charAt(i))) return i;
  }
  return -1;
}

/**
 * Append the ids of one phonemized segment to `target`, dropping a leading
 * and/or trailing framing id.
 *
 * The phonemizer emits a sequence framed with BOS/EOS — both are stripped
 * when accumulating windows so framing does not pile up between segments.
 * Sequences shorter than two ids are appended as-is, which keeps this safe
 * for phonemizers that omit framing.
 *
 * @param seq Phonemizer output for one segment.
 * @param target Accumulator that receives the unframed ids (mutated).
 */
function appendSeq(seq: KokoroPhonemeSequence, target: number[]): void {
  const ids = seq.ids;
  let start = 0;
  let end = ids.length;
  if (ids.length >= 2) {
    // Heuristic: ids ≤ 2 are <pad>/<s>/</s> in the bundled vocab.
    const first = ids[0];
    const last = ids[end - 1];
    if (first !== undefined && first <= 2) start = 1;
    if (last !== undefined && last <= 2) end -= 1;
  }
  for (let i = start; i < end; i++) {
    const id = ids[i];
    if (id !== undefined) target.push(id);
  }
}
112
+
/**
 * One-shot variant for whole-phrase callers (the scheduler dispatches one
 * phrase at a time in the default `punctuation` mode).
 *
 * Runs the phonemizer once over `text` and wraps its output in a final
 * window. The ids are passed through exactly as the phonemizer returned
 * them — no copy and no stripping of framing ids.
 *
 * @param text Phrase to phonemize.
 * @param opts Supplies the phonemizer and language; `flushAt` is not used.
 * @returns A window with `isFinal` set to `true`.
 */
export async function phonemizePhrase(
  text: string,
  opts: StreamPhonemesOptions,
): Promise<PhonemeStreamWindow> {
  const { ids, phonemes } = await opts.phonemizer.phonemize(text, opts.lang);
  return { ids, phonemes, isFinal: true };
}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Text → phoneme-id adapter for Kokoro-82M.
3
+ *
4
+ * Kokoro is trained against espeak-ng IPA tokens with a small fixed vocab
5
+ * (~178 entries: IPA symbols + stress/punct markers + special <s>/<pad>).
6
+ * Production deployments should bring real espeak-ng phonemization
7
+ * (`phonemizer` is the pure-JS eSpeak NG package); the bundled fallback here is a
8
+ * deterministic letter-to-pseudo-phoneme adapter that produces audible
9
+ * speech for ASCII English text but loses prosodic accuracy.
10
+ *
11
+ * Resolution order:
12
+ * 1. Caller-provided `KokoroPhonemizer` (preferred — bring your own).
13
+ * 2. Dynamically-imported `phonemizer`/`phonemize` npm package, if installed.
14
+ * 3. Bundled `FallbackG2PPhonemizer` (degrades gracefully, never throws on
15
+ * ASCII input).
16
+ *
17
+ * Non-ASCII text with no real phonemizer raises `KokoroPhonemizerError` —
18
+ * silent garbage out is worse than a surfaced error (AGENTS.md §3).
19
+ */
20
+ import { type KokoroPhonemeSequence, type KokoroPhonemizer } from "./types";
21
+ /**
22
+ * Deterministic ASCII-only G2P used when no real phonemizer is installed.
23
+ * Lossy by design — this exists so dev environments without espeak-ng still
24
+ * produce lexically useful smoke output for common English phrases, not to
25
+ * replace a production Misaki/espeak phonemizer.
26
+ */
27
+ export declare class FallbackG2PPhonemizer implements KokoroPhonemizer {
28
+ readonly id = "fallback-g2p"; // Fixed identifier of this phonemizer implementation.
29
+ phonemize(text: string, _lang: string): Promise<KokoroPhonemeSequence>; // Resolves to the phoneme sequence for `text`. NOTE(review): the `_lang` name suggests the language is ignored — confirm in phonemizer.ts.
30
+ }
31
+ /**
32
+ * Wraps the npm `phonemizer` package when present. It returns an IPA string
33
+ * which we tokenise with the same VOCAB above. Real Kokoro inference should
34
+ * use a proper espeak tokenizer — production deployments bring their own;
35
+ * this is the "install npm and it works" middle ground.
36
+ */
37
+ export declare class NpmPhonemizePhonemizer implements KokoroPhonemizer {
38
+ private readonly mod;
39
+ private readonly callStyle;
40
+ readonly id: string;
41
+ private constructor();
42
+ static tryLoad(): Promise<NpmPhonemizePhonemizer | null>;
43
+ phonemize(text: string, lang: string): Promise<KokoroPhonemeSequence>;
44
+ }
45
+ export declare function kokoroLangToPhonemizerLanguage(lang: string): string; // NOTE(review): by its name, maps a Kokoro language code to the language identifier expected by the npm phonemizer — confirm the mapping in phonemizer.ts.
46
+ /** Lazy resolver. Priority: caller-supplied `override` → npm `phonemizer` package → bundled fallback, matching the resolution order listed in the file header. */
47
+ export declare function resolvePhonemizer(override?: KokoroPhonemizer): Promise<KokoroPhonemizer>;
48
+ /** Exported for tests and bench-time diagnostics. NOTE(review): presumably the vocab id of the `<pad>` token — confirm against phonemizer.ts. */
49
+ export declare const KOKORO_PAD_ID: number;
50
+ //# sourceMappingURL=phonemizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"phonemizer.d.ts","sourceRoot":"","sources":["phonemizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EACN,KAAK,qBAAqB,EAC1B,KAAK,gBAAgB,EAErB,MAAM,SAAS,CAAC;AAuLjB;;;;;GAKG;AACH,qBAAa,qBAAsB,YAAW,gBAAgB;IAC7D,QAAQ,CAAC,EAAE,kBAAkB;IAEvB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC;CA2B5E;AAaD;;;;;GAKG;AACH,qBAAa,sBAAuB,YAAW,gBAAgB;IAG7D,OAAO,CAAC,QAAQ,CAAC,GAAG;IAEpB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAJ3B,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,OAAO;WAQM,OAAO,IAAI,OAAO,CAAC,sBAAsB,GAAG,IAAI,CAAC;IAqBxD,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC;CA0B3E;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CASnE;AAED,4EAA4E;AAC5E,wBAAsB,iBAAiB,CACtC,QAAQ,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,gBAAgB,CAAC,CAK3B;AAED,qDAAqD;AACrD,eAAO,MAAM,aAAa,QAAM,CAAC"}