@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (893)
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,2096 @@
1
+ /**
2
+ * Standalone llama.cpp engine.
3
+ *
4
+ * Fronts the in-process FFI backend (fused `libelizainference`, or the
5
+ * libllama + eliza-llama-shim fallback) via the `BackendDispatcher`. At most
6
+ * one model is loaded at a time — model swap is unload-then-load so we never
7
+ * double-allocate VRAM.
8
+ *
9
+ * Two consumption paths:
10
+ * 1. The Model Hub UI calls `load()` / `unload()` to make "Activate" work.
11
+ * 2. The agent runtime calls `generate()` via the registered
12
+ * `ModelType.TEXT_SMALL` / `TEXT_LARGE` handlers (see
13
+ * `ensure-local-inference-handler.ts`).
14
+ */
15
+
16
+ import { existsSync, readdirSync, statSync } from "node:fs";
17
+ import path from "node:path";
18
+ import {
19
+ logger,
20
+ type ResponseSkeleton,
21
+ ResponseSkeletonStreamExtractor,
22
+ } from "@elizaos/core";
23
+ import { isMobilePlatform } from "@elizaos/shared";
24
+ import type { LocalInferenceLoadArgs } from "./active-model";
25
+ import {
26
+ bundleHasAsrModelFiles,
27
+ readBundleAsrProvenanceBlockers,
28
+ } from "./asr-provenance";
29
+ import { readEffectiveAssignments } from "./assignments";
30
+ import type {
31
+ GenerateArgs as BackendGenerateArgs,
32
+ BackendPlan,
33
+ LocalGenerateWithUsageResult,
34
+ LocalRuntimeLoadConfig,
35
+ } from "./backend";
36
+ import { BackendDispatcher } from "./backend";
37
+ import {
38
+ ELIZA_1_PLACEHOLDER_IDS,
39
+ type Eliza1TierId,
40
+ findCatalogModel,
41
+ } from "./catalog";
42
+ import {
43
+ type ConversationHandle,
44
+ conversationRegistry,
45
+ } from "./conversation-registry";
46
+ import { desktopFusedFfiBackendRuntime } from "./desktop-fused-ffi-backend-runtime";
47
+ import { FfiStreamingBackend } from "./ffi-streaming-backend";
48
+ import { estimateDecodeTokens, recordDecodeThroughput } from "./live-signals";
49
+ import { MemoryMonitor } from "./memory-monitor";
50
+ import { listInstalledModels } from "./registry";
51
+ import { resolveDefaultPoolSize } from "./session-pool";
52
+ import type { InstalledModel } from "./types";
53
+ import type { CoordinatorRuntime } from "./voice/cancellation-coordinator";
54
+ import {
55
+ createKokoroSpeakerPreset,
56
+ createKokoroTtsBackend,
57
+ EngineVoiceBridge,
58
+ type EngineVoiceBridgeOptions,
59
+ VoiceStartupError,
60
+ } from "./voice/engine-bridge";
61
+ import type { AsrWordTiming } from "./voice/ffi-bindings";
62
+ import { resolveKokoroEngineConfig } from "./voice/kokoro/kokoro-engine-discovery";
63
+ import {
64
+ readVoiceBackendModeFromEnv,
65
+ selectVoiceBackend,
66
+ } from "./voice/kokoro/runtime-selection";
67
+ import type { VoicePipelineEvents } from "./voice/pipeline";
68
+ import { type MtpTextRunner, mtpTextRunner } from "./voice/pipeline-impls";
69
+ import {
70
+ createEvictableModelRole,
71
+ SharedResourceRegistry,
72
+ } from "./voice/shared-resources";
73
+ import type {
74
+ RejectedTokenRange,
75
+ TextToken,
76
+ TranscriptionAudio,
77
+ VerifierStreamEvent,
78
+ } from "./voice/types";
79
+
80
+ /**
81
+ * Default MTP draft window per round for voice turns. Small (≤8) so a
82
+ * rollback is cheap (AGENTS.md §4 — "small chunk = low latency cost on
83
+ * rollback"). Overridable per call via `runVoiceTurn({ maxDraftTokens })`.
84
+ */
85
+ const DEFAULT_VOICE_MAX_DRAFT_TOKENS = 8;
86
/**
 * Token-usage accounting block reported for a local generation.
 *
 * The four base counters are always present; the `mtp_*` counters and
 * `cache_hit_rate` are optional. The index signature allows the block to
 * carry additional keys beyond the ones named here.
 *
 * NOTE(review): the `mtp_*` fields presumably describe the MTP draft/accept
 * statistics of a speculative decode — confirm against the code that fills
 * this block in.
 */
export interface LocalUsageBlock {
  [key: string]: unknown;
  input_tokens: number;
  output_tokens: number;
  cache_creation_input_tokens: number;
  cache_read_input_tokens: number;
  mtp_drafted_tokens?: number;
  mtp_accepted_tokens?: number;
  mtp_acceptance_rate?: number;
  cache_hit_rate?: number;
}
97
+ const DEFAULT_VOICE_SKELETON_STREAM_FIELDS = new Set([
98
+ "replyText",
99
+ "text",
100
+ "messageToUser",
101
+ ]);
102
+
103
/**
 * Pick the skeleton fields that should be streamed for a voice turn.
 *
 * Walks the skeleton's spans in order and keeps the key of every
 * `free-string` span whose key is listed in
 * `DEFAULT_VOICE_SKELETON_STREAM_FIELDS`. Each key is reported once, at the
 * position of its first occurrence.
 *
 * @param skeleton Response skeleton to inspect; may be absent.
 * @returns The matching keys in first-seen order, or `[]` without a skeleton.
 */
function resolveVoiceSkeletonStreamFields(
  skeleton: ResponseSkeleton | undefined,
): string[] {
  if (!skeleton) return [];

  // A Set keeps insertion order and drops repeats, so it doubles as the
  // ordered result list.
  const picked = new Set<string>();
  for (const { kind, key } of skeleton.spans) {
    if (kind !== "free-string") continue;
    if (!key) continue;
    if (!DEFAULT_VOICE_SKELETON_STREAM_FIELDS.has(key)) continue;
    picked.add(key);
  }
  return [...picked];
}
123
+
124
/**
 * Report whether the skeleton contains a `free-string` span keyed exactly
 * `key`.
 *
 * @param skeleton Response skeleton to inspect; may be absent.
 * @param key Span key to look for.
 * @returns `true` on a match, `false` otherwise (including no skeleton).
 */
function skeletonHasFreeStringKey(
  skeleton: ResponseSkeleton | undefined,
  key: string,
): boolean {
  if (skeleton == null) return false;
  return skeleton.spans.some(
    ({ kind, key: spanKey }) => kind === "free-string" && spanKey === key,
  );
}
134
+
135
+ /**
136
+ * Idle-unload timeout (J3). After this long with no `useModel` activity
137
+ * (text generation, embeddings, voice turns) the engine unloads the active
138
+ * text model so its weights are reclaimed when the agent is quiet; the next
139
+ * `useModel` lazy-reloads via the runtime handler. `0` disables it. Default
140
+ * 15 minutes. Override via `ELIZA_LOCAL_IDLE_UNLOAD_MS`.
141
+ */
142
+ const DEFAULT_IDLE_UNLOAD_MS = 15 * 60 * 1000;
143
+ /** How often the idle-unload timer checks the activity clock. */
144
+ const IDLE_UNLOAD_CHECK_INTERVAL_MS = 60 * 1000;
145
+ const BYTES_PER_MIB = 1024 * 1024;
146
+ const GIB_PER_GB = 1024;
147
+ const TEXT_RESIDENT_OVERHEAD_MB = 512;
148
+
149
/**
 * Resolve the idle-unload timeout in milliseconds.
 *
 * Reads `ELIZA_LOCAL_IDLE_UNLOAD_MS`, trims it, and parses it as a base-10
 * integer. `DEFAULT_IDLE_UNLOAD_MS` is returned when the variable is unset or
 * when the parsed value is not a finite, non-negative number.
 *
 * @returns The configured timeout, or the default.
 */
export function resolveIdleUnloadMs(): number {
  const configured = process.env.ELIZA_LOCAL_IDLE_UNLOAD_MS?.trim();
  const millis =
    configured === undefined ? Number.NaN : Number.parseInt(configured, 10);
  return Number.isFinite(millis) && millis >= 0
    ? millis
    : DEFAULT_IDLE_UNLOAD_MS;
}
156
+
157
/**
 * Cap on how many speculative voice responses the turn-controller (W9) may
 * have in flight at once — derived from the running server's slot count
 * (each speculative response needs a slot's KV) but never more than half of
 * them (the other half stays available for confirmed turns + tool calls).
 * Floors at 1. Override via `ELIZA_LOCAL_MAX_SPECULATIVE_RESPONSES`.
 *
 * @param parallelSlots Slot count of the running server.
 * @returns The override when it parses (base 10) to a finite integer >= 1;
 *   otherwise `max(1, floor(parallelSlots / 2))`, and `1` when
 *   `parallelSlots` is NaN.
 */
export function resolveMaxConcurrentSpeculativeResponses(
  parallelSlots: number,
): number {
  const raw = process.env.ELIZA_LOCAL_MAX_SPECULATIVE_RESPONSES?.trim();
  if (raw) {
    const parsed = Number.parseInt(raw, 10);
    if (Number.isFinite(parsed) && parsed >= 1) return parsed;
  }
  // Math.max(1, NaN) is NaN, which would break the "floors at 1" contract;
  // treat an unknown slot count as the minimum.
  if (Number.isNaN(parallelSlots)) return 1;
  return Math.max(1, Math.floor(parallelSlots / 2));
}
174
+
175
+ // Re-export of backend.ts's canonical GenerateArgs shape, including the
176
+ // optional `cacheKey` for prefix reuse via the session pool.
177
+ export type GenerateArgs = BackendGenerateArgs;
178
+
179
/** Root directory and tier id of an Eliza-1 bundle. */
interface ActiveEliza1Bundle {
  root: string;
  tierId: Eliza1TierId;
}

/**
 * Resolve the active Eliza-1 bundle (root dir + tier id) from an
 * `InstalledModel`, or `null` when the model is not an Eliza-1 bundle. An
 * Eliza-1 InstalledModel carries `bundleRoot` and an `eliza-1-<tier>` id
 * (the catalog seed ids). Drives the local-embedding route.
 *
 * @param target Installed model to inspect; may be absent.
 * @returns The bundle descriptor, or `null` when `target` has no
 *   `bundleRoot` or its id is not in `ELIZA_1_PLACEHOLDER_IDS`.
 */
function resolveActiveEliza1Bundle(
  target: InstalledModel | undefined,
): ActiveEliza1Bundle | null {
  const bundleRoot = target?.bundleRoot;
  if (!target || !bundleRoot) return null;
  return ELIZA_1_PLACEHOLDER_IDS.has(target.id)
    ? { root: bundleRoot, tierId: target.id as Eliza1TierId }
    : null;
}
200
+
201
/**
 * Derive the Eliza-1 bundle directly from load args, for when no
 * `InstalledModel` supplied one.
 *
 * Yields `null` unless `args.modelId` is one of `ELIZA_1_PLACEHOLDER_IDS`.
 * The bundle root is taken to be two directory levels above `args.modelPath`.
 */
function resolveDirectEliza1Bundle(
  args: LocalInferenceLoadArgs | undefined,
): ActiveEliza1Bundle | null {
  const modelId = args?.modelId;
  if (!args || !modelId || !ELIZA_1_PLACEHOLDER_IDS.has(modelId)) return null;
  const modelDir = path.dirname(args.modelPath);
  return {
    root: path.dirname(modelDir),
    tierId: modelId as Eliza1TierId,
  };
}
210
+
211
/**
 * Best-effort resident size estimate for the text model.
 *
 * Takes the larger of the installed model's `sizeBytes` (converted with
 * `BYTES_PER_MIB`) and the catalog's `sizeGb` (converted with `GIB_PER_GB`),
 * each rounded up, then adds `TEXT_RESIDENT_OVERHEAD_MB`. Returns 0 when
 * neither source reports a positive size.
 */
function estimateTextResidentMb(
  installed: InstalledModel | undefined,
  catalog: ReturnType<typeof findCatalogModel>,
): number {
  const sizeBytes = installed?.sizeBytes;
  const sizeGb = catalog?.sizeGb;

  let installedMb = 0;
  if (typeof sizeBytes === "number" && sizeBytes > 0) {
    installedMb = Math.ceil(sizeBytes / BYTES_PER_MIB);
  }

  let catalogMb = 0;
  if (typeof sizeGb === "number" && sizeGb > 0) {
    catalogMb = Math.ceil(sizeGb * GIB_PER_GB);
  }

  const baseMb = Math.max(installedMb, catalogMb);
  // Overhead is only added when there is a real base size to add it to.
  if (!(baseMb > 0)) return 0;
  return baseMb + TEXT_RESIDENT_OVERHEAD_MB;
}
226
+
227
/**
 * Locate the staged LiteRT text model (`*.litertlm`) inside a bundle.
 *
 * Looks in `<bundleRoot>/text`:
 *  1. for an `eliza-1-*` model id, the exactly named `<modelId>.litertlm`
 *     wins when it is a regular file;
 *  2. otherwise the directory must contain exactly one `.litertlm` entry, and
 *     that entry must be a regular file. An ambiguous directory never resolves
 *     to an arbitrary model.
 *
 * Any path that cannot be stat'ed (missing, dangling symlink, removed between
 * listing and stat) is treated as "not staged" rather than raising, so a stray
 * entry in the bundle cannot abort a model load.
 *
 * @param bundleRoot Root directory of the bundle.
 * @param modelId Model id being loaded, if known.
 * @returns The path of the staged model, or `undefined` when none qualifies.
 */
function stagedLitertModelPath(
  bundleRoot: string,
  modelId: string | undefined,
): string | undefined {
  // stat() that reports failure as `false` instead of throwing.
  const isKind = (target: string, kind: "file" | "directory"): boolean => {
    try {
      const stats = statSync(target);
      return kind === "file" ? stats.isFile() : stats.isDirectory();
    } catch {
      return false;
    }
  };

  const textDir = path.join(bundleRoot, "text");
  if (!isKind(textDir, "directory")) return undefined;

  if (modelId?.startsWith("eliza-1-")) {
    const expected = path.join(textDir, `${modelId}.litertlm`);
    if (isKind(expected, "file")) return expected;
  }

  const candidates = readdirSync(textDir).filter((name) =>
    name.endsWith(".litertlm"),
  );
  const [only] = candidates;
  if (candidates.length === 1 && only !== undefined) {
    const candidate = path.join(textDir, only);
    if (isKind(candidate, "file")) return candidate;
  }
  return undefined;
}
252
+
253
/**
 * Reduce a fully-resolved `LocalInferenceLoadArgs` to the override fields the
 * dispatcher cares about. Only fields that are explicitly set (not
 * `undefined`) are carried over.
 *
 * The result is typed through `BackendPlan["overrides"]` so nothing here
 * depends on active-model.ts, which keeps backend.ts and engine.ts cycle-free.
 *
 * For `eliza-1-*` model ids the bundle root (two directories above the model
 * file), the manifest path, and — when one is staged — the LiteRT model path
 * are added as well.
 */
function toBackendLoadOverrides(
  args: LocalInferenceLoadArgs,
): BackendPlan["overrides"] {
  const {
    contextSize,
    cacheTypeK,
    cacheTypeV,
    gpuLayers,
    kvOffload,
    flashAttention,
    mmap,
    mlock,
    useGpu,
    mmprojPath,
    draftModelPath,
    modelId,
    modelPath,
  } = args;

  // Conditional spreads keep unset fields absent (not `undefined`-valued).
  const overrides: BackendPlan["overrides"] = {
    ...(contextSize !== undefined ? { contextSize } : {}),
    ...(cacheTypeK !== undefined ? { cacheTypeK } : {}),
    ...(cacheTypeV !== undefined ? { cacheTypeV } : {}),
    ...(gpuLayers !== undefined ? { gpuLayers } : {}),
    ...(kvOffload !== undefined ? { kvOffload } : {}),
    ...(flashAttention !== undefined ? { flashAttention } : {}),
    ...(mmap !== undefined ? { mmap } : {}),
    ...(mlock !== undefined ? { mlock } : {}),
    ...(useGpu !== undefined ? { useGpu } : {}),
    ...(mmprojPath !== undefined ? { mmprojPath } : {}),
    ...(draftModelPath !== undefined ? { draftModelPath } : {}),
  };

  if (!modelId?.startsWith("eliza-1-")) return overrides;

  const bundleRoot = path.dirname(path.dirname(modelPath));
  overrides.bundleRoot = bundleRoot;
  overrides.manifestPath = path.join(bundleRoot, "eliza-1.manifest.json");
  const stagedLitert = stagedLitertModelPath(bundleRoot, modelId);
  if (stagedLitert) overrides.litertModelPath = stagedLitert;
  return overrides;
}
287
+
288
+ /**
289
+ * Public engine facade.
290
+ *
291
+ * Pre-existing API: `load(modelPath)`, `unload()`, `generate(args)`,
292
+ * plus the activity probes used by router/handler/active-model code. The
293
+ * implementation now sits behind the backend dispatcher; the
294
+ * shape is preserved so callers (active-model, router-handler, the agent
295
+ * runtime handler) keep working unchanged.
296
+ *
297
+ * MTP now lives in the normal optimized llama.cpp backend path. The
298
+ * dispatcher's decision tree picks `llama-cpp` when a kernel is required
299
+ * or when the catalog prefers optimized llama.cpp.
300
+ */
301
+ export class LocalInferenceEngine {
302
+ /**
303
+ * In-process FFI backend — the sole text runtime, served by the FUSED
304
+ * `libelizainference` (`desktop-fused-ffi-backend-runtime.ts`). Text gen,
305
+ * MTP speculative decoding, KV-cache quant, native tokenization,
306
+ * and vision-describe all run through the one fused lib the voice subsystem
307
+ * already loads (ABI v9). libllama has been retired: a fused lib that is
308
+ * absent or lacks the v9 capabilities is a loud `LocalInferenceUnavailable`
309
+ * error, never a silent fallback. There is no server fallback for Eliza-1.
310
+ */
311
+ private readonly ffiBackend = new FfiStreamingBackend(
312
+ desktopFusedFfiBackendRuntime,
313
+ );
314
+ private readonly dispatcher = new BackendDispatcher(
315
+ this.ffiBackend,
316
+ () => desktopFusedFfiBackendRuntime.supported(),
317
+ () => null,
318
+ );
319
+ /**
320
+ * Active voice-streaming bridge (`EngineVoiceBridge`). Only set when an
321
+ * Eliza-1 bundle has been activated AND `startVoice()` has succeeded —
322
+ * see `packages/inference/AGENTS.md` §3 + §4. The engine never lazily
323
+ * stands up a voice session: callers either start it explicitly or get
324
+ * a hard error.
325
+ */
326
+ private voiceBridge: EngineVoiceBridge | null = null;
327
+ private voiceReadyPromise: Promise<EngineVoiceBridge> | null = null;
328
+ private asrReadyPromise: Promise<EngineVoiceBridge> | null = null;
329
+
330
+ /**
331
+ * The general onload/offload coordinator (W10 / J5). One registry per
332
+ * engine: text + voice both ref-count their shared resources against it,
333
+ * and every resident model role registers an `EvictableModelRole` here so
334
+ * the `MemoryMonitor` can walk them ascending-priority under RAM pressure.
335
+ * The voice bridge gets this passed in (see `startVoice`) so it doesn't
336
+ * spin up a private one.
337
+ */
338
+ private readonly sharedResources = new SharedResourceRegistry({
339
+ logger: {
340
+ debug: (m) => console.debug(m),
341
+ warn: (m) => console.warn(m),
342
+ info: (m) => console.info(m),
343
+ },
344
+ });
345
+
346
+ /**
347
+ * RAM-pressure monitor (J2). Started when a model loads, stopped when the
348
+ * engine unloads. Evicts the lowest-priority resident role when free RAM
349
+ * crosses the low-water line.
350
+ */
351
+ private readonly memoryMonitor = new MemoryMonitor({
352
+ registry: this.sharedResources,
353
+ logger: {
354
+ info: (m) => console.info(m),
355
+ warn: (m) => console.warn(m),
356
+ debug: (m) => console.debug(m),
357
+ },
358
+ });
359
+
360
+ /** Wall-clock ms of the last `useModel`-style activity. */
361
+ private lastActivityMs = Date.now();
362
+ /** Idle-unload timer (J3); null when disabled or no model loaded. */
363
+ private idleUnloadTimer: NodeJS.Timeout | null = null;
364
+ /** Evictable text-target role id registered on `sharedResources`, or null. */
365
+ private textTargetRoleId: string | null = null;
366
+ /**
367
+ * Ids of evictable roles THIS engine registered on `sharedResources`
368
+ * (text-target today). `getResidentFootprintMb()` sums only these so the
369
+ * arbiter never double-counts its own vision/image-gen registry roles.
370
+ */
371
+ private readonly ownedEvictableRoleIds = new Set<string>();
372
+ /** Best-effort resident footprint for the active text bundle, in MiB. */
373
+ private textTargetEstimatedMb = 0;
374
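+ // NOTE(review): a doc comment for an "evictable drafter role id" field used to sit here, but no such field is declared below — only the text-target role id above is tracked. Confirm the drafter role was removed intentionally.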
375
+
376
+ /**
377
+ * The active Eliza-1 bundle (root dir + tier id), resolved at `load()`
378
+ * from the InstalledModel path/id. `null` when the loaded model is not an
379
+ * Eliza-1 bundle (a user-installed custom). Drives bundle-relative voice
380
+ * resolution — the Kokoro TTS root and the per-tier EOT turn-detector
381
+ * revision.
382
+ */
383
+ private activeEliza1Bundle: ActiveEliza1Bundle | null = null;
384
+
385
+ /**
386
+ * The general onload/offload coordinator for this engine. Exposed so the
387
+ * voice lifecycle, the embedding route, and any other resident model role
388
+ * can register an `EvictableModelRole` against the same registry the
389
+ * `MemoryMonitor` walks under pressure.
390
+ */
391
+ getSharedResources(): SharedResourceRegistry {
392
+ return this.sharedResources;
393
+ }
394
+
395
+ /**
396
+ * Resident RAM footprint (MB) of the model weights this engine owns on the
397
+ * shared registry — the active text/embedding bundle today, plus any future
398
+ * engine-registered role (drafter, voice). This is the term `service.ts`
399
+ * feeds into the `MemoryArbiter` as `externalFootprintMb` so the arbiter's
400
+ * proactive `evictToFit` path accounts for the dominant resident consumer
401
+ * (the text target) instead of seeing only its own vision/image-gen handles
402
+ * and never tripping (#8809 AC#1). Summed by role id so it never
403
+ * double-counts the arbiter's own registry roles (vision/image-gen), which
404
+ * the arbiter already counts in its resident map.
405
+ */
406
+ getResidentFootprintMb(): number {
407
+ if (this.ownedEvictableRoleIds.size === 0) return 0;
408
+ let mb = 0;
409
+ for (const role of this.sharedResources.evictableRoles()) {
410
+ if (this.ownedEvictableRoleIds.has(role.id)) {
411
+ mb += role.estimatedResidentMb();
412
+ }
413
+ }
414
+ return mb;
415
+ }
416
+
417
+ /** The RAM-pressure monitor. Exposed for diagnostics / tests. */
418
+ getMemoryMonitor(): MemoryMonitor {
419
+ return this.memoryMonitor;
420
+ }
421
+
422
+ /** Record `useModel`-style activity so the idle-unload timer stays armed. */
423
+ private markActivity(): void {
424
+ this.lastActivityMs = Date.now();
425
+ }
426
+
427
+ /**
428
+ * Once a model is resident: register the text target as an evictable role,
429
+ * start the memory monitor, and arm the idle-unload timer. Idempotent.
430
+ */
431
+ private startBackgroundManagement(): void {
432
+ this.markActivity();
433
+ this.registerResidentRoles();
434
+ if (!this.memoryMonitor.isRunning()) this.memoryMonitor.start();
435
+ this.armIdleUnloadTimer();
436
+ }
437
+
438
+ /** Stop the memory monitor + idle timer and deregister evictable roles. */
439
+ private async stopBackgroundManagement(): Promise<void> {
440
+ if (this.idleUnloadTimer) {
441
+ clearInterval(this.idleUnloadTimer);
442
+ this.idleUnloadTimer = null;
443
+ }
444
+ this.memoryMonitor.stop();
445
+ await this.deregisterResidentRoles();
446
+ }
447
+
448
+ private registerResidentRoles(): void {
449
+ if (this.textTargetRoleId === null) {
450
+ const role = createEvictableModelRole({
451
+ role: "text-target",
452
+ estimatedMb: this.textTargetEstimatedMb,
453
+ isResident: () => this.hasLoadedModel(),
454
+ evict: async () => {
455
+ // Last thing to go. Evicting the text target = unload it; the
456
+ // next `useModel` lazy-reloads via the runtime handler.
457
+ await this.unload();
458
+ },
459
+ });
460
+ this.sharedResources.acquire(role);
461
+ this.textTargetRoleId = role.id;
462
+ this.ownedEvictableRoleIds.add(role.id);
463
+ }
464
+ }
465
+
466
+ private async deregisterResidentRoles(): Promise<void> {
467
+ const ids = [this.textTargetRoleId].filter(
468
+ (id): id is string => id !== null,
469
+ );
470
+ this.textTargetRoleId = null;
471
+ for (const id of ids) {
472
+ this.ownedEvictableRoleIds.delete(id);
473
+ try {
474
+ await this.sharedResources.release(id);
475
+ } catch {
476
+ // Already released (e.g. unload→release ran twice) — fine.
477
+ }
478
+ }
479
+ }
480
+
481
+ private armIdleUnloadTimer(): void {
482
+ if (this.idleUnloadTimer) return;
483
+ const idleMs = resolveIdleUnloadMs();
484
+ if (idleMs <= 0) return;
485
+ const timer = setInterval(() => {
486
+ if (!this.hasLoadedModel()) return;
487
+ if (Date.now() - this.lastActivityMs < idleMs) return;
488
+ console.info(
489
+ `[local-inference] No useModel activity for >${Math.round(idleMs / 1000)}s — unloading the active text model to reclaim RAM. It will reload on the next request.`,
490
+ );
491
+ void this.unload().catch((err) => {
492
+ console.warn(
493
+ `[local-inference] idle-unload failed: ${err instanceof Error ? err.message : String(err)}`,
494
+ );
495
+ });
496
+ }, IDLE_UNLOAD_CHECK_INTERVAL_MS);
497
+ timer.unref();
498
+ this.idleUnloadTimer = timer;
499
+ }
500
+
501
+ /**
502
+ * Cap on concurrent speculative voice responses (W9 / J4): derived from
503
+ * the running server's slot count (each speculative response needs a KV
504
+ * slot), never more than half of them, floored at 1. The voice
505
+ * turn-controller reads this before kicking a speculative response.
506
+ */
507
+ maxConcurrentSpeculativeResponses(): number {
508
+ return resolveMaxConcurrentSpeculativeResponses(this.activeParallel());
509
+ }
510
+
511
+ /**
512
+ * Auto-tune the running server's `--parallel` (J4): when the conversation
513
+ * high-water mark has outgrown the configured slot count AND there's RAM
514
+ * headroom for the extra KV slots, resize/restart llama.cpp with the larger
515
+ * value so new conversations get their own slot instead of thrashing.
516
+ * Returns `true` when a resize was performed. No-op when the FFI backend
517
+ * isn't loaded. Best-effort: a failed restart leaves the old `--parallel`
518
+ * in place and logs.
519
+ */
520
+ async maybeAutoResizeParallel(): Promise<boolean> {
521
+ if (this.activeBackendId() !== "llama-cpp") return false;
522
+ if (!this.dispatcher.hasLoadedModel()) return false;
523
+ const running = this.dispatcher.parallelSlots();
524
+ const recommended = conversationRegistry.recommendedParallel(running);
525
+ if (recommended <= running) return false;
526
+ // Only grow when free RAM is comfortably above the low-water line —
527
+ // adding KV slots under pressure would just trigger the monitor.
528
+ const sample = await this.memoryMonitor.sample();
529
+ if (this.memoryMonitor.isUnderPressure(sample)) {
530
+ console.warn(
531
+ `[local-inference] Conversation high-water mark wants --parallel ${recommended} (running ${running}) but RAM is tight (free ${sample.effectiveFreeMb} MB) — not resizing. Slot thrashing may occur; consider a smaller tier or more RAM.`,
532
+ );
533
+ return false;
534
+ }
535
+ try {
536
+ const resized = await this.dispatcher.resizeParallel(recommended);
537
+ if (resized) {
538
+ console.info(
539
+ `[local-inference] Resized llama.cpp --parallel ${running} → ${recommended} (conversation high-water mark grew).`,
540
+ );
541
+ }
542
+ return resized;
543
+ } catch (err) {
544
+ console.warn(
545
+ `[local-inference] --parallel resize to ${recommended} failed: ${err instanceof Error ? err.message : String(err)}`,
546
+ );
547
+ return false;
548
+ }
549
+ }
550
+
551
+ async available(): Promise<boolean> {
552
+ return this.dispatcher.available();
553
+ }
554
+
555
+ currentModelPath(): string | null {
556
+ return this.dispatcher.currentModelPath();
557
+ }
558
+
559
+ hasLoadedModel(): boolean {
560
+ return this.dispatcher.hasLoadedModel();
561
+ }
562
+
563
+ activeBackendId(): "llama-cpp" | null {
564
+ return this.dispatcher.activeBackendId();
565
+ }
566
+
567
+ currentRuntimeLoadConfig(): LocalRuntimeLoadConfig | null {
568
+ if (this.activeBackendId() !== "llama-cpp") return null;
569
+ return this.dispatcher.currentRuntimeLoadConfig();
570
+ }
571
+
572
+ private async disposeVoiceBridge(bridge: EngineVoiceBridge): Promise<void> {
573
+ try {
574
+ await bridge.disarm();
575
+ await bridge.settle();
576
+ } finally {
577
+ bridge.dispose();
578
+ if (this.voiceBridge === bridge) this.voiceBridge = null;
579
+ }
580
+ }
581
+
582
+ async unload(): Promise<void> {
583
+ // Stop the memory monitor + idle timer and deregister evictable roles
584
+ // before anything else — they reference the model that's about to go.
585
+ await this.stopBackgroundManagement();
586
+ this.activeEliza1Bundle = null;
587
+ this.textTargetEstimatedMb = 0;
588
+ const bridge = this.voiceBridge;
589
+ if (bridge) {
590
+ // Drop voice resources before tearing down text. Disarm is a
591
+ // no-op when the lifecycle is already in voice-off, so this is
592
+ // safe even if the caller never called startVoice().
593
+ await this.disposeVoiceBridge(bridge);
594
+ }
595
+ await this.dispatcher.unload();
596
+ }
597
+
598
+ async load(
599
+ modelPath: string,
600
+ resolved?: LocalInferenceLoadArgs,
601
+ ): Promise<void> {
602
+ const installed = await listInstalledModels();
603
+ const target = installed.find((m) => m.path === modelPath);
604
+ const modelId = target?.id ?? resolved?.modelId;
605
+ const catalog = modelId ? findCatalogModel(modelId) : undefined;
606
+ this.textTargetEstimatedMb = estimateTextResidentMb(target, catalog);
607
+
608
+ // Resolve the active Eliza-1 bundle (root + tier) so voice setup can
609
+ // find the bundle-relative Kokoro TTS root and the per-tier EOT
610
+ // turn-detector revision. An Eliza-1 InstalledModel carries a
611
+ // `bundleRoot` and an `eliza-1-<tier>` id. Reset on every load — a
612
+ // non-Eliza-1 model clears it.
613
+ this.activeEliza1Bundle =
614
+ resolveActiveEliza1Bundle(target) ?? resolveDirectEliza1Bundle(resolved);
615
+
616
+ // Resolved args (when provided) carry the merged catalog defaults +
617
+ // per-load overrides from the active-model coordinator. Project them
618
+ // onto the dispatcher-level overrides shape — engine.load is also
619
+ // called directly by legacy callers that pass only a `modelPath`,
620
+ // in which case `resolved` is undefined and we keep the historical
621
+ // behaviour of trusting catalog defaults inside the backend.
622
+ const overrides = resolved ? toBackendLoadOverrides(resolved) : undefined;
623
+
624
+ const plan: BackendPlan = {
625
+ modelPath,
626
+ modelId,
627
+ catalog,
628
+ overrides,
629
+ };
630
+
631
+ // The local stack is Eliza-1 only: the dispatcher routes every load to the
632
+ // fused libelizainference runtime. A load failure surfaces directly.
633
+ await this.dispatcher.load(plan);
634
+ this.startBackgroundManagement();
635
+ }
636
+
637
+ async generate(args: GenerateArgs): Promise<string> {
638
+ this.markActivity();
639
+ const streaming = this.voiceStreamingArgs(args);
640
+ const startedAt = Date.now();
641
+ const text = await this.dispatcher.generate(streaming.args);
642
+ // Decode-throughput routing signal. `generate()` returns only text, so the
643
+ // decoded-token count is approximated from the output length (the exact
644
+ // usage block is only available on the `generateInConversation` path).
645
+ recordDecodeThroughput({
646
+ tokens: estimateDecodeTokens(text),
647
+ elapsedMs: Date.now() - startedAt,
648
+ });
649
+ await streaming.finish(text);
650
+ return text;
651
+ }
652
+
653
+ /**
654
+ * Vision describe via the running llama.cpp mtmd path. Requires the FFI
655
+ * backend with an mmproj-loaded bundle. The mmproj GGUF must have been
656
+ * declared by the active catalog tier and present on disk under the
657
+ * bundle root; if not, the active backend throws.
658
+ *
659
+ * No fallback: Florence-2 / Transformers.js was the previous fallback
660
+ * and has been removed (see VISION_MIGRATION.md).
661
+ */
662
+ async describeImage(args: {
663
+ bytes: Uint8Array;
664
+ mimeType?: string;
665
+ prompt?: string;
666
+ maxTokens?: number;
667
+ temperature?: number;
668
+ signal?: AbortSignal;
669
+ /** Per-token callback for streaming vision describe (ABI v13). */
670
+ onTextChunk?: (chunk: string) => void | Promise<void>;
671
+ maxTokensPerStep?: number;
672
+ }): Promise<{
673
+ text: string;
674
+ projectorMs?: number;
675
+ decodeMs?: number;
676
+ }> {
677
+ this.markActivity();
678
+ // The dispatcher throws an actionable error if the active backend
679
+ // doesn't implement describeImage (e.g. an FFI backend without mmproj
680
+ // parity). No need for a pre-check.
681
+ return this.dispatcher.describeImage(args);
682
+ }
683
+
684
+ /** True when the active server can serve vision describe (mmproj loaded). */
685
+ canDescribeImages(): boolean {
686
+ if (this.activeBackendId() !== "llama-cpp") return false;
687
+ if (!this.dispatcher.hasLoadedModel()) return false;
688
+ return this.dispatcher.currentMmprojPath() !== null;
689
+ }
690
+
691
+ /**
692
+ * Diagnostic snapshot of an in-process JS session pool. Always null on the
693
+ * FFI runtime — its KV slots live in the native backend (C), not in a JS
694
+ * session pool. Retained so the API cache-stats panel keeps a stable shape.
695
+ */
696
+ describeSessionPool(): {
697
+ size: number;
698
+ maxSize: number;
699
+ keys: string[];
700
+ } | null {
701
+ return null;
702
+ }
703
+
704
+ /**
705
+ * Reserve a slot for a long-lived conversation. Subsequent
706
+ * `generateInConversation` calls reuse the same slot, so the prefix
707
+ * KV survives across turns regardless of hash collisions with other
708
+ * concurrent conversations.
709
+ *
710
+ * Idempotent for the same (conversationId, modelId): repeated open
711
+ * calls return the same handle. The runtime side should call this
712
+ * lazily on the first turn of a conversation and `closeConversation`
713
+ * when the chat session ends.
714
+ */
715
+ openConversation(args: {
716
+ conversationId: string;
717
+ modelId: string;
718
+ ttlMs?: number;
719
+ }): ConversationHandle {
720
+ const parallel = this.activeParallel();
721
+ const handle = conversationRegistry.open({
722
+ conversationId: args.conversationId,
723
+ modelId: args.modelId,
724
+ parallel,
725
+ ttlMs: args.ttlMs,
726
+ });
727
+ // Lazy-restore previously-persisted KV state for this conversation.
728
+ // Fire-and-forget — a missing or unreadable file just means the
729
+ // conversation cold-prefills on the next request, which is the
730
+ // pre-restore default. Only meaningful once the FFI backend is loaded.
731
+ if (this.activeBackendId() === "llama-cpp") {
732
+ void this.dispatcher
733
+ .restoreConversationKv(args.conversationId, handle.slotId)
734
+ .catch(() => {
735
+ // KV restore failures must never break the open call — the
736
+ // conversation just doesn't get its old prefix back.
737
+ });
738
+ }
739
+ return handle;
740
+ }
741
+
742
+ /**
743
+ * Run one generation pinned to a previously-opened conversation
744
+ * handle. Cache key, slot id, and (for optimized llama.cpp) kv-restore are
745
+ * all owned by the registry — callers don't need to thread them.
746
+ *
747
+ * Returns the Anthropic-shape `LocalUsageBlock` alongside the text so
748
+ * agentic callers can surface cache-hit telemetry without re-scraping
749
+ * `/metrics` themselves.
750
+ */
751
+ async generateInConversation(
752
+ handle: ConversationHandle,
753
+ args: Omit<GenerateArgs, "cacheKey">,
754
+ ): Promise<{ text: string; usage: LocalUsageBlock; slotId: number }> {
755
+ if (handle.closed) {
756
+ throw new Error(
757
+ `[local-inference] Conversation ${handle.conversationId} has been closed; reopen before generating`,
758
+ );
759
+ }
760
+ this.markActivity();
761
+ handle.lastUsedMs = Date.now();
762
+ const cacheKey = `conv:${handle.conversationId}`;
763
+ const streaming = this.voiceStreamingArgs(args);
764
+ if (this.activeBackendId() === "llama-cpp") {
765
+ const startedAt = Date.now();
766
+ const result: LocalGenerateWithUsageResult =
767
+ await this.dispatcher.generateWithUsage({
768
+ ...streaming.args,
769
+ cacheKey,
770
+ slotId: handle.slotId,
771
+ });
772
+ const elapsedMs = Date.now() - startedAt;
773
+ // Decode-throughput routing signal. Prefer the exact decoded-token count
774
+ // from the backend usage block; estimate from the text only if the
775
+ // backend omitted it.
776
+ const decodedTokens = Number(result.usage?.completion_tokens);
777
+ recordDecodeThroughput({
778
+ tokens:
779
+ Number.isFinite(decodedTokens) && decodedTokens > 0
780
+ ? decodedTokens
781
+ : estimateDecodeTokens(result.text),
782
+ elapsedMs,
783
+ });
784
+ await streaming.finish(result.text);
785
+ return {
786
+ text: result.text,
787
+ usage: {
788
+ input_tokens: Number(result.usage?.prompt_tokens ?? 0),
789
+ output_tokens: Number(result.usage?.completion_tokens ?? 0),
790
+ cache_creation_input_tokens: 0,
791
+ cache_read_input_tokens: 0,
792
+ ...(result.mtpStats
793
+ ? {
794
+ mtp_drafted_tokens: result.mtpStats.drafted,
795
+ mtp_accepted_tokens: result.mtpStats.accepted,
796
+ mtp_acceptance_rate:
797
+ result.mtpStats.acceptanceRate ?? undefined,
798
+ }
799
+ : {}),
800
+ },
801
+ slotId: result.slotId ?? handle.slotId,
802
+ };
803
+ }
804
+ // No FFI backend loaded yet: forward via the dispatcher (which throws an
805
+ // actionable "no backend loaded" error) and synthesize a zero-counter
806
+ // usage block for the shape.
807
+ const text = await this.dispatcher.generate({
808
+ ...streaming.args,
809
+ cacheKey,
810
+ });
811
+ await streaming.finish(text);
812
+ return {
813
+ text,
814
+ usage: {
815
+ input_tokens: 0,
816
+ output_tokens: 0,
817
+ cache_creation_input_tokens: 0,
818
+ cache_read_input_tokens: 0,
819
+ },
820
+ slotId: handle.slotId,
821
+ };
822
+ }
823
+
824
+ /**
825
+ * KV-prefill a conversation's pinned slot with a known prompt prefix
826
+ * (system prompt + provider context + tool/action schema block + the
827
+ * assistant-turn start), before the real request lands. This is item I1 /
828
+ * C1 of the voice swarm — fire it the moment a message arrives / STT
829
+ * starts so the response-handler prompt is already in the slot's KV when
830
+ * the user's tokens are appended.
831
+ *
832
+ * `conversationOrId` may be a `ConversationHandle` (preferred — pins to
833
+ * the handle's slot) or a raw conversation id (a handle is opened on the
834
+ * fly so the slot derivation matches the real request). Idempotent /
835
+ * cheap to call repeatedly: `cache_prompt: true` reuses the prefix so a
836
+ * second call is a no-op forward pass. Only meaningful once the FFI
837
+ * backend is loaded — returns false otherwise. Returns true when a
838
+ * pre-warm request was issued.
839
+ */
840
+ async prewarmConversation(
841
+ conversationOrId: ConversationHandle | string,
842
+ promptPrefix: string,
843
+ opts: { modelId?: string } = {},
844
+ ): Promise<boolean> {
845
+ if (this.activeBackendId() !== "llama-cpp") return false;
846
+ this.markActivity();
847
+ let slotId: number;
848
+ let cacheKey: string;
849
+ if (typeof conversationOrId === "string") {
850
+ const modelId =
851
+ opts.modelId ?? this.currentModelPath() ?? "default-local-model";
852
+ const handle =
853
+ this.conversation(conversationOrId, modelId) ??
854
+ this.openConversation({ conversationId: conversationOrId, modelId });
855
+ slotId = handle.slotId;
856
+ cacheKey = `conv:${handle.conversationId}`;
857
+ } else {
858
+ if (conversationOrId.closed) return false;
859
+ slotId = conversationOrId.slotId;
860
+ cacheKey = `conv:${conversationOrId.conversationId}`;
861
+ }
862
+ return this.dispatcher.prewarmConversation(promptPrefix, {
863
+ slotId,
864
+ cacheKey,
865
+ });
866
+ }
867
+
868
+ /**
869
+ * Close + drop a conversation handle. Persists the final KV state to
870
+ * disk so a later open with the same id can lazy-restore. Idempotent;
871
+ * closing an unknown id is a no-op.
872
+ */
873
+ async closeConversation(handle: ConversationHandle): Promise<void> {
874
+ if (handle.closed) return;
875
+ if (this.activeBackendId() === "llama-cpp") {
876
+ // Snapshot KV before deregistering so the slot id is still valid.
877
+ await this.dispatcher
878
+ .persistConversationKv(handle.conversationId, handle.slotId)
879
+ .catch(() => {
880
+ // A failed save must not block close — the slot will fall back
881
+ // to the in-RAM-only path on next open.
882
+ });
883
+ }
884
+ conversationRegistry.close(handle.conversationId, handle.modelId);
885
+ }
886
+
887
+ /**
888
+ * Read-side accessor for the conversation registry. The runtime handler
889
+ * uses this to look up an existing handle before opening a new one,
890
+ * avoiding the need to thread a handle through every layer.
891
+ */
892
+ conversation(
893
+ conversationId: string,
894
+ modelId: string,
895
+ ): ConversationHandle | null {
896
+ return conversationRegistry.get(conversationId, modelId);
897
+ }
898
+
899
+ /**
900
+ * Largest concurrent open-conversation count seen this process lifetime.
901
+ * The auto-tune-parallel path consults this and warns when it exceeds
902
+ * the running server's slot count.
903
+ */
904
+ conversationHighWaterMark(): number {
905
+ return conversationRegistry.highWater();
906
+ }
907
+
908
+ /**
909
+ * Recommended `--parallel` value given the current conversation
910
+ * high-water mark plus a small headroom (max(2, 25%)), never below the
911
+ * running slot count. Delegates to `ConversationRegistry.recommendedParallel`
912
+ * so the math lives in one place. When this exceeds `parallelSlots()` the
913
+ * engine can grow the running server (`maybeAutoResizeParallel`).
914
+ */
915
+ recommendedParallel(): number {
916
+ return conversationRegistry.recommendedParallel(this.activeParallel());
917
+ }
918
+
919
+ /**
920
+ * Emit a one-line warning when the running `--parallel` slot count is
921
+ * below the recommended value (high-water mark + headroom). Returns true
922
+ * when a warning was emitted. No-op when the FFI backend isn't loaded.
923
+ * The actual resize is `maybeAutoResizeParallel()`
924
+ * — kept separate from this hot-path check so a `useModel` call never
925
+ * blocks on (or is interrupted by) a server restart; the auto-resize is
926
+ * opted into via `ELIZA_LOCAL_AUTO_RESIZE_PARALLEL=1`, in which case this
927
+ * also kicks one off fire-and-forget.
928
+ */
929
+ warnIfParallelTooLow(logger?: { warn: (msg: string) => void }): boolean {
930
+ if (this.activeBackendId() !== "llama-cpp") return false;
931
+ const actual = this.dispatcher.parallelSlots();
932
+ const recommended = conversationRegistry.recommendedParallel(actual);
933
+ if (recommended <= actual) return false;
934
+ const message = `[local-inference] Conversation high-water mark (${conversationRegistry.highWater()}) exceeds running --parallel ${actual}. Recommended: ${recommended}. Restart llama.cpp with ELIZA_LOCAL_PARALLEL=${recommended} or higher (or set ELIZA_LOCAL_AUTO_RESIZE_PARALLEL=1) to avoid slot thrashing.`;
935
+ if (logger?.warn) {
936
+ logger.warn(message);
937
+ } else {
938
+ console.warn(message);
939
+ }
940
+ if (process.env.ELIZA_LOCAL_AUTO_RESIZE_PARALLEL === "1") {
941
+ void this.maybeAutoResizeParallel().catch(() => {
942
+ // Best-effort; the warning above already told the operator what to do.
943
+ });
944
+ }
945
+ return true;
946
+ }
947
+
948
/**
 * Create the voice bridge for an already-activated Eliza-1 bundle and
 * remember it as the engine's single active voice session.
 *
 * Per AGENTS.md §3 voice is mandatory for Eliza-1 tiers, so failures are
 * surfaced as `VoiceStartupError` rather than degrading to text-only.
 *
 * Guards applied here before the bridge is created:
 *  - a second start without an intervening `stopVoice()` is rejected
 *    (`already-started`), so the scheduler is never allocated twice;
 *  - on the fused FFI path, when the bundle ships ASR model files, any
 *    ASR provenance blocker rejects the start (`blocked-asr-provenance`).
 *
 * @param opts Bridge options; `sharedResources` is always overwritten with
 *   the engine's own registry.
 * @returns The newly started bridge (also stored on the engine).
 * @throws VoiceStartupError for the two guard conditions above.
 */
startVoice(opts: EngineVoiceBridgeOptions): EngineVoiceBridge {
  if (this.voiceBridge) {
    throw new VoiceStartupError(
      "already-started",
      "[voice] Voice session is already active. Call stopVoice() before starting a new one.",
    );
  }

  // Provenance is only consulted for fused builds that actually ship ASR
  // weights; everything else has no blockers by definition.
  const provenanceBlockers =
    opts.useFfiBackend && bundleHasAsrModelFiles(opts.bundleRoot)
      ? readBundleAsrProvenanceBlockers(opts.bundleRoot)
      : [];
  if (provenanceBlockers.length > 0) {
    throw new VoiceStartupError(
      "blocked-asr-provenance",
      `[voice] Cannot start fused local voice: ${provenanceBlockers.join("; ")}`,
    );
  }

  // The engine's shared-resource registry is canonical: voice ref-counts
  // against the same resources as text, and callers cannot substitute
  // their own (the spread order makes the engine's value win).
  const started = EngineVoiceBridge.start({
    ...opts,
    sharedResources: this.sharedResources,
  });
  this.voiceBridge = started;
  return started;
}
986
+
987
/**
 * Report whether the engine currently holds a voice bridge.
 *
 * Intended for callers that lazily start voice on demand (the original
 * note cites the TTS model handler in `ensure-local-inference-handler.ts`,
 * which auto-starts a Kokoro-only bridge on the first TEXT_TO_SPEECH call).
 *
 * @returns `true` when a bridge is stored on the engine, `false` otherwise.
 */
hasActiveVoiceBridge(): boolean {
  const current = this.voiceBridge;
  return current !== null;
}
997
+
998
/**
 * Arm the voice lifecycle on the active bridge.
 *
 * `startVoice()` only creates the bridge; arming is the separate step that
 * brings the heavy voice resources in (per the original notes: TTS region,
 * optional ASR region, voice caches and scheduler nodes, with failures
 * reported as `VoiceLifecycleError` — see `voice/lifecycle.ts`). Keeping
 * the two steps apart lets the engine stay in voice-off until the user
 * actually enables voice.
 *
 * @throws VoiceStartupError (`not-started`) when no bridge exists.
 */
async armVoice(): Promise<void> {
  const active = this.voiceBridge;
  if (active) {
    await active.arm();
    return;
  }
  throw new VoiceStartupError(
    "not-started",
    "[voice] Cannot arm: no voice session active. Call startVoice() first.",
  );
}
1021
+
1022
/**
 * Lazily start and arm voice, sharing one in-flight attempt between
 * concurrent callers.
 *
 * Runtime model handlers use this when chat text needs local speech
 * output; callers needing custom sinks or test backends keep using
 * `startVoice()` / `armVoice()` directly.
 *
 * @returns The started and armed bridge.
 */
async ensureActiveBundleVoiceReady(): Promise<EngineVoiceBridge> {
  const inFlight = this.voiceReadyPromise;
  if (inFlight) return inFlight;

  const attempt = this.ensureActiveBundleVoiceReadyOnce();
  this.voiceReadyPromise = attempt;
  try {
    return await attempt;
  } finally {
    // Clear the slot whether the attempt succeeded or failed so the next
    // caller starts a fresh attempt instead of reusing a settled promise.
    this.voiceReadyPromise = null;
  }
}
1037
+
1038
/**
 * Best-effort: load the assigned text model so an Eliza-1 bundle becomes
 * active before voice needs it.
 *
 * Does nothing when a bundle is already active or the dispatcher already
 * has a model loaded, when no TEXT_LARGE / TEXT_SMALL assignment exists,
 * or when the assigned model is not installed. Failures are logged as a
 * warning and swallowed — callers decide what a missing bundle means.
 */
private async activateAssignedBundleForVoice(): Promise<void> {
  const alreadyActive =
    this.activeEliza1Bundle || this.dispatcher.hasLoadedModel();
  if (alreadyActive) return;

  try {
    const assigned = await readEffectiveAssignments();
    const textModelId = assigned.TEXT_LARGE ?? assigned.TEXT_SMALL;
    if (!textModelId) return;

    const models = await listInstalledModels();
    const match = models.find((candidate) => candidate.id === textModelId);
    if (!match) return;

    logger.info(
      `[voice] Pre-loading text model ${textModelId} to activate Eliza-1 bundle for voice`,
    );
    await this.load(match.path);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    logger.warn(
      `[voice] Failed to pre-load text model for bundle activation: ${detail}`,
    );
  }
}
1059
+
1060
/**
 * Collect the reasons local ASR cannot run for `bundle`.
 *
 * @param bundle Bundle whose `root` directory is inspected.
 * @returns Blocker messages: first a `files.asr:` entry when no ASR model
 *   files are staged, followed by any provenance blockers. Empty when
 *   nothing blocks local ASR.
 */
private localAsrBlockersForBundle(bundle: ActiveEliza1Bundle): string[] {
  const missingFiles: string[] = bundleHasAsrModelFiles(bundle.root)
    ? []
    : [
        `files.asr: no ASR model artifacts are staged under ${path.join(
          bundle.root,
          "asr",
        )}`,
      ];
  return [...missingFiles, ...readBundleAsrProvenanceBlockers(bundle.root)];
}
1073
+
1074
/**
 * Throw unless `bundle` can run local ASR.
 *
 * @param bundle Bundle to check.
 * @throws VoiceStartupError with code `missing-asr-model` when any blocker
 *   is a `files.asr:` entry, otherwise `blocked-asr-provenance`. The
 *   message lists every blocker.
 */
private assertLocalAsrEligible(bundle: ActiveEliza1Bundle): void {
  const found = this.localAsrBlockersForBundle(bundle);
  if (found.length === 0) return;

  // Missing files take precedence over provenance problems when choosing
  // the error code.
  const filesMissing = found.some((entry) => entry.startsWith("files.asr:"));
  const code = filesMissing ? "missing-asr-model" : "blocked-asr-provenance";
  throw new VoiceStartupError(
    code,
    `[voice] Cannot start local Gemma ASR for ${bundle.tierId}: ${found.join("; ")}`,
  );
}
1085
+
1086
/**
 * Find the bundle local ASR would use, without loading anything.
 *
 * Prefers the already-active bundle. Otherwise resolves the installed
 * model assigned to TRANSCRIPTION, then TEXT_LARGE, then TEXT_SMALL, and
 * hands it to `resolveActiveEliza1Bundle`.
 *
 * @returns The bundle, or `null` when no model is assigned. When the
 *   assigned model is not installed, `resolveActiveEliza1Bundle` receives
 *   `undefined` and its result is returned as-is.
 */
private async assignedLocalAsrBundle(): Promise<ActiveEliza1Bundle | null> {
  const active = this.activeEliza1Bundle;
  if (active) return active;

  const assigned = await readEffectiveAssignments();
  const modelId =
    assigned.TRANSCRIPTION ?? assigned.TEXT_LARGE ?? assigned.TEXT_SMALL;
  if (!modelId) return null;

  const models = await listInstalledModels();
  return resolveActiveEliza1Bundle(
    models.find((candidate) => candidate.id === modelId),
  );
}
1098
+
1099
/**
 * Non-throwing readiness probe for local transcription.
 *
 * @returns `true` when the active bridge already reports ASR available, or
 *   when an assigned bundle resolves and has no ASR blockers. Any error
 *   during the check is logged as a warning and reported as `false`.
 */
async canTranscribeLocally(): Promise<boolean> {
  try {
    if (this.voiceBridge?.asrAvailable) return true;

    const candidate = await this.assignedLocalAsrBundle();
    if (candidate === null) return false;
    return this.localAsrBlockersForBundle(candidate).length === 0;
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    logger.warn(`[voice] Local ASR readiness check failed: ${detail}`);
    return false;
  }
}
1116
+
1117
/**
 * Make sure an ASR-capable voice bridge is started and armed, sharing one
 * in-flight attempt between concurrent callers.
 *
 * @returns The armed bridge.
 */
async ensureActiveBundleAsrReady(): Promise<EngineVoiceBridge> {
  const inFlight = this.asrReadyPromise;
  if (inFlight) return inFlight;

  const attempt = this.ensureActiveBundleAsrReadyOnce();
  this.asrReadyPromise = attempt;
  try {
    return await attempt;
  } finally {
    // Reset on success and on failure so a later call retries from scratch.
    this.asrReadyPromise = null;
  }
}
1126
+
1127
/**
 * Single attempt at bringing up an ASR-capable, armed voice bridge.
 *
 * Steps: try to activate the assigned bundle, require an active bundle,
 * verify it is ASR-eligible, then either arm the existing bridge (when it
 * already reports ASR available) or dispose it and start a fused-FFI
 * bridge on the bundle root. Kokoro TTS overrides are attached when a
 * Kokoro config resolves (bundle-local first, then the default location).
 *
 * @returns The armed bridge.
 * @throws VoiceStartupError (`missing-bundle-root`) when no bundle is
 *   active, plus whatever `assertLocalAsrEligible` / `startVoice` raise.
 */
private async ensureActiveBundleAsrReadyOnce(): Promise<EngineVoiceBridge> {
  await this.activateAssignedBundleForVoice();

  const bundle = this.activeEliza1Bundle;
  if (!bundle) {
    throw new VoiceStartupError(
      "missing-bundle-root",
      "[voice] Cannot start local ASR: no active Eliza-1 bundle is loaded. Install and activate a Gemma ASR-capable Eliza-1 bundle.",
    );
  }
  this.assertLocalAsrEligible(bundle);

  const existing = this.voiceBridge;
  if (existing?.asrAvailable) {
    await existing.arm();
    return existing;
  }
  // A bridge without ASR (e.g. TTS-only) has to go before the fused one
  // can be started, because startVoice() rejects a second active session.
  if (existing) {
    await this.disposeVoiceBridge(existing);
  }

  const kokoro =
    resolveKokoroEngineConfig(path.join(bundle.root, "tts", "kokoro")) ??
    resolveKokoroEngineConfig();
  const ttsOverrides = !kokoro
    ? {}
    : {
        ttsBackendOverride: createKokoroTtsBackend(kokoro, {
          bundleRoot: bundle.root,
        }),
        speakerPresetOverride: createKokoroSpeakerPreset(kokoro),
      };

  const fused = this.startVoice({
    bundleRoot: bundle.root,
    useFfiBackend: true,
    ...ttsOverrides,
  });
  await fused.arm();
  return fused;
}
1167
+
1168
/**
 * Single attempt at bringing up an armed voice bridge for speech output.
 *
 * An existing bridge is simply armed. Otherwise the assigned bundle is
 * activated on a best-effort basis (covers TTS firing before any text
 * request at boot), a Kokoro config is resolved, and a Kokoro-only bridge
 * is started and armed. With an active bundle the bundle-local Kokoro
 * directory is tried first and the backend selection is logged; without
 * one only the default Kokoro location is consulted.
 *
 * @returns The armed bridge.
 * @throws VoiceStartupError (`missing-bundle-root`) when no Kokoro config
 *   resolves; the message differs depending on whether a bundle is active.
 */
private async ensureActiveBundleVoiceReadyOnce(): Promise<EngineVoiceBridge> {
  const existing = this.voiceBridge;
  if (existing) {
    await existing.arm();
    return existing;
  }

  await this.activateAssignedBundleForVoice();
  const bundle = this.activeEliza1Bundle;

  let kokoro: ReturnType<typeof resolveKokoroEngineConfig>;
  if (bundle) {
    kokoro =
      resolveKokoroEngineConfig(path.join(bundle.root, "tts", "kokoro")) ??
      resolveKokoroEngineConfig();
    const decision = selectVoiceBackend({
      mode: readVoiceBackendModeFromEnv(),
      mobile: isMobilePlatform(),
      kokoroAvailable: kokoro !== null,
    });
    logger.info(
      `[voice] Selected ${decision.backend} backend for ${bundle.tierId}: ${decision.reason}`,
    );
    if (!kokoro) {
      throw new VoiceStartupError(
        "missing-bundle-root",
        "[voice] Kokoro was selected but its model artifacts are not staged under <stateDir>/local-inference/models/kokoro/.",
      );
    }
  } else {
    // No Eliza-1 bundle: the Kokoro-only path is the sole option, and the
    // error names both ways of fixing the situation.
    kokoro = resolveKokoroEngineConfig();
    if (!kokoro) {
      throw new VoiceStartupError(
        "missing-bundle-root",
        "[voice] Cannot start local voice: no active Eliza-1 bundle is loaded and no Kokoro artifacts are staged under <stateDir>/local-inference/models/kokoro/. Install an Eliza-1 bundle, or stage the Kokoro ONNX + at least one voice .bin to enable local TTS.",
      );
    }
  }

  // Both branches start the same Kokoro-only bridge.
  const started = this.startVoice({
    bundleRoot: "",
    useFfiBackend: false,
    kokoroOnly: kokoro,
  });
  await started.arm();
  return started;
}
1222
+
1223
/**
 * Assemble and start the live voice loop on top of `startVoice()` /
 * `armVoice()`: mic frames are fanned out to the VAD, the streaming
 * transcriber and (optionally) a wake-word detector, while a
 * `VoiceTurnController` drives turns through the bridge's scheduler.
 *
 * Preconditions checked here (each fails with a `VoiceStartupError` naming
 * the missing piece — AGENTS.md §3, no silent fallback):
 *  - a voice bridge exists and its lifecycle is `voice-on` (armed);
 *  - the bridge backend is not the stub (it emits silence);
 *  - `useEliza1Eot: true` resolves to an actual in-process scorer;
 *  - the chosen turn detector survives one warm-up `score()` call;
 *  - wake word, when enabled, has the fused FFI loaded.
 * `bridge.createStreamingTranscriber` may additionally throw when no ASR
 * runtime is available (per the original notes: `AsrUnavailableError`).
 *
 * Turn-detector resolution order: caller-supplied `turnDetector`, fused
 * EOT classifier, Eliza-1 EOT classifier, bundled GGML turn detector,
 * then `HeuristicEotClassifier`. `turnDetector: false` disables it.
 *
 * Failure safety: everything acquired after the transcriber is created
 * (transcriber, barge-in binding, mic ring pipe, frame subscription, the
 * started controller) is released again if a later startup step throws,
 * so a failed start does not leave subscriptions or native decoders
 * behind. On success the returned controller's `stop()` is the single
 * teardown knob for the whole chain.
 *
 * @param opts Session options; only `roomId` and `generate` are required.
 * @returns The started `VoiceTurnController` with `stop()` wrapped to tear
 *   down the mic chain, transcriber, wake word and barge-in binding.
 * @throws VoiceStartupError for the preconditions above; other errors from
 *   the dynamically imported voice modules propagate unchanged.
 */
async startVoiceSession(opts: {
  roomId: string;
  /** Mic source. Defaults to a `DesktopMicSource` (Electrobun). */
  micSource?: import("./voice/types").MicSource;
  /** VAD detector. Defaults to `createSileroVadDetector()`. */
  vad?: import("./voice/vad").VadDetector;
  /** Run one turn: build the message + stream `replyText` into TTS. Required. */
  generate: (
    request: import("./voice/turn-controller").VoiceGenerateRequest,
  ) => Promise<import("./voice/turn-controller").VoiceTurnOutcome>;
  /**
   * Semantic turn detector layered with VAD/STT. When omitted, one is
   * resolved automatically (see the method docs for the order). Pass
   * `false` only for tests/manual troubleshooting.
   */
  turnDetector?: import("./voice/eot-classifier").EotClassifier | false;
  /** Optional local turn-detector model directory override. */
  turnDetectorModelDir?: string;
  /**
   * Use the already-loaded eliza-1 text model as the EOT classifier — see
   * `voice/eliza1-eot-scorer.ts`.
   *
   * `"auto"` (default) defers to `resolveEliza1EotSelection`; `true`
   * forces eliza-1 EOT (throws if the scorer cannot be built); `false`
   * skips it.
   */
  useEliza1Eot?: boolean | "auto";
  /**
   * Optional path to a fine-tuned EOT LoRA adapter to layer on top of
   * the drafter at scoring time. The training recipe lives in
   * `packages/training/scripts/turn_detector/`.
   */
  eliza1EotLoraPath?: string;
  /** KV-prefill / response-handler-prefix prewarm. Defaults to `prewarmConversation`. */
  prewarm?: (roomId: string) => void | Promise<void>;
  speculatePauseMs?: number;
  events?: import("./voice/turn-controller").VoiceTurnControllerEvents;
  /**
   * Opt-in openWakeWord hotword gate (local mode only). Disabled by
   * default: voice works push-to-talk / VAD-gated without it. When
   * `enabled` and the bundle ships a wake-word model, mic frames are also
   * fed to an `OpenWakeWordDetector`; each detection prewarms the
   * conversation and calls `onWake`. When the bundle has no wake-word
   * model the session runs VAD-gated only (an info line is logged).
   */
  wakeWord?: {
    enabled: boolean;
    /** Wake phrase head name (defaults to `OPENWAKEWORD_DEFAULT_HEAD`). */
    head?: string;
    /** P(wake) firing threshold passed to the detector config. */
    threshold?: number;
    /** Called on each detection reported by the detector. */
    onWake?: () => void;
  };
  /**
   * Runtime reference for cancellation coordination (W3-9 F1).
   *
   * @deprecated G5.d: pass `runtime` to `startVoice()` (the
   * `EngineVoiceBridgeOptions`) instead. The bridge is the canonical
   * owner of the cancellation coordinator. When this field is supplied
   * here but the bridge has no coordinator, `startVoiceSession()` logs a
   * warning and ignores it.
   */
  runtime?: CoordinatorRuntime;
}): Promise<import("./voice/turn-controller").VoiceTurnController> {
  const bridge = this.requireVoiceBridge("start a voice session");
  if (bridge.lifecycle.current().kind !== "voice-on") {
    throw new VoiceStartupError(
      "not-started",
      "[voice] Cannot start a voice session: voice lifecycle is not armed. Call armVoice() first.",
    );
  }
  const backendId = (bridge.backend as { id?: string }).id;
  if (backendId === "stub") {
    throw new VoiceStartupError(
      "missing-fused-build",
      "[voice] Cannot start a live voice session on the StubOmniVoiceBackend (it emits silence). Start the bridge with useFfiBackend:true or a real backendOverride.",
    );
  }

  // Voice modules are loaded lazily so text-only engines never pay for them.
  const [
    { DesktopMicSource, pipeMicToRingBuffer },
    vadMod,
    { VoiceTurnController },
    { InMemoryAudioSink },
    eotMod,
    eotGgmlMod,
  ] = await Promise.all([
    import("./voice/mic-source"),
    import("./voice/vad"),
    import("./voice/turn-controller"),
    import("./voice/ring-buffer"),
    import("./voice/eot-classifier"),
    import("./voice/eot-classifier-ggml"),
  ]);

  const micSource = opts.micSource ?? new DesktopMicSource();
  const vad =
    opts.vad ??
    (await vadMod.createSileroVadDetector({
      bundleRoot: bridge.bundlePath(),
      ffi: bridge.ffi,
      // The context is resolved lazily: it may not be loaded yet at
      // construction time, but must be by the time the VAD asks for it.
      ctx: bridge.ffi
        ? () => {
            const ctx = bridge.ffiCtx;
            if (ctx === null) {
              throw new VoiceStartupError(
                "missing-ffi",
                "[voice] Cannot initialize native VAD: fused FFI context is not loaded.",
              );
            }
            return ctx;
          }
        : undefined,
    }));

  // ASR, gated on the VAD so silent frames aren't decoded. May throw when
  // no ASR runtime is available.
  const transcriber = bridge.createStreamingTranscriber({ vad });

  // From here on the session owns resources. Each acquisition registers
  // its release; if any later startup step throws, they are released in
  // reverse order before the error is rethrown.
  const undoOnFailure: Array<() => void> = [() => transcriber.dispose()];
  try {
    // Tier-aware turn-detector revision: `revision` is a label derived
    // from the active bundle tier; omitted when no bundle is active.
    const activeTier = this.activeEliza1Bundle?.tierId;
    const tierRevision = activeTier
      ? eotMod.turnDetectorRevisionForTier(activeTier)
      : undefined;
    const eliza1EotSelected = resolveEliza1EotSelection(
      opts.useEliza1Eot,
      opts.eliza1EotLoraPath,
    );
    const eliza1EotClassifier =
      eliza1EotSelected !== "off" && opts.turnDetector !== false
        ? this.tryBuildEliza1EotClassifier(
            eliza1EotSelected,
            opts.eliza1EotLoraPath,
          )
        : null;
    if (eliza1EotSelected === "force" && !eliza1EotClassifier) {
      throw new VoiceStartupError(
        "missing-turn-detector",
        "[voice] useEliza1Eot:true requested but the in-process Eliza-1 EOT scorer is unavailable on the FFI runtime — use the GGUF turn detector by setting useEliza1Eot:false.",
      );
    }

    // Fused end-of-turn scorer: built only when the bridge has an FFI
    // handle and turn detection is not disabled. A null result makes the
    // resolver below fall through to the next candidate.
    const bridgeFfi = bridge.ffi;
    const fusedEot =
      opts.turnDetector === false || !bridgeFfi
        ? null
        : eotMod.tryBuildFusedEotClassifier({
            ffi: bridgeFfi,
            getContext: () => {
              const ctx = bridge.ffiCtx;
              if (ctx === null) {
                throw new VoiceStartupError(
                  "missing-ffi",
                  "[voice] Cannot initialize fused EOT scorer: FFI context is not loaded.",
                );
              }
              return ctx;
            },
          });

    // The GGML detector is only loaded when the fused scorer is absent;
    // a load failure is tolerated (null) so the heuristic can take over.
    const ggmlTurnDetector =
      opts.turnDetector === false || fusedEot
        ? undefined
        : await eotGgmlMod
            .createBundledLiveKitGgmlTurnDetector({
              ...(opts.turnDetectorModelDir
                ? { modelDir: opts.turnDetectorModelDir }
                : {}),
              ...(tierRevision ? { revision: tierRevision } : {}),
            })
            .catch(() => null);
    const turnDetector =
      opts.turnDetector === false
        ? undefined
        : (opts.turnDetector ??
          fusedEot ??
          eliza1EotClassifier ??
          ggmlTurnDetector ??
          new eotMod.HeuristicEotClassifier());
    if (turnDetector) {
      try {
        // Warm one short pass while the session is arming, so the first
        // real user pause does not pay model-load latency.
        await turnDetector.score("yes");
      } catch (err) {
        throw new VoiceStartupError(
          "missing-turn-detector",
          `[voice] Cannot initialize semantic turn detector: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }

    // G5.d: the bridge owns the cancellation coordinator. A session-level
    // `runtime` without a bridge-level coordinator is ignored with a
    // warning, because the coordinator must target the bridge's context.
    if (opts.runtime && !bridge.cancellationCoordinatorOrNull()) {
      console.warn(
        "[voice] startVoiceSession({ runtime }) supplied but the bridge has no canonical cancellation coordinator — pass `runtime` to startVoice() instead. Ignoring the session-level runtime.",
      );
    }

    const controller = new VoiceTurnController(
      {
        vad,
        transcriber,
        scheduler: bridge.scheduler,
        ...(turnDetector ? { turnDetector } : {}),
        prewarm:
          opts.prewarm ??
          ((roomId: string) => {
            void this.prewarmConversation(roomId, "");
          }),
        playFirstAudioFiller: () => this.playFirstAudioFiller(),
        generate: opts.generate,
      },
      {
        roomId: opts.roomId,
        ...(opts.speculatePauseMs !== undefined
          ? { speculatePauseMs: opts.speculatePauseMs }
          : {}),
      },
      opts.events ?? {},
    );

    // Per-room barge-in binding on the bridge's coordinator. The returned
    // unsubscribe is always callable, so teardown stays branchless.
    const unsubCoordinator = bridge.bindBargeInControllerForRoom(opts.roomId);
    undoOnFailure.push(unsubCoordinator);

    // Mic → ring buffer, plus per-frame fan-out further below.
    const { unsubscribe: stopMicRing } = pipeMicToRingBuffer(
      micSource,
      new InMemoryAudioSink(),
    );
    undoOnFailure.push(stopMicRing);

    // Optional openWakeWord hotword gate (opt-in). Resolved against the
    // active bundle; no model in the bundle → no wake word.
    let wakeWord: import("./voice/wake-word").OpenWakeWordDetector | null =
      null;
    let feedWakeFrame: ((pcm: Float32Array) => void) | null = null;
    if (opts.wakeWord?.enabled) {
      const {
        isPlaceholderWakeWordHead,
        loadBundledWakeWordModel,
        OPENWAKEWORD_DEFAULT_HEAD,
        OpenWakeWordDetector,
      } = await import("./voice/wake-word");
      const headName = opts.wakeWord.head?.trim() || OPENWAKEWORD_DEFAULT_HEAD;
      if (isPlaceholderWakeWordHead(headName)) {
        console.warn(
          `[voice] wake word head '${headName}' is a PLACEHOLDER (the upstream openWakeWord "hey jarvis" head, renamed) — it fires on "hey jarvis", not the Eliza-1 wake phrase. Experimental, opt-in only; see packages/inference/reports/porting/2026-05-11/wakeword-head-plan.md.`,
        );
      }
      if (!bridge.ffi) {
        throw new VoiceStartupError(
          "missing-ffi",
          "[voice] Cannot initialize wake-word detector: fused libelizainference FFI is not loaded. Wake-word detection requires the native GGUF runtime (eliza_inference_wakeword_* symbols).",
        );
      }
      const ffiCtxResolver = () => {
        const ctx = bridge.ffiCtx;
        if (ctx === null) {
          throw new VoiceStartupError(
            "missing-ffi",
            "[voice] Cannot initialize wake-word detector: fused FFI context is not loaded.",
          );
        }
        return ctx;
      };
      const model = await loadBundledWakeWordModel({
        ffi: bridge.ffi,
        ctx: ffiCtxResolver,
        bundleRoot: bridge.bundlePath(),
        ...(opts.wakeWord.head ? { head: opts.wakeWord.head } : {}),
      });
      if (model) {
        const detector = new OpenWakeWordDetector({
          model,
          ...(opts.wakeWord.threshold !== undefined
            ? { config: { threshold: opts.wakeWord.threshold } }
            : {}),
          onWake: () => {
            void this.prewarmConversation(opts.roomId, "");
            opts.wakeWord?.onWake?.();
          },
        });
        wakeWord = detector;
        // Mic frames need not match the detector's frame size, so carry
        // the remainder over and emit exact `frameSamples`-sized slices.
        const need = model.frameSamples;
        let acc = new Float32Array(0);
        feedWakeFrame = (pcm: Float32Array) => {
          const merged = new Float32Array(acc.length + pcm.length);
          merged.set(acc);
          merged.set(pcm, acc.length);
          let off = 0;
          while (merged.length - off >= need) {
            const slice = merged.slice(off, off + need);
            off += need;
            void detector.pushFrame(slice);
          }
          acc = merged.slice(off);
        };
      } else {
        console.info(
          "[voice] wake word requested but no openWakeWord model in this bundle — running VAD-gated only",
        );
      }
    }

    const unsubFrame = micSource.onFrame((frame) => {
      // Fire-and-forget so a slow VAD pass doesn't backpressure the mic.
      void vad.pushFrame(frame);
      transcriber.feed(frame);
      feedWakeFrame?.(frame.pcm);
    });
    undoOnFailure.push(unsubFrame);

    controller.start();
    // Registered only after start() succeeded; at this point `stop` is
    // still the controller's own implementation.
    undoOnFailure.push(() => controller.stop());
    await micSource.start();

    // Single teardown knob: stopping the controller stops the mic chain too.
    const origStop = controller.stop.bind(controller);
    controller.stop = () => {
      origStop();
      unsubFrame();
      stopMicRing();
      void micSource.stop();
      transcriber.dispose();
      wakeWord?.reset();
      // G5.d: release only the per-room barge-in binding. The bridge owns
      // the coordinator and disposes it itself; disposing it here would
      // affect other concurrent rooms.
      unsubCoordinator();
    };
    return controller;
  } catch (err) {
    // Startup failed part-way: release what was acquired, newest first.
    // Each release is isolated so one failing cleanup cannot mask the
    // original error or skip the remaining releases.
    for (const undo of undoOnFailure.reverse()) {
      try {
        undo();
      } catch {
        // Best-effort cleanup; the startup error below is what matters.
      }
    }
    throw err;
  }
}
1620
+
1621
/**
 * Disarm the voice lifecycle on the active bridge.
 *
 * Per the original notes, disarming drains the ring buffer, settles the
 * scheduler and drops TTS/ASR weights from RAM (see
 * `voice/engine-bridge.ts`). Does nothing when no bridge exists.
 */
async disarmVoice(): Promise<void> {
  const active = this.voiceBridge;
  if (active) {
    await active.disarm();
  }
}
1632
+
1633
/**
 * Tear down the active voice bridge; a no-op when none is active.
 *
 * Order: disarm the lifecycle, then settle in-flight work, then dispose.
 * Disposal and clearing of the engine's reference happen even when
 * disarm/settle throw; the error still propagates to the caller.
 */
async stopVoice(): Promise<void> {
  const active = this.voiceBridge;
  if (!active) return;

  try {
    await active.disarm();
    await active.settle();
  } finally {
    active.dispose();
    // Only clear the slot if it still points at the bridge we stopped —
    // a different bridge may have been installed while we were awaiting.
    if (this.voiceBridge === active) {
      this.voiceBridge = null;
    }
  }
}
1651
+
1652
/**
 * Synthesize `text` to WAV bytes through the active voice bridge.
 *
 * @param text Text to speak.
 * @param signal Optional abort signal, forwarded to the bridge.
 * @returns The WAV bytes produced by `bridge.synthesizeTextToWav`.
 * @throws VoiceStartupError when no bridge is active, or
 *   (`missing-fused-build`) when the bridge backend is the stub.
 */
async synthesizeSpeech(
  text: string,
  signal?: AbortSignal,
): Promise<Uint8Array> {
  this.markActivity();
  const bridge = this.requireVoiceBridge("synthesize speech");

  const backendId = (bridge.backend as { id?: string }).id;
  if (backendId === "stub") {
    throw new VoiceStartupError(
      "missing-fused-build",
      "[voice] Cannot synthesize speech with StubOmniVoiceBackend (it emits silence). Start voice with useFfiBackend:true or inject a real backend.",
    );
  }
  return bridge.synthesizeTextToWav(text, signal);
}
1666
+
1667
/**
 * Prewarm the given phrases on the active voice bridge.
 *
 * @param texts Phrases handed to `bridge.prewarmPhrases`.
 * @param opts Optional `concurrency`, forwarded unchanged.
 * @returns The bridge's `{ warmed, cached }` counts.
 * @throws VoiceStartupError when no bridge is active.
 */
async prewarmVoicePhrases(
  texts: ReadonlyArray<string>,
  opts: { concurrency?: number } = {},
): Promise<{ warmed: number; cached: number }> {
  const bridge = this.requireVoiceBridge("prewarm voice phrases");
  return bridge.prewarmPhrases(texts, opts);
}
1676
+
1677
/**
 * Idle-time prewarm: ask the bridge to warm its built-in common-phrase
 * set so the phrase cache is ready before the next turn. Meant to be
 * invoked by callers when the voice loop is idle.
 *
 * @param opts Optional `concurrency`, forwarded unchanged.
 * @returns The bridge's `{ warmed, cached }` counts.
 * @throws VoiceStartupError when no bridge is active.
 */
async prewarmIdleVoicePhrases(
  opts: { concurrency?: number } = {},
): Promise<{ warmed: number; cached: number }> {
  const bridge = this.requireVoiceBridge("prewarm idle voice phrases");
  return bridge.prewarmIdlePhrases(opts);
}
1690
+
1691
/**
 * Play the first-audio filler (a short cached acknowledgement) through
 * the active bridge. The turn controller calls this to mask first-token
 * latency.
 *
 * @returns The filler text that was played, or `null` when the bridge
 *   played nothing.
 * @throws VoiceStartupError when no bridge is active.
 */
playFirstAudioFiller(): string | null {
  const bridge = this.requireVoiceBridge("play first-audio filler");
  return bridge.playFirstAudioFiller();
}
1702
+
1703
/**
 * Transcribe PCM audio through the active voice bridge.
 *
 * The abort signal is checked both before the call and after it resolves,
 * so an abort that lands mid-transcription still rejects.
 *
 * @param args Audio to transcribe.
 * @param signal Optional abort signal, also forwarded to the bridge.
 * @param onPartial Optional callback for partial transcript deltas.
 * @returns The final transcript.
 * @throws The signal's `reason` when it is an `Error`, otherwise an
 *   `AbortError` `DOMException`; `VoiceStartupError` when no bridge exists.
 */
async transcribePcm(
  args: TranscriptionAudio,
  signal?: AbortSignal,
  onPartial?: (delta: string) => void,
): Promise<string> {
  const bailIfAborted = (): void => {
    if (!signal?.aborted) return;
    throw signal.reason instanceof Error
      ? signal.reason
      : new DOMException("Aborted", "AbortError");
  };

  this.markActivity();
  bailIfAborted();
  const bridge = this.requireVoiceBridge("transcribe audio");
  const transcript = await bridge.transcribePcm(args, signal, onPartial);
  bailIfAborted();
  return transcript;
}
1724
+
1725
/**
 * Transcribe PCM audio with per-word timings through the active voice
 * bridge.
 *
 * Mirrors `transcribePcm`'s cancellation contract: the abort signal is
 * checked before the call and again after it resolves, so a request
 * aborted while transcription was running rejects instead of handing
 * back a result the caller already gave up on.
 *
 * @param args Audio to transcribe.
 * @param signal Optional abort signal, also forwarded to the bridge.
 * @returns The transcript text and its word timings.
 * @throws The signal's `reason` when it is an `Error`, otherwise an
 *   `AbortError` `DOMException`; `VoiceStartupError` when no bridge exists.
 */
async transcribePcmTimed(
  args: TranscriptionAudio,
  signal?: AbortSignal,
): Promise<{ text: string; words: AsrWordTiming[] }> {
  const bailIfAborted = (): void => {
    if (!signal?.aborted) return;
    throw signal.reason instanceof Error
      ? signal.reason
      : new DOMException("Aborted", "AbortError");
  };

  this.markActivity();
  bailIfAborted();
  const timed = await this.requireVoiceBridge(
    "transcribe audio",
  ).transcribePcmTimed(args, signal);
  // The bridge may finish even though the caller aborted meanwhile.
  bailIfAborted();
  return timed;
}
1741
+
1742
/**
 * Run one fused mic→speech voice turn through the active bridge.
 *
 * Per the original notes the bridge pipeline is ASR → draft/verify text
 * generation → phrase chunker → TTS → PCM ring buffer, with rollback on
 * rejected drafts and barge-in cancellation, and it requires
 * `startVoice()` + `armVoice()` beforehand. The production caller is the
 * on-device bridge (`runDeviceVoiceTurn` in
 * `adapters/capacitor-llama/voice-turn.ts`), which supplies its own
 * `textRunner`.
 *
 * @param audio Captured audio for the turn.
 * @param opts Optional limits, pipeline events, and `textRunner`. When
 *   `textRunner` is omitted a runner over this engine's dispatcher is
 *   used; `maxDraftTokens` defaults to `DEFAULT_VOICE_MAX_DRAFT_TOKENS`.
 * @returns The turn's exit reason: `done`, `token-cap` or `cancelled`.
 * @throws VoiceStartupError when no bridge is active.
 */
async runVoiceTurn(
  audio: TranscriptionAudio,
  opts: {
    maxDraftTokens?: number;
    maxGeneratedTokens?: number;
    events?: VoicePipelineEvents;
    /**
     * In-process text runner for hosts that run their own text engine.
     * Must implement the `MtpTextRunner` contract.
     */
    textRunner?: MtpTextRunner;
  } = {},
): Promise<"done" | "token-cap" | "cancelled"> {
  this.markActivity();
  const bridge = this.requireVoiceBridge("run a voice turn");

  // Only build the dispatcher-backed runner when the caller gave none.
  const runner = opts.textRunner ?? mtpTextRunner(this.dispatcher);
  const limits = {
    maxDraftTokens: opts.maxDraftTokens ?? DEFAULT_VOICE_MAX_DRAFT_TOKENS,
    maxGeneratedTokens: opts.maxGeneratedTokens,
  };
  return bridge.runVoiceTurn(audio, runner, limits, opts.events);
}
1791
+
1792
/**
 * Accessor for the current voice bridge; yields `null` while voice mode is
 * not running. Callers (router, UI, agent runtime) use it to decide whether
 * verifier events should be forwarded. Voice is mandatory for Eliza-1 tiers,
 * yet the bridge is created lazily — `startVoice()` MUST have been called
 * before this returns a non-null value.
 */
voice(): EngineVoiceBridge | null {
  const activeBridge = this.voiceBridge;
  return activeBridge;
}
1802
+
1803
/**
 * Return the current voice bridge, or fail when there is none.
 *
 * @param action - Description of the attempted operation; interpolated into
 *   the error message.
 * @throws VoiceStartupError with code `"not-started"` when no bridge is set.
 */
private requireVoiceBridge(action: string): EngineVoiceBridge {
  const activeBridge = this.voiceBridge;
  if (activeBridge) return activeBridge;
  throw new VoiceStartupError(
    "not-started",
    `[voice] Cannot ${action}: no voice session active. Call startVoice() and armVoice() first.`,
  );
}
1813
+
1814
/**
 * Wrap generation args so that streamed text is also fed into the voice
 * bridge, and return a `finish` callback to run once generation has ended.
 *
 * The args are returned untouched (with a no-op `finish`) unless all of the
 * following hold: a bridge exists, its lifecycle is in the `"voice-on"`
 * state, and the output is user-visible — `voiceOutput === "user-visible"`,
 * or `voiceOutput` is unset and either `onTextChunk` is a function or the
 * response skeleton exposes at least one voice stream field.
 *
 * When wrapping applies:
 * - `signal` is replaced by an internal controller's signal that aborts on a
 *   barge-in `hard-stop` or when the caller's own signal aborts.
 * - `onVerifierEvent` / `onTextChunk` push tokens into the bridge (or into
 *   the structured-field extractor) and then delegate to the caller's
 *   callbacks.
 *
 * `finish(finalText)` speaks `finalText` if nothing was streamed, waits for
 * queued pushes and `bridge.settle()`, and always detaches the barge-in
 * subscription and the caller-signal abort listener.
 *
 * @param args - Generation args supplied by the caller.
 * @returns The (possibly wrapped) args plus the `finish` callback.
 */
private voiceStreamingArgs<T extends Omit<GenerateArgs, "cacheKey">>(
  args: T,
): {
  args: T;
  finish: (finalText: string) => Promise<void>;
} {
  const bridge = this.voiceBridge;
  const voiceOn = bridge?.lifecycle.current().kind === "voice-on";
  const structuredVoiceFields =
    args.streamStructured === true
      ? resolveVoiceSkeletonStreamFields(args.responseSkeleton)
      : [];
  const hasShouldRespondGate =
    args.streamStructured === true &&
    skeletonHasFreeStringKey(args.responseSkeleton, "shouldRespond");
  // The extractor must also surface `shouldRespond` so the gate below can
  // observe it, even when it is not itself a spoken field.
  const extractorStreamFields =
    hasShouldRespondGate && !structuredVoiceFields.includes("shouldRespond")
      ? ["shouldRespond", ...structuredVoiceFields]
      : structuredVoiceFields;
  const userVisibleVoice =
    args.voiceOutput === "user-visible" ||
    (args.voiceOutput === undefined &&
      (typeof args.onTextChunk === "function" ||
        structuredVoiceFields.length > 0));
  if (!voiceOn || !bridge || !userVisibleVoice) {
    return {
      args,
      finish: async () => {},
    };
  }

  // Barge-in → LLM/drafter abort. A `hard-stop` from the scheduler's
  // barge-in controller (ASR-confirmed words, or `triggerBargeIn()`)
  // aborts this controller; we hand its signal to `dispatcher.generate`
  // so generation stops at the next kernel boundary — not just TTS
  // (AGENTS.md §4 / brief item 2). Composed with the caller's signal so
  // an external cancel still works.
  const bargeAbort = new AbortController();
  const detachBarge = bridge.scheduler.bargeIn.onSignal((signal) => {
    if (signal.type === "hard-stop" && !bargeAbort.signal.aborted) {
      bargeAbort.abort();
    }
  });
  const callerSignal = args.signal;
  // Named so `finish` can remove it: `{ once: true }` only cleans up when
  // the caller's signal actually fires, which would otherwise leave one
  // listener behind per call on a long-lived, never-aborted signal.
  const onCallerAbort = () => {
    if (!bargeAbort.signal.aborted) bargeAbort.abort();
  };
  if (callerSignal) {
    if (callerSignal.aborted) bargeAbort.abort();
    else callerSignal.addEventListener("abort", onCallerAbort, { once: true });
  }

  let nextIndex = 0;
  let streamedAny = false;
  // Set once any verifier event arrives; from then on plain text chunks are
  // no longer pushed to the bridge (presumably so the same text is not
  // spoken twice — verify against the dispatcher's callback contract).
  let verifierHandled = false;
  const callerOnTextChunk = args.onTextChunk;
  const callerOnVerifierEvent = args.onVerifierEvent;
  // Serialises structured-field pushes so tokens reach the bridge in order.
  let structuredVoicePush = Promise.resolve();
  let shouldRespondText = "";
  // null = gate present but undecided; true/false = decided.
  let shouldRespondAllowsVoice: boolean | null = hasShouldRespondGate
    ? null
    : true;
  const pendingStructuredReplyChunks: string[] = [];
  const pushStructuredVoiceChunk = (chunk: string) => {
    streamedAny = true;
    const token: TextToken = { index: nextIndex++, text: chunk };
    structuredVoicePush = structuredVoicePush.then(() =>
      bridge.pushAcceptedToken(token),
    );
  };
  const structuredVoiceExtractor =
    structuredVoiceFields.length > 0 && args.responseSkeleton
      ? new ResponseSkeletonStreamExtractor({
          skeleton: args.responseSkeleton,
          streamFields: extractorStreamFields,
          abortSignal: bargeAbort.signal,
          onChunk: (chunk: string, field?: string) => {
            if (chunk.length === 0) return;
            if (field === "shouldRespond") {
              // Decide the gate from the accumulated value's prefix, after
              // trimming, upper-casing and dropping leading non-letters.
              shouldRespondText += chunk;
              const normalized = shouldRespondText
                .trim()
                .toUpperCase()
                .replace(/^[^A-Z]+/, "");
              if (
                normalized.startsWith("IG") ||
                normalized.startsWith("ST")
              ) {
                // Voice suppressed: drop anything buffered so far.
                shouldRespondAllowsVoice = false;
                pendingStructuredReplyChunks.length = 0;
              } else if (normalized.startsWith("RE")) {
                // Voice allowed: release buffered chunks in arrival order.
                shouldRespondAllowsVoice = true;
                for (const pending of pendingStructuredReplyChunks.splice(
                  0,
                )) {
                  pushStructuredVoiceChunk(pending);
                }
              }
              return;
            }
            if (hasShouldRespondGate) {
              if (shouldRespondAllowsVoice === false) return;
              if (shouldRespondAllowsVoice !== true) {
                // Gate undecided: hold the chunk until it resolves.
                pendingStructuredReplyChunks.push(chunk);
                return;
              }
            }
            pushStructuredVoiceChunk(chunk);
          },
        })
      : null;
  const wrapped = {
    ...args,
    signal: bargeAbort.signal,
    onVerifierEvent: async (event: VerifierStreamEvent) => {
      if (structuredVoiceExtractor) {
        // Structured mode: voice is driven by extracted field text only, so
        // verifier events go to the caller but not to the bridge.
        await callerOnVerifierEvent?.(event);
        return;
      }
      verifierHandled = true;
      if (event.kind === "accept" && event.tokens.length > 0) {
        streamedAny = true;
        const last = event.tokens[event.tokens.length - 1];
        nextIndex = Math.max(nextIndex, last.index + 1);
      }
      await this.pushVerifierEvent(event);
      await callerOnVerifierEvent?.(event);
    },
    onTextChunk: async (chunk: string) => {
      if (structuredVoiceExtractor) {
        structuredVoiceExtractor.push(chunk);
        await callerOnTextChunk?.(chunk);
        return;
      }
      if (chunk.length > 0 && !verifierHandled) {
        streamedAny = true;
        const token: TextToken = { index: nextIndex++, text: chunk };
        await bridge.pushAcceptedToken(token);
      }
      await callerOnTextChunk?.(chunk);
    },
  } as T;

  return {
    args: wrapped,
    finish: async (finalText: string) => {
      try {
        if (structuredVoiceExtractor) {
          // Nothing was spoken while streaming: feed the final text through
          // the extractor in one go, then drain it and the push queue.
          if (!streamedAny && finalText.length > 0) {
            structuredVoiceExtractor.push(finalText);
          }
          structuredVoiceExtractor.flush();
          await structuredVoicePush;
        }
        if (
          !structuredVoiceExtractor &&
          !streamedAny &&
          finalText.length > 0 &&
          !bargeAbort.signal.aborted
        ) {
          // Non-streaming path: speak the whole reply as a single token.
          await bridge.pushAcceptedToken({
            index: nextIndex++,
            text: finalText,
          });
        }
        await bridge.settle();
      } finally {
        // Removing a listener that was never added (or already fired) is a
        // no-op, so this is safe on every path.
        callerSignal?.removeEventListener("abort", onCallerAbort);
        detachBarge();
      }
    },
  };
}
1990
+
1991
/**
 * Route one verifier-stream event into the voice scheduler. Tokens from an
 * `accept` event are pushed one at a time (they flow into the phrase
 * chunker); any other event is treated as a rejection and turned into a
 * rollback range spanning its first through last token index. Does nothing
 * when voice is inactive, so callers can fan events out unconditionally.
 *
 * When MTP produces an accepted text token, the phrase chunker MUST hand
 * the chunk to TTS within the same scheduler tick.
 *
 * @param event - The accept/reject event emitted by the verifier.
 */
async pushVerifierEvent(event: VerifierStreamEvent): Promise<void> {
  const activeBridge = this.voiceBridge;
  if (!activeBridge) return;

  if (event.kind !== "accept") {
    const rejected = event.tokens;
    // An empty rejection carries no range to roll back.
    if (rejected.length === 0) return;
    const range: RejectedTokenRange = {
      fromIndex: rejected[0].index,
      toIndex: rejected[rejected.length - 1].index,
    };
    await activeBridge.pushRejectedRange(range);
    return;
  }

  // One timestamp is shared by every token of the same accept event.
  const acceptedAt = Date.now();
  for (const accepted of event.tokens) {
    await activeBridge.pushAcceptedToken(accepted, acceptedAt);
  }
}
2017
+
2018
/**
 * Mic VAD → barge-in. AGENTS.md §4 requires the PCM ring buffer to drain
 * immediately and any in-flight TTS forward pass to be cancelled at the
 * next kernel boundary; the scheduler enforces both, so this method only
 * forwards the call. It is a no-op when there is no voice bridge.
 */
triggerBargeIn(): void {
  const activeBridge = this.voiceBridge;
  if (activeBridge) {
    activeBridge.triggerBargeIn();
  }
}
2027
+
2028
/**
 * Test surface: deliver a list of accepted tokens to the bridge in a single
 * call. Production code should prefer `pushVerifierEvent`, which keeps the
 * accept/reject discriminator explicit; this helper lets the voice
 * integration test drive the scheduler without building
 * `VerifierStreamEvent` objects by hand.
 *
 * @param tokens - Accepted tokens; they are copied before being forwarded.
 */
async pushAcceptedTokens(tokens: ReadonlyArray<TextToken>): Promise<void> {
  const accepted = Array.from(tokens);
  await this.pushVerifierEvent({ kind: "accept", tokens: accepted });
}
2038
+
2039
/**
 * Number of parallel slots to assume: the dispatcher's slot count when the
 * active backend is `"llama-cpp"`, otherwise the default pool size resolved
 * from `ELIZA_LOCAL_SESSION_POOL_SIZE`.
 */
private activeParallel(): number {
  const usingLlamaCpp = this.activeBackendId() === "llama-cpp";
  return usingLlamaCpp
    ? this.dispatcher.parallelSlots()
    : resolveDefaultPoolSize(process.env.ELIZA_LOCAL_SESSION_POOL_SIZE);
}
2049
+
2050
/**
 * Stub that always yields `null`. The in-process `Eliza1EotClassifier`
 * needed a node-bound `LlamaModel` forward pass, which the FFI runtime does
 * not expose, so callers fall through to the GGUF (FFI) turn-detector and
 * then the heuristic chain. Both parameters are accepted but unused.
 */
private tryBuildEliza1EotClassifier(
  _mode: "prefer" | "force",
  _loraPath: string | undefined,
): import("./voice/eot-classifier").Eliza1EotClassifier | null {
  return null;
}
2062
+ }
2063
+
2064
/**
 * Decide how the eliza-1 EOT classifier should be treated for a voice
 * session. Sources are consulted in this order:
 *   1. An explicit boolean `optsValue`: `true` → `"force"`, `false` →
 *      `"off"`. `"auto"` and `undefined` defer to step 2.
 *   2. The `ELIZA_VOICE_EOT_BACKEND` env var, trimmed and lower-cased:
 *      `eliza-1` / `eliza1` → `"force"`; `livekit` / `turnsense` /
 *      `heuristic` → `"off"`.
 *   3. Anything else (unset, empty, or unrecognised) → `"prefer"`.
 *
 * Meaning of the result:
 *   - `"force"`  — must build; throw if preconditions fail.
 *   - `"prefer"` — try; on null, fall through to the LiveKit chain.
 *   - `"off"`    — skip eliza-1 entirely.
 *
 * @param optsValue - Caller preference (`useEliza1Eot`).
 * @param _loraPath - Accepted for signature compatibility; not read.
 */
function resolveEliza1EotSelection(
  optsValue: boolean | "auto" | undefined,
  _loraPath: string | undefined,
): "force" | "prefer" | "off" {
  // An explicit boolean always wins over the environment.
  if (typeof optsValue === "boolean") {
    return optsValue ? "force" : "off";
  }

  const backend = process.env.ELIZA_VOICE_EOT_BACKEND?.trim().toLowerCase();
  switch (backend) {
    case "eliza-1":
    case "eliza1":
      return "force";
    case "livekit":
    case "turnsense":
    case "heuristic":
      return "off";
    default:
      return "prefer";
  }
}
2095
+
2096
/** Module-level `LocalInferenceEngine` instance, constructed when this module is evaluated. */
+ export const localInferenceEngine = new LocalInferenceEngine();