@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,505 @@
1
+ /**
2
+ * Pipelined parallel-generation scheduler — the fused mic→speech graph
3
+ * from `packages/inference/AGENTS.md` §4:
4
+ *
5
+ * mic / file → ASR → text tokens
6
+ * ↓
7
+ * scheduler ──→ MTP drafter (proposes N tokens)
8
+ * ∥ (overlap, not sequential)
9
+ * target verifier (text model)
10
+ * ↓
11
+ * accepted tokens → phrase chunker
12
+ * ↓ ↘
13
+ * speaker preset (cached) rollback queue
14
+ * ↓ ↙
15
+ * OmniVoice TTS ←── on-reject: cancel chunk
16
+ * ↓
17
+ * PCM ring buffer → audio out
18
+ *
19
+ * The headline contract: **the moment ASR emits its last token, the
20
+ * MTP drafter starts drafting AND the target starts verifying — they
21
+ * overlap.** Drafter speculation N tokens ahead happens concurrently
22
+ * with the target verifying the previous window; accepted tokens are
23
+ * handed to the phrase chunker within the same scheduler tick.
24
+ *
25
+ * GPU command buffers stay N=1 (no command-buffer batching for voice)
26
+ * so a barge-in cancel lands at the next kernel boundary, not after a
27
+ * batch flush.
28
+ *
29
+ * Why this lives next to `VoiceScheduler` and not inside it: the
30
+ * scheduler owns the *audio* side (chunker → TTS → ring buffer →
31
+ * rollback → barge-in). This module owns the *text-generation* side
32
+ * (audio source → ASR → drafter∥verifier loop) and feeds accepted /
33
+ * rejected ranges into the scheduler. Keeping them separate keeps the
34
+ * scheduler usable from text-only callers (which reach the same nodes
35
+ * via the same scheduler — AGENTS.md §4) without an ASR/drafter
36
+ * dependency.
37
+ */
38
+
39
+ import { PartialStabilizer } from "./partial-stabilizer";
40
+ import type { VoiceScheduler } from "./scheduler";
41
+ import type {
42
+ PcmFrame,
43
+ RejectedTokenRange,
44
+ StreamingTranscriber,
45
+ TextToken,
46
+ TranscriptionAudio,
47
+ VerifierStreamEvent,
48
+ } from "./types";
49
+
/**
 * Split a transcript string into contiguous text tokens.
 *
 * The fused ASR tokenizer is shared with the text backbone (AGENTS.md §1 —
 * zero re-tokenization), so the pipeline only needs *contiguous* token
 * indices, not the model's exact subword boundaries; whitespace-aware word
 * chunking is the closest stable approximation when only surface text is
 * available.
 *
 * The transcript is trimmed first. The first chunk is the first word; every
 * later chunk carries the whitespace run that preceded it, so concatenating
 * `token.text` reproduces the trimmed transcript with its original spacing.
 *
 * @param transcript Surface text of the utterance. Empty or whitespace-only
 *   input yields no tokens.
 * @param startIndex `index` assigned to the first token; later tokens count
 *   up from it by one.
 * @param tokenIds Optional text-model vocabulary ids the fused ASR decoder
 *   emitted for `transcript`. They are attached as `TextToken.id` only when
 *   their count equals the number of surface chunks; on a mismatch the
 *   surface split disagrees with the decoder's subword boundaries, a
 *   positional join would mislabel ids, and the ids are dropped.
 * @returns The tokens in transcript order.
 */
export function splitTranscriptToTokens(
  transcript: string,
  startIndex = 0,
  tokenIds?: ReadonlyArray<number>,
): TextToken[] {
  const trimmed = transcript.trim();
  if (trimmed.length === 0) return [];
  // Each match is "optional whitespace run + one word". Because `trimmed`
  // neither starts nor ends with whitespace, the matches tile the whole
  // string — the same chunks a split on every word→whitespace boundary
  // would give, but without a lookbehind assertion, which is a parse-time
  // SyntaxError on engines that predate lookbehind support.
  const parts: string[] = trimmed.match(/\s*\S+/g) ?? [];
  const ids =
    tokenIds !== undefined && tokenIds.length === parts.length
      ? tokenIds
      : undefined;
  return parts.map((text, offset) => {
    const token: TextToken = { index: startIndex + offset, text };
    const id = ids?.[offset];
    if (id !== undefined) token.id = id;
    return token;
  });
}
90
+
/**
 * Contract for the MTP drafter that speculates ahead of the target model.
 *
 * `propose` receives the tokens accepted so far (`prefix`) and resolves with
 * at most `maxDraft` candidate continuation tokens. Voice runs with N=1
 * command buffers: an implementation MUST keep each GPU dispatch short
 * enough to be cancelled at the next kernel boundary (no command-buffer
 * batching) and must poll `cancel.cancelled` between kernel ticks.
 */
export interface DraftProposer {
  propose(args: {
    /** Tokens accepted so far; read-only for the drafter. */
    prefix: readonly TextToken[];
    /** Upper bound on the number of tokens to propose. */
    maxDraft: number;
    /** Shared cancel flag; flipped to `true` on barge-in. */
    cancel: { cancelled: boolean };
  }): Promise<TextToken[]>;
}
105
+
/**
 * Contract for the target verifier (the text model).
 *
 * `verify` is handed the accepted prefix and a draft window. It resolves
 * with the leading draft tokens it keeps, followed by the single corrected
 * token at the first divergence (if any). With an empty draft it still
 * yields one token — a plain autoregressive step. Implementations must poll
 * `cancel.cancelled` between kernel ticks.
 */
export interface TargetVerifier {
  verify(args: {
    /** Tokens accepted so far; read-only for the verifier. */
    prefix: readonly TextToken[];
    /** Candidate continuation to check; may be empty. */
    draft: readonly TextToken[];
    /** Shared cancel flag; flipped to `true` on barge-in. */
    cancel: { cancelled: boolean };
  }): Promise<{
    /** Kept draft tokens followed by any correction token. */
    accepted: TextToken[];
    /** Set when the verifier reached the natural end of generation. */
    done: boolean;
  }>;
}
124
+
/** Collaborators a `VoicePipeline` is constructed with. */
export interface VoicePipelineDeps {
  /** Audio-side scheduler that receives accepted and rejected token ranges. */
  scheduler: VoiceScheduler;
  /**
   * Live frame-fed ASR adapter from `voice/transcriber.ts`: the fused
   * `eliza_inference_asr_stream_*` adapter, the fused batch adapter, or
   * `MissingAsrTranscriber` (which defers a hard failure).
   *
   * The pipeline uses it in batch fashion — the whole (VAD-gated) utterance
   * buffer is fed as a single frame, `flush()` finalizes it, and the final
   * transcript is cut into contiguous text tokens with
   * `splitTranscriptToTokens`. There is exactly one `StreamingTranscriber`
   * contract; no separate batch ASR interface exists.
   */
  transcriber: StreamingTranscriber;
  /** Speculative drafter that proposes candidate continuation tokens. */
  drafter: DraftProposer;
  /** Target text model that accepts or corrects each draft window. */
  verifier: TargetVerifier;
}
140
+
/** Tunables for one `VoicePipeline`. */
export interface VoicePipelineConfig {
  /**
   * Upper bound on tokens the MTP drafter proposes per round. Chosen per
   * tier and kept small (≤8) so that rolling back a rejected draft stays
   * cheap. Drafting and verification overlap by one round: round k is being
   * verified while round k+1 is being speculated.
   */
  maxDraftTokens: number;
  /**
   * Safety stop: hard ceiling on generated tokens in a single turn. The
   * normal exit is the verifier's `done` flag; this only bounds a runaway
   * model.
   */
  maxGeneratedTokens?: number;
  /**
   * A2 — set to true to pass streaming-ASR partials through a
   * LocalAgreement-n stabilizer (`PartialStabilizer`) before they are split
   * into tokens and handed to the drafter. Disabled by default until the
   * streaming-ASR fast path lands and the latency/quality trade is
   * validated. The `StreamingTranscriber.flush()`-driven batch path is not
   * affected, because the stabilizer is a no-op on a single final partial.
   */
  usePartialStabilizer?: boolean;
  /**
   * A2 — the agreement count `n` handed to `PartialStabilizer`. Only read
   * when `usePartialStabilizer` is true. Default 2.
   */
  partialStabilizerAgreementCount?: number;
}
168
+
/** Optional observability hooks; every member may be omitted. */
export interface VoicePipelineEvents {
  /**
   * Called once with the ASR tokens at the moment ASR has produced its
   * final token — the point where the drafter and verifier are kicked off.
   */
  onAsrComplete?(tokens: readonly TextToken[]): void;
  /**
   * Called exactly once per turn, after the ASR phase ends and ahead of the
   * first drafter/verifier round. Within a turn ASR → text → TTS run
   * sequentially (AGENTS.md §4), so the ASR model's pages are idle from
   * here on: wire this to `MmapRegionHandle.evictPages()`
   * (`madvise(MADV_DONTNEED)` on POSIX) for the ASR region to recover
   * roughly 1 GB of peak RSS while TTS decodes. Those pages fault back in
   * transparently on the next turn's `feed()`. A host that would rather
   * keep ASR resident just leaves this hook unset. The hook may be async —
   * the pipeline never waits for it, so a slow trim cannot delay first
   * audio.
   */
  onAsrPhaseComplete?(): void | Promise<void>;
  /**
   * Receives the pipeline's accept/reject token events: `accept` for
   * speculatively pushed draft tokens and for the verifier's extra tokens,
   * `reject` for a draft tail that is being rolled back.
   */
  onVerifierEvent?(event: VerifierStreamEvent): void;
  /**
   * Called when the generation loop ends, with the reason: verifier `done`,
   * the token cap, or a barge-in cancel.
   */
  onComplete?(reason: "done" | "token-cap" | "cancelled"): void;
}
189
+
/**
 * Per-turn generated-token ceiling applied when
 * `VoicePipelineConfig.maxGeneratedTokens` is not supplied.
 */
const DEFAULT_MAX_GENERATED_TOKENS = 4_096;
191
+
/** Bookkeeping for the single turn that is currently in flight. */
interface PipelineRun {
  /** Mutable flag the drafter/verifier loop polls; set to stop the turn. */
  cancel: { cancelled: boolean };
  /** Settles with the turn's exit reason. */
  done: Promise<"done" | "token-cap" | "cancelled">;
}
196
+
/**
 * One pipeline per active voice turn. Construct, call `run(audio)`,
 * await the returned promise (or call `cancel()` for barge-in). The
 * scheduler's barge-in controller also cancels an in-flight run — wire
 * `bridge.triggerBargeIn()` and this run's `cancel()` to the same VAD
 * signal so both the audio side (ring buffer drain) and the text side
 * (stop drafting/verifying) abort together.
 */
export class VoicePipeline {
  private readonly scheduler: VoiceScheduler;
  private readonly transcriber: StreamingTranscriber;
  private readonly drafter: DraftProposer;
  private readonly verifier: TargetVerifier;
  private readonly maxDraftTokens: number;
  private readonly maxGeneratedTokens: number;
  private readonly events: VoicePipelineEvents;
  /**
   * A2 — when `config.usePartialStabilizer === true`, this is the active
   * `PartialStabilizer` instance. Streaming-ASR consumers feed partials
   * through it; the batch path in `transcribeAll()` collapses on a single
   * final partial so the stabilizer is a no-op there. Exposed via
   * `getPartialStabilizer()` so the streaming-ASR adapter (separate agent)
   * can plug straight in once it ships.
   */
  private readonly partialStabilizer: PartialStabilizer | null;
  private active: PipelineRun | null = null;

  /**
   * @param deps Scheduler, transcriber, drafter and verifier for this turn.
   * @param config Draft window and token-cap settings. Both numbers are
   *   floored and clamped to at least 1; a NaN value falls back to 1
   *   (draft window) or `DEFAULT_MAX_GENERATED_TOKENS` (token cap) so the
   *   safety stop can never be silently disabled.
   * @param events Optional observability hooks.
   */
  constructor(
    deps: VoicePipelineDeps,
    config: VoicePipelineConfig,
    events: VoicePipelineEvents = {},
  ) {
    this.scheduler = deps.scheduler;
    this.transcriber = deps.transcriber;
    this.drafter = deps.drafter;
    this.verifier = deps.verifier;

    const draftWindow = Math.floor(config.maxDraftTokens);
    this.maxDraftTokens = Number.isNaN(draftWindow)
      ? 1
      : Math.max(1, draftWindow);

    // `generated >= NaN` is always false, so a NaN cap would turn the
    // runaway-model safety stop off; fall back to the default instead.
    const tokenCap = Math.floor(
      config.maxGeneratedTokens ?? DEFAULT_MAX_GENERATED_TOKENS,
    );
    this.maxGeneratedTokens = Number.isNaN(tokenCap)
      ? DEFAULT_MAX_GENERATED_TOKENS
      : Math.max(1, tokenCap);

    this.events = events;
    this.partialStabilizer = config.usePartialStabilizer
      ? new PartialStabilizer({
          agreementCount: config.partialStabilizerAgreementCount,
        })
      : null;
    // A mic VAD barge-in cancels the audio side via the scheduler's
    // barge-in controller; mirror it onto the text side so we stop
    // drafting/verifying at the next kernel boundary too.
    // NOTE(review): this listener is never detached here; if `attach`
    // hands back an unsubscribe handle, a per-turn pipeline may want to
    // release it — confirm against the barge-in controller.
    this.scheduler.bargeIn.attach({
      onCancel: () => {
        if (this.active) this.active.cancel.cancelled = true;
      },
    });
  }

  /** True while a turn is in flight. */
  isRunning(): boolean {
    return this.active !== null;
  }

  /**
   * A2 — the active `PartialStabilizer` when the pipeline was built with
   * `usePartialStabilizer: true`, otherwise null. The streaming-ASR
   * adapter (separate agent) feeds partials into this instance and
   * forwards the `stable` portion downstream. Returning null when the
   * feature flag is off lets the adapter skip the work entirely.
   */
  getPartialStabilizer(): PartialStabilizer | null {
    return this.partialStabilizer;
  }

  /**
   * Run one mic→speech turn. ASR runs first; the instant its last token
   * lands, the drafter and verifier kick off concurrently and accepted
   * tokens flow into the scheduler's chunker on the same tick.
   *
   * @param audio The utterance buffer to transcribe.
   * @returns The exit reason of the turn.
   * @throws Error if a turn is already running on this pipeline. Errors
   *   raised by the transcriber, drafter, verifier or scheduler propagate.
   */
  async run(
    audio: TranscriptionAudio,
  ): Promise<"done" | "token-cap" | "cancelled"> {
    if (this.active) {
      throw new Error(
        "[voice-pipeline] a turn is already running; cancel() it or await the previous run() first",
      );
    }
    const cancel = { cancelled: false };
    const done = this.execute(audio, cancel);
    this.active = { cancel, done };
    try {
      return await done;
    } finally {
      // Cleared on success and on failure so the pipeline can run again.
      this.active = null;
    }
  }

  /**
   * Barge-in: flag the in-flight turn (if any) as cancelled so the
   * drafter/verifier loop stops at its next cancel check, and notify the
   * scheduler's barge-in controller via `onMicActive()` (ring buffer drain
   * + chunker flush + in-flight TTS cancel). The scheduler notification is
   * sent even when no turn is running; only the cancel flag is skipped.
   */
  cancel(): void {
    if (this.active) this.active.cancel.cancelled = true;
    this.scheduler.bargeIn.onMicActive();
  }

  /**
   * The body of one turn: ASR, then the overlapped drafter ∥ verifier loop.
   * Returns through `finish()` so `onComplete` fires on every normal exit.
   */
  private async execute(
    audio: TranscriptionAudio,
    cancel: { cancelled: boolean },
  ): Promise<"done" | "token-cap" | "cancelled"> {
    // --- ASR phase -----------------------------------------------------
    // Drive the live `StreamingTranscriber` as a batch: feed the whole
    // (already VAD-gated) utterance buffer as one frame, `flush()` to
    // force-finalize, and split the final transcript into contiguous text
    // tokens. The fused Qwen3-ASR decoder shares the text vocab (AGENTS.md
    // §1), so when it reports token ids alongside the transcript they ride
    // along as `TextToken.id`; when it omits them the word-chunk fallback
    // is used.
    const asrTokens = await this.transcribeAll(audio, cancel);
    if (cancel.cancelled) return this.finish("cancelled");
    // The instant ASR's last token has been emitted: drafter + verifier
    // start. (`onAsrComplete` is the kick-off observability hook.)
    this.events.onAsrComplete?.(asrTokens);
    // ASR is done for this turn; let the host drop the idle ASR pages now
    // (within-turn RSS trim, AGENTS.md §4) without delaying the drafter.
    this.notifyAsrPhaseComplete();

    // --- overlapped drafter ∥ verifier loop ---------------------------
    // Each round:
    //   1. take the drafter's N proposed tokens (the previous round's
    //      `propose` ran concurrently with the previous verify),
    //   2. SPECULATIVELY push them to the phrase chunker now — TTS for
    //      drafted phrases starts immediately (low first-audio latency),
    //   3. concurrently: kick the *next* draft AND run the verifier,
    //   4. when the verifier returns, drop the not-yet-spoken TTS chunks
    //      for any draft positions it rejected (rollback queue), then
    //      push the verifier's corrected token,
    //   5. if a reject happened, the next draft we kicked is stale — drop
    //      it and re-draft from the corrected prefix.
    const prefix: TextToken[] = [...asrTokens];
    let nextIndex =
      asrTokens.length > 0 ? asrTokens[asrTokens.length - 1].index + 1 : 0;
    let generated = 0;

    let pendingDraft = this.startDraft(prefix, cancel);

    for (;;) {
      if (cancel.cancelled) return this.finish("cancelled");
      const draft = await pendingDraft;
      if (cancel.cancelled) return this.finish("cancelled");

      // Never speculate, verify or commit more tokens than the cap still
      // allows. `generated < maxGeneratedTokens` holds here (the loop exits
      // with "token-cap" otherwise), so the budget is at least 1. Trimming
      // up front keeps the verifier's view, the running prefix and the
      // tokens actually pushed to the scheduler in agreement.
      const draftBudget = this.maxGeneratedTokens - generated;
      const indexedDraft = draft.slice(0, draftBudget).map((t, i) => ({
        index: nextIndex + i,
        text: t.text,
      }));

      // (2) speculative TTS — push drafted tokens to the chunker now.
      for (const t of indexedDraft) {
        await this.scheduler.accept(t);
      }
      if (indexedDraft.length > 0) {
        this.events.onVerifierEvent?.({
          kind: "accept",
          tokens: indexedDraft.slice(),
        });
      }

      // (3) OVERLAP: kick next draft on the optimistic prefix, then verify.
      const optimisticPrefix = [...prefix, ...indexedDraft];
      let nextDraft: Promise<TextToken[]> | null = this.startDraft(
        optimisticPrefix,
        cancel,
      );
      const result = await this.verifier.verify({
        prefix,
        draft: indexedDraft,
        cancel,
      });
      if (cancel.cancelled) return this.finish("cancelled");

      // (4) how many leading draft tokens did the verifier keep?
      const acceptedFromDraft = countMatchingPrefix(
        result.accepted,
        indexedDraft,
      );
      if (acceptedFromDraft < indexedDraft.length) {
        // Rejected draft tail → drop the matching not-yet-spoken TTS chunks.
        const range: RejectedTokenRange = {
          fromIndex: nextIndex + acceptedFromDraft,
          toIndex: nextIndex + indexedDraft.length - 1,
        };
        this.events.onVerifierEvent?.({
          kind: "reject",
          tokens: indexedDraft.slice(acceptedFromDraft),
        });
        await this.scheduler.reject(range);
        nextDraft = null; // (5) stale — re-draft from the corrected prefix
      }

      // Commit the accepted prefix to our running state, then push the
      // verifier's correction / bonus tokens (everything past the draft
      // tokens it kept) to the chunker on this same tick.
      for (let i = 0; i < acceptedFromDraft; i++) {
        prefix.push(indexedDraft[i]);
        generated++;
      }
      nextIndex += acceptedFromDraft;

      // Only the extras that still fit under the cap are announced and
      // pushed, so listeners never see tokens that were not spoken.
      const extraBudget = Math.max(0, this.maxGeneratedTokens - generated);
      const extraIndexed = result.accepted
        .slice(acceptedFromDraft)
        .slice(0, extraBudget)
        .map((t, i) => ({
          index: nextIndex + i,
          text: t.text,
        }));
      if (extraIndexed.length > 0) {
        this.events.onVerifierEvent?.({ kind: "accept", tokens: extraIndexed });
        for (const t of extraIndexed) {
          await this.scheduler.accept(t);
          prefix.push(t);
          nextIndex = t.index + 1;
          generated++;
        }
      }

      if (result.done) {
        await this.scheduler.flushPending();
        return this.finish("done");
      }
      if (generated >= this.maxGeneratedTokens) {
        await this.scheduler.flushPending();
        return this.finish("token-cap");
      }
      if (cancel.cancelled) return this.finish("cancelled");

      pendingDraft = nextDraft ?? this.startDraft(prefix, cancel);
    }
  }

  /**
   * Start one drafter round for `prefix`.
   *
   * Speculative drafts are routinely abandoned — a stale draft after a
   * reject, or any early exit (cancel, done, token cap, verifier error).
   * A no-op rejection handler is attached on a side branch so an abandoned
   * draft that later rejects is not reported as an unhandled rejection;
   * awaiting the returned promise still surfaces the error as usual.
   */
  private startDraft(
    prefix: ReadonlyArray<TextToken>,
    cancel: { cancelled: boolean },
  ): Promise<TextToken[]> {
    const proposal = this.drafter.propose({
      prefix,
      maxDraft: this.maxDraftTokens,
      cancel,
    });
    void proposal.catch(() => {});
    return proposal;
  }

  /**
   * Fire-and-forget the `onAsrPhaseComplete` hook. The hook is a best-effort
   * memory trim: neither a synchronous throw nor an async rejection may
   * abort the turn or delay the drafter kick-off.
   */
  private notifyAsrPhaseComplete(): void {
    if (!this.events.onAsrPhaseComplete) return;
    try {
      void Promise.resolve(this.events.onAsrPhaseComplete()).catch(() => {});
    } catch {
      // Deliberately ignored — see the method comment.
    }
  }

  /**
   * Feed the whole utterance buffer to the live transcriber, finalize,
   * and return the final transcript as contiguous text tokens. The
   * transcriber is disposed afterwards (it is one per turn), including
   * when `flush()` throws. A barge-in cancel seen before feeding or after
   * `flush()` short-circuits to an empty list.
   */
  private async transcribeAll(
    audio: TranscriptionAudio,
    cancel: { cancelled: boolean },
  ): Promise<TextToken[]> {
    try {
      if (cancel.cancelled) return [];
      const frame: PcmFrame = {
        pcm: audio.pcm,
        sampleRate: audio.sampleRate,
        timestampMs: 0,
      };
      this.transcriber.feed(frame);
      const final = await this.transcriber.flush();
      if (cancel.cancelled) return [];
      return splitTranscriptToTokens(final.partial, 0, final.tokens);
    } finally {
      this.transcriber.dispose();
    }
  }

  /** Report the exit reason through `onComplete` and hand it back. */
  private finish(
    reason: "done" | "token-cap" | "cancelled",
  ): "done" | "token-cap" | "cancelled" {
    this.events.onComplete?.(reason);
    return reason;
  }
}
490
+
/**
 * Length of the longest common leading run of `accepted` and `draft`,
 * comparing tokens by `text` only.
 *
 * The verifier keeps a prefix of the draft and then emits a correction;
 * the value returned here is how many draft tokens were kept, so everything
 * in the draft from that position on (the rejected tail) can be rolled back
 * from the TTS chunker.
 */
function countMatchingPrefix(
  accepted: ReadonlyArray<TextToken>,
  draft: ReadonlyArray<TextToken>,
): number {
  const limit = Math.min(accepted.length, draft.length);
  for (let pos = 0; pos < limit; pos++) {
    // First divergence: everything before `pos` matched.
    if (accepted[pos].text !== draft[pos].text) return pos;
  }
  return limit;
}
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Optimistic prefill client (C7) — implements `/v1/prefill` against the
3
+ * llama.cpp REST API in three phases:
4
+ *
5
+ * Phase 1 — `slot/save`: snapshot the pre-user-message KV state so a
6
+ * rollback can restore it if speech continues (SPEECH_ACTIVE_REBOUND).
7
+ *
8
+ * Phase 2 — `POST /completion` with stream=false + cache_prompt=true:
9
+ * run the model's prefill over `partialText` without sampling any
10
+ * output tokens. This warms the KV cache so the subsequent real
11
+ * generation can skip one full prefill RTT.
12
+ *
13
+ * Phase 3 — `slot/save` again: snapshot the post-prefill KV state under a
14
+ * separate name. The voice state machine passes this handle to the
15
+ * verifier so generation resumes from the prefilled position.
16
+ *
17
+ * The upstream `/v1/prefill` endpoint is absent — the fork PR that
18
+ * adds it is tracked in `docs/eliza-1-optimistic-rollback.md`. Until it
19
+ * lands, phases 1–3 are emulated via the existing slot-save REST path. When
20
+ * the upstream endpoint ships the body of `prefillOptimistic` switches to a
21
+ * single REST call — callers see no signature change.
22
+ *
23
+ * Upstream endpoint contract: replace phases 2+3 with a single
24
+ * `POST /v1/prefill { slotId, partialText, eotProb }` once llama.cpp exposes
25
+ * it. That call must run the model prefill against `slotId`, save the resulting
26
+ * KV checkpoint, and return `{ handle, eotProb }`.
27
+ */
28
+ import type { CheckpointHandle, CheckpointManagerLike } from "./checkpoint-manager";
29
+ import type { ContextPartial } from "./eager-context-builder";
30
/**
 * Arguments for one optimistic prefill request.
 *
 * `partialText` carries the partial transcript available so far, and
 * `eotProb` is the caller-supplied likelihood that the user has finished
 * speaking (taken from VAD hangover progress or the EOT classifier).
 */
export interface PrefillOptimisticArgs {
  /** llama-server base URL, in the form `http://host:port`. */
  baseUrl: string;

  /** Id of the slot this conversation is pinned to. */
  slotId: string;

  /** The partial transcript to prefill against; must not be empty. */
  partialText: string;

  /**
   * End-of-turn probability for the partial, in the range 0..1. Currently
   * recorded as telemetry only; after `/v1/prefill` lands, the server uses
   * it to decide whether to kick the drafter inline as well.
   */
  eotProb: number;

  /**
   * Deterministic context produced by `EagerContextBuilder` (C3). It is used
   * to build the system prompt sent with the prefill `/completion` call, so
   * that the KV cache covers the system prompt as well as the partial
   * transcript. When omitted, only the partial transcript is prefilled.
   */
  context?: ContextPartial;
}
56
/** Value resolved by `prefillOptimistic`. */
export interface PrefillOptimisticResult {
  /**
   * Handle to the KV snapshot taken AFTER the prefill. Hand it to
   * `CheckpointManager.restoreCheckpoint` on SPEECH_END so that the verifier
   * resumes from the prefilled position.
   */
  checkpointHandle: CheckpointHandle;

  /**
   * Approximate number of tokens in the prefilled text. It comes from a
   * rough whitespace tokenizer because the REST emulation path returns no
   * token count; the upstream endpoint will supply the real count once it
   * lands.
   */
  tokenCount: number;

  /** Wall-clock duration of the prefill round-trip (phases 1–3), in ms. */
  prefillMs: number;

  /**
   * Which backend served the request: `slot-save-emulation` is the
   * pre-upstream emulation path, `prefill-v1` is the native `/v1/prefill`
   * endpoint.
   */
  backend: "slot-save-emulation" | "prefill-v1";

  /**
   * End-of-turn probability echoed back from the server. For now this is
   * simply the caller's `eotProb`, since the emulation path has nothing to
   * refine it with; the upstream endpoint will return the server's own
   * model estimate once it lands.
   */
  eotProb: number;
}
85
/** Second argument of `prefillOptimistic`: collaborators and tunables. */
export interface PrefillOptimisticOptions {
  /**
   * Checkpoint manager supplied by the caller.
   * NOTE(review): presumably the object that takes the pre-/post-prefill
   * snapshots — confirm against the implementation in `prefill-client.ts`.
   */
  checkpointManager: CheckpointManagerLike;

  /**
   * Name given to the PRE-prefill snapshot (C1), which the rollback path
   * uses on SPEECH_ACTIVE_REBOUND. Defaults to `pre-prefill`.
   */
  preCheckpointName?: string;

  /**
   * Name given to the POST-prefill snapshot, i.e. the one the verifier
   * starts from on SPEECH_END. Defaults to `post-prefill`.
   */
  postCheckpointName?: string;

  /** Fetch implementation to inject in tests. Defaults to global `fetch`. */
  fetchImpl?: typeof fetch;

  /**
   * Timeout, in milliseconds, for the `/completion` prefill request.
   * Defaults to 5 000 ms. Because the request is a prefill-only pass with no
   * sampling, it should finish in O(transcript_tokens / throughput), which
   * is typically well under 1 s for short partials.
   */
  prefillTimeoutMs?: number;
}
109
/**
 * Execute the three-phase optimistic prefill and resolve with a checkpoint
 * handle for the KV state as it stands after the prefill.
 *
 * How the voice state machine is expected to use it:
 *   - `PAUSE_TENTATIVE` entry: call this function, passing the EOT
 *     classifier's estimate as `eotProb`.
 *   - `SPEECH_ACTIVE_REBOUND` (inside the rollback window): restore the
 *     PRE-prefill checkpoint (C1, saved in phase 1) through the checkpoint
 *     manager. The post-prefill handle returned here is then no longer
 *     needed.
 *   - `SPEECH_END`: give `result.checkpointHandle` to the verifier so that
 *     generation resumes from the prefilled KV state, saving one full
 *     prefill RTT.
 *
 * @param args - Server location, slot, partial transcript and EOT estimate.
 * @param opts - Checkpoint manager plus optional names, fetch and timeout.
 * @returns The post-prefill checkpoint handle together with timing, token
 *   count, backend label and echoed `eotProb`.
 */
export declare function prefillOptimistic(
  args: PrefillOptimisticArgs,
  opts: PrefillOptimisticOptions,
): Promise<PrefillOptimisticResult>;
123
+ //# sourceMappingURL=prefill-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prefill-client.d.ts","sourceRoot":"","sources":["prefill-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAGH,OAAO,KAAK,EACX,gBAAgB,EAChB,qBAAqB,EACrB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAM9D;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,yCAAyC;IACzC,MAAM,EAAE,MAAM,CAAC;IACf,yDAAyD;IACzD,WAAW,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,cAAc,CAAC;CACzB;AAED,MAAM,WAAW,uBAAuB;IACvC;;;;OAIG;IACH,gBAAgB,EAAE,gBAAgB,CAAC;IACnC;;;;OAIG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,EAAE,qBAAqB,GAAG,YAAY,CAAC;IAC9C;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,wBAAwB;IACxC,iBAAiB,EAAE,qBAAqB,CAAC;IACzC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAUD;;;;;;;;;;;;GAYG;AACH,wBAAsB,iBAAiB,CACtC,IAAI,EAAE,qBAAqB,EAC3B,IAAI,EAAE,wBAAwB,GAC5B,OAAO,CAAC,uBAAuB,CAAC,CAoDlC"}