@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893)
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,357 @@
1
+ /**
2
+ * CPU-offloaded KV-cache spill policy.
3
+ *
4
+ * packages/inference/AGENTS.md §3 item 7 mandates that for context > 64k on a
5
+ * device whose RAM cannot hold the full KV cache, the runtime MUST implement
6
+ * *spill* — keep the hot KV pages resident, page the cold ones out to CPU RAM
7
+ * (or, when even that is insufficient, to disk) — rather than refusing the
8
+ * request. AGENTS.md §3 "Failure handling" is equally explicit that the spill
9
+ * is gated by a real latency budget: a device where paging the cold KV back in
10
+ * would miss the voice first-audio-latency target must HARD-FAIL with a
11
+ * structured error, not silently serve a slow session.
12
+ *
13
+ * This module is the policy core. It is pure arithmetic — no llama-server
14
+ * process management, no native binding. `ffi-streaming-backend.ts` consults
15
+ * `planKvSpill()` at activation time:
16
+ * - `mode: "resident"` → no spill needed; load normally.
17
+ * - `mode: "spill"` → pass the resulting `residentPages` /
18
+ * `spillBytes` / tier ("cpu" | "disk") down to the
19
+ * backend as a `--kv-spill` hint.
20
+ * - `mode: "unsupported"` → throw `KvSpillUnsupportedError` so the engine
21
+ * surfaces a structured 4xx to the UI.
22
+ *
23
+ * Model parameters (page size, per-page bandwidth, voice latency budget) are
24
+ * documented constants below — the only "measured" inputs are the device's
25
+ * memory bandwidth class and the KV geometry of the loaded bundle. We do not
26
+ * pretend to micro-benchmark the disk here; the bandwidth tiers are coarse
27
+ * and conservative, and the gate fails *closed*.
28
+ */
29
+
30
+ import type { RamBudget } from "./types";
31
+
32
+ /** Context length below which spill never applies (AGENTS.md §3 item 7). */
33
+ export const KV_SPILL_MIN_CONTEXT = 65536;
34
+
35
+ /**
36
+ * KV-cache page granularity, in tokens. The runtime evicts/restores KV in
37
+ * page units, not per-token, so spill accounting is page-aligned. 256 tokens
38
+ * is the buun-llama-cpp fork's default `--kv-page-size` for the spillable
39
+ * cache; keep this in sync if that default changes.
40
+ */
41
+ export const KV_PAGE_TOKENS = 256;
42
+
43
+ /**
44
+ * First-audio-latency budget for voice mode, in milliseconds. The streaming
45
+ * contract (AGENTS.md §4) wants the phrase chunker handing the first chunk to
46
+ * TTS inside a scheduler tick; a cold KV restore at decode time eats directly
47
+ * into this budget. If the worst-case restore for the spilled pages exceeds
48
+ * this, spill is not viable for a voice-enabled bundle and we hard-fail.
49
+ *
50
+ * Text-only bundles get the looser `KV_SPILL_TEXT_LATENCY_BUDGET_MS`.
51
+ */
52
+ export const KV_SPILL_VOICE_LATENCY_BUDGET_MS = 200;
53
+ export const KV_SPILL_TEXT_LATENCY_BUDGET_MS = 1500;
54
+
55
+ /**
56
+ * Effective KV transfer bandwidth back into the attention kernel, by storage
57
+ * tier and host class, in bytes per millisecond (1,000,000 bytes/ms = 1 GB/s). Conservative — the
58
+ * gate fails closed, so under-estimating bandwidth only makes us refuse more
59
+ * aggressively, never serve something too slow.
60
+ *
61
+ * - `cpu-apple` : Apple Silicon shared memory — "spilling to CPU" is
62
+ * mostly an accounting move (same physical RAM, different
63
+ * residency bookkeeping); effective restore bandwidth is
64
+ * high.
65
+ * - `cpu-pcie` : discrete-GPU x86 — cold KV pages live in host RAM and
66
+ * ride the PCIe bus back to VRAM. PCIe 4.0 x16 ≈ 25 GB/s
67
+ * after framing; we budget 12 GB/s.
68
+ * - `disk-nvme` : NVMe SSD — sequential read ≈ 3 GB/s; we budget 1.5 GB/s.
69
+ * - `disk-sata` : SATA SSD / spinning rust fallback — ≈ 0.4 GB/s; we
70
+ * budget 0.25. (Mostly here so the math is defined; in
71
+ * practice this tier fails the gate immediately.)
72
+ */
73
+ const KV_RESTORE_BANDWIDTH_BYTES_PER_MS = {
74
+ "cpu-apple": 40_000_000,
75
+ "cpu-pcie": 12_000_000,
76
+ "disk-nvme": 1_500_000,
77
+ "disk-sata": 250_000,
78
+ } as const;
79
+
80
+ export type KvRestoreClass = keyof typeof KV_RESTORE_BANDWIDTH_BYTES_PER_MS;
81
+
82
+ /**
83
+ * Per-token KV-cache footprint of a loaded bundle, summed across all
84
+ * full-attention layers, for the *quantized* cache it actually ships with
85
+ * (QJL K + PolarQuant/TurboQuant V — see packages/training/AGENTS.md §3).
86
+ * Callers derive this from the bundle's manifest / catalog runtime block;
87
+ * `estimateQuantizedKvBytesPerToken()` is the fallback when only the param
88
+ * count is known.
89
+ */
90
+ export interface KvGeometry {
91
+ /** Bytes of compressed KV the cache grows by, per generated token. */
92
+ bytesPerToken: number;
93
+ /** True when the loaded bundle has voice enabled (tighter latency gate). */
94
+ voiceEnabled: boolean;
95
+ }
96
+
97
+ /**
98
+ * Fallback per-token KV estimate when the manifest doesn't carry an explicit
99
+ * figure. Order-of-magnitude only. Gemma 4 E2B uses stock q8_0 KV over an MQA
100
+ * geometry (n_head_kv=1) with dual head dims — 512 global / 256 SWA — summed
101
+ * over the global + sliding-window attention layers. For Gemma-4-class
102
+ * geometry that lands roughly at the table below (bytes/token across the whole
103
+ * cache). These are the figures the catalog's per-tier `ramBudgetMb` was sized
104
+ * against.
105
+ */
106
+ const QUANTIZED_KV_BYTES_PER_TOKEN_BY_PARAMS: Readonly<Record<string, number>> =
107
+ {
108
+ "0.8B": 1_400,
109
+ "2B": 2_400,
110
+ "4B": 4_800,
111
+ "9B": 9_000,
112
+ "27B": 22_000,
113
+ };
114
+
115
+ export function estimateQuantizedKvBytesPerToken(params: string): number {
116
+ const known = QUANTIZED_KV_BYTES_PER_TOKEN_BY_PARAMS[params];
117
+ if (known !== undefined) return known;
118
+ // Unknown param string — fail closed by assuming the largest tier's
119
+ // footprint so a mis-tagged bundle errs toward refusing spill rather than
120
+ // toward over-promising residency.
121
+ return QUANTIZED_KV_BYTES_PER_TOKEN_BY_PARAMS["27B"];
122
+ }
123
+
124
+ /**
125
+ * Where the spilled pages land. `"cpu"` = host RAM (still RAM, just not
126
+ * counted against the resident budget); `"disk"` = the local-inference cache
127
+ * directory on persistent storage.
128
+ */
129
+ export type KvSpillTier = "cpu" | "disk";
130
+
131
+ export interface KvSpillPlanResident {
132
+ mode: "resident";
133
+ /** The whole KV cache fits in the resident budget; nothing spills. */
134
+ totalKvBytes: number;
135
+ residentBytes: number;
136
+ }
137
+
138
+ export interface KvSpillPlanSpill {
139
+ mode: "spill";
140
+ tier: KvSpillTier;
141
+ /** Pages kept resident (the hot tail of the context). */
142
+ residentPages: number;
143
+ /** Pages paged out to `tier`. */
144
+ spillPages: number;
145
+ /** Bytes of KV held resident. */
146
+ residentBytes: number;
147
+ /** Bytes of KV spilled to `tier`. */
148
+ spillBytes: number;
149
+ /** Total compressed KV footprint at full context. */
150
+ totalKvBytes: number;
151
+ /** Worst-case latency to restore one cold page, in ms. */
152
+ worstCaseRestoreMs: number;
153
+ /** The latency budget this plan was checked against, in ms. */
154
+ latencyBudgetMs: number;
155
+ }
156
+
157
+ export type KvSpillPlan = KvSpillPlanResident | KvSpillPlanSpill;
158
+
159
/**
 * Build the human-readable message for a `KvSpillUnsupportedError`.
 *
 * Kept outside the class so the constructor can pass the finished string
 * straight to `super(...)` without touching `this` first.
 */
function formatKvSpillUnsupportedMessage(
  details: KvSpillUnsupportedError["details"],
): string {
  const budgetKind = details.voiceEnabled ? "voice" : "text";
  const restoreMs = details.worstCaseRestoreMs.toFixed(1);
  const spillMib = (details.spillBytes / 1024 / 1024).toFixed(0);
  return [
    `KV-cache spill for a ${details.requestedContext}-token context cannot`,
    `meet the ${budgetKind} latency budget on this device:`,
    `worst-case cold-page restore is ${restoreMs}ms vs a ${details.latencyBudgetMs}ms`,
    `budget (${details.restoreClass}, ${spillMib} MiB would spill).`,
    "Use a smaller context variant or a device with more RAM / faster storage.",
  ].join(" ");
}

/**
 * Structured error raised when a KV-cache spill cannot meet its latency
 * budget. It exposes a stable machine-readable `code` and the numeric
 * `details` behind the decision; the engine is expected to catch it and
 * surface it to the UI as a 4xx with both fields intact — there is NO
 * silent-slow fallback (AGENTS.md §3).
 */
export class KvSpillUnsupportedError extends Error {
  /** Stable identifier callers can switch on. */
  readonly code = "kv-spill-unsupported";
  /** The figures the plan was rejected on, exactly as supplied. */
  readonly details: {
    requestedContext: number;
    totalKvBytes: number;
    residentBytes: number;
    spillBytes: number;
    worstCaseRestoreMs: number;
    latencyBudgetMs: number;
    restoreClass: KvRestoreClass;
    voiceEnabled: boolean;
  };

  /**
   * @param details - Sizes, latencies and device class describing the
   *   rejected plan; stored by reference on the instance.
   */
  constructor(details: KvSpillUnsupportedError["details"]) {
    super(formatKvSpillUnsupportedMessage(details));
    this.name = "KvSpillUnsupportedError";
    this.details = details;
  }
}
194
+
195
+ /**
196
+ * Inputs to `planKvSpill`. `residentKvBudgetBytes` is the slice of the RAM
197
+ * budget the runtime is willing to hand to the *resident* KV cache after
198
+ * weights + activations + the TTS/ASR working sets are accounted for; callers
199
+ * derive it from `RamBudget` via `residentKvBudgetFromRamBudget()`.
200
+ */
201
+ export interface KvSpillInput {
202
+ requestedContext: number;
203
+ geometry: KvGeometry;
204
+ residentKvBudgetBytes: number;
205
+ restoreClass: KvRestoreClass;
206
+ /**
207
+ * True when the host can spill to CPU RAM (host RAM available beyond the
208
+ * resident budget). When false the spill tier degrades to `"disk"`.
209
+ */
210
+ cpuSpillAvailable: boolean;
211
+ }
212
+
213
+ /**
214
+ * Slice the resident-KV budget out of a model's `RamBudget`. The recommended
215
+ * budget covers weights + activations + voice working sets + KV; we reserve a
216
+ * fixed fraction for KV. This mirrors what `recommendation.ts` already assumes
217
+ * implicitly when it sizes tiers — kept as one constant so the spill policy
218
+ * and the recommender agree.
219
+ */
220
+ export const RESIDENT_KV_BUDGET_FRACTION = 0.25;
221
+
222
/**
 * Convert a model's recommended RAM budget into the byte budget for the
 * resident KV cache.
 *
 * @param budget - RAM budget whose `recommendedMb` field is read.
 * @returns `recommendedMb` expressed in bytes, scaled by
 *   `RESIDENT_KV_BUDGET_FRACTION` and rounded down to a whole byte.
 */
export function residentKvBudgetFromRamBudget(budget: RamBudget): number {
  const recommendedBytes = budget.recommendedMb * 1024 * 1024;
  return Math.floor(recommendedBytes * RESIDENT_KV_BUDGET_FRACTION);
}
227
+
228
/**
 * Number of KV pages needed to hold `tokens` tokens; a partially filled
 * final page counts as a whole page.
 */
function pagesForTokens(tokens: number): number {
  const exactPages = tokens / KV_PAGE_TOKENS;
  return Math.ceil(exactPages);
}
231
+
232
/**
 * Decide the KV-cache placement for a requested context.
 *
 * Outcomes, in the order they are checked:
 * - `{ mode: "resident" }` when the whole compressed KV cache (rounded up to
 *   whole pages) fits `residentKvBudgetBytes`, at any context length.
 * - `KvSpillUnsupportedError` when the cache does not fit and the context is
 *   below `KV_SPILL_MIN_CONTEXT` — spill is never planned below that floor.
 * - `{ mode: "spill", ... }` when the cache fits with paging and restoring
 *   one cold page stays inside the latency budget.
 * - `KvSpillUnsupportedError` when that restore would miss the budget.
 *
 * @param input - Requested context, KV geometry, resident budget, restore
 *   class and CPU-spill availability.
 * @returns The resident or spill plan.
 * @throws {Error} When the context or `bytesPerToken` is not a positive
 *   finite number, when the resident budget is not a positive number, or when
 *   no usable restore bandwidth exists for the effective restore class.
 * @throws {KvSpillUnsupportedError} When the cache cannot be placed within
 *   the rules above.
 */
export function planKvSpill(input: KvSpillInput): KvSpillPlan {
  const { requestedContext, geometry, residentKvBudgetBytes } = input;

  if (
    !Number.isFinite(requestedContext) ||
    requestedContext <= 0 ||
    !Number.isFinite(geometry.bytesPerToken) ||
    geometry.bytesPerToken <= 0
  ) {
    throw new Error(
      `[kv-spill] planKvSpill needs a positive context and bytesPerToken; got context=${requestedContext}, bytesPerToken=${geometry.bytesPerToken}`,
    );
  }
  // `!(x > 0)` rejects NaN and undefined as well as zero and negatives; a
  // plain `x <= 0` lets NaN through and every page count below becomes NaN.
  // +Infinity still passes and simply means "everything fits".
  if (!(residentKvBudgetBytes > 0)) {
    throw new Error(
      `[kv-spill] residentKvBudgetBytes must be positive; got ${residentKvBudgetBytes}`,
    );
  }

  const pageBytes = geometry.bytesPerToken * KV_PAGE_TOKENS;
  const totalPages = pagesForTokens(requestedContext);
  const totalKvBytes = totalPages * pageBytes;

  // Whole cache fits resident — no spill, regardless of context length.
  if (totalKvBytes <= residentKvBudgetBytes) {
    return {
      mode: "resident",
      totalKvBytes,
      residentBytes: totalKvBytes,
    };
  }

  // Below the contract floor, spill is not on the table: a context that does
  // not fit the resident budget here is a wrong-tier-for-device situation,
  // not a spill case. Report it with the same structured error so the engine
  // surfaces it cleanly rather than half-loading. No restore was evaluated,
  // so the two latency fields are reported as 0.
  if (requestedContext < KV_SPILL_MIN_CONTEXT) {
    throw new KvSpillUnsupportedError({
      requestedContext,
      totalKvBytes,
      residentBytes: residentKvBudgetBytes,
      spillBytes: totalKvBytes - residentKvBudgetBytes,
      worstCaseRestoreMs: 0,
      latencyBudgetMs: 0,
      restoreClass: input.restoreClass,
      voiceEnabled: geometry.voiceEnabled,
    });
  }

  // At least one page always stays resident, so `residentBytes` can exceed
  // the budget when the budget is smaller than a single page.
  const residentPages = Math.max(
    1,
    Math.floor(residentKvBudgetBytes / pageBytes),
  );
  const spillPages = totalPages - residentPages;
  const residentBytes = residentPages * pageBytes;
  const spillBytes = spillPages * pageBytes;

  const tier: KvSpillTier = input.cpuSpillAvailable ? "cpu" : "disk";
  // When CPU spill isn't available the only restore class that makes sense is
  // a disk one; if the caller handed us a `cpu-*` class, downgrade to NVMe.
  const restoreClass: KvRestoreClass =
    tier === "disk" && input.restoreClass.startsWith("cpu-")
      ? "disk-nvme"
      : input.restoreClass;
  const bandwidth = KV_RESTORE_BANDWIDTH_BYTES_PER_MS[restoreClass];
  // A missing, NaN or negative bandwidth would make the latency comparison
  // below pass vacuously and approve a plan on no evidence. A bandwidth of 0
  // is allowed through: it yields an infinite restore time and is rejected by
  // the budget check as before.
  if (!(bandwidth >= 0)) {
    throw new Error(
      `[kv-spill] no usable restore bandwidth for restoreClass=${restoreClass}; got ${bandwidth}`,
    );
  }

  // Worst case at decode time: a single cold page faulted back in. (Spilling
  // by page keeps this bounded — a smaller `KV_PAGE_TOKENS` is the lever for
  // cutting the worst case if a device class needs it.)
  const worstCaseRestoreMs = pageBytes / bandwidth;
  const latencyBudgetMs = geometry.voiceEnabled
    ? KV_SPILL_VOICE_LATENCY_BUDGET_MS
    : KV_SPILL_TEXT_LATENCY_BUDGET_MS;

  if (worstCaseRestoreMs > latencyBudgetMs) {
    throw new KvSpillUnsupportedError({
      requestedContext,
      totalKvBytes,
      residentBytes,
      spillBytes,
      worstCaseRestoreMs,
      latencyBudgetMs,
      restoreClass,
      voiceEnabled: geometry.voiceEnabled,
    });
  }

  return {
    mode: "spill",
    tier,
    residentPages,
    spillPages,
    residentBytes,
    spillBytes,
    totalKvBytes,
    worstCaseRestoreMs,
    latencyBudgetMs,
  };
}
343
+
344
/**
 * Pick the KV restore bandwidth class for a hardware descriptor.
 *
 * Apple Silicon takes precedence and maps to the unified-memory class; a
 * discrete GPU maps to the PCIe class; anything else maps to the NVMe class,
 * the conservative choice when there is no GPU to page back to.
 *
 * @param input - Flags describing the host: `appleSilicon` and
 *   `hasDiscreteGpu`.
 * @returns `"cpu-apple"`, `"cpu-pcie"` or `"disk-nvme"`.
 */
export function restoreClassForHardware(input: {
  appleSilicon: boolean;
  hasDiscreteGpu: boolean;
}): KvRestoreClass {
  if (input.appleSilicon) {
    return "cpu-apple";
  }
  return input.hasDiscreteGpu ? "cpu-pcie" : "disk-nvme";
}
@@ -0,0 +1,266 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ buildVoiceLatencyDevPayload,
4
+ EndToEndLatencyTracer,
5
+ LATENCY_DERIVED_KEYS,
6
+ VOICE_CHECKPOINTS,
7
+ type VoiceCheckpoint,
8
+ } from "./latency-trace";
9
+ import type { VadEvent, VadEventListener } from "./voice/types";
10
+
11
/**
 * Open a turn on `tracer` and mark each checkpoint that has an offset.
 *
 * Checkpoints are visited in `VOICE_CHECKPOINTS` order and stamped at
 * `base + offset`; checkpoints without an entry in `offsets` are skipped.
 * The turn is opened with `{ roomId }` when a truthy `roomId` is given and
 * with `{}` otherwise.
 *
 * @returns The id of the opened turn; the turn is left open.
 */
function fullTurn(
  tracer: EndToEndLatencyTracer,
  base: number,
  offsets: Partial<Record<VoiceCheckpoint, number>>,
  roomId?: string,
): string {
  const turnOptions = roomId ? { roomId } : {};
  const turnId = tracer.beginTurn(turnOptions);
  for (const checkpoint of VOICE_CHECKPOINTS) {
    const offsetMs = offsets[checkpoint];
    if (offsetMs !== undefined) {
      tracer.mark(turnId, checkpoint, base + offsetMs);
    }
  }
  return turnId;
}
26
+
27
+ const CANONICAL_OFFSETS: Record<VoiceCheckpoint, number> = {
28
+ "peer-utterance-end": -120,
29
+ "vad-trigger": 0,
30
+ "vad-speech-start": 30,
31
+ "prewarm-fired": 35,
32
+ "asr-first-partial": 220,
33
+ "asr-final": 900,
34
+ "llm-first-token": 1000,
35
+ "llm-first-replytext-char": 1040,
36
+ "replyText-first-emotion-tag": 1050,
37
+ "phrase-1-to-tts": 1090,
38
+ "tts-first-audio-chunk": 1200,
39
+ "audio-first-played": 1230,
40
+ "audio-first-into-peer-ring": 1235,
41
+ };
42
+
43
+ describe("EndToEndLatencyTracer", () => {
44
+ it("records checkpoints and derives metrics for a complete turn", () => {
45
+ const tracer = new EndToEndLatencyTracer();
46
+ const turnId = fullTurn(tracer, 1_000_000, CANONICAL_OFFSETS, "roomA");
47
+ const trace = tracer.endTurn(turnId);
48
+ expect(trace).not.toBeNull();
49
+ if (!trace) return;
50
+ expect(trace.complete).toBe(true);
51
+ // CANONICAL_OFFSETS records every checkpoint including the duet-only ones,
52
+ // so nothing is missing.
53
+ expect(trace.missing).toHaveLength(0);
54
+ expect(trace.roomId).toBe("roomA");
55
+ expect(trace.checkpoints).toHaveLength(VOICE_CHECKPOINTS.length);
56
+ // t0 == the earliest checkpoint recorded — here `peer-utterance-end`
57
+ // (offset -120), the duet headline t0; checkpoints sorted by atEpochMs.
58
+ expect(trace.t0EpochMs).toBe(1_000_000 - 120);
59
+ expect(trace.checkpoints[0]?.name).toBe("peer-utterance-end");
60
+ expect(trace.checkpoints[0]?.tMs).toBe(0);
61
+ // Derived spans (absolute deltas — independent of t0).
62
+ expect(trace.derived.ttftMs).toBe(1000); // vad-trigger → llm-first-token
63
+ expect(trace.derived.ttfaMs).toBe(1200); // vad-trigger → tts-first-audio-chunk
64
+ expect(trace.derived.ttapMs).toBe(1230); // vad-trigger → audio-first-played
65
+ expect(trace.derived.asrFinalLatencyMs).toBe(870); // vad-speech-start(30) → asr-final(900)
66
+ expect(trace.derived.prewarmLatencyMs).toBe(35);
67
+ expect(trace.derived.audioSinkLatencyMs).toBe(30); // tts-first-chunk(1200) → played(1230)
68
+ // Duet (cross-agent) spans — peer-utterance-end(-120) is the headline t0.
69
+ expect(trace.derived.ttftFromUtteranceEndMs).toBe(1120); // -120 → 1000
70
+ expect(trace.derived.firstAudioIntoPeerRingFromUtteranceEndMs).toBe(1355); // -120 → 1235
71
+ expect(trace.derived.emotionTagOverheadMs).toBe(50); // llm-first-token(1000) → tag(1050)
72
+ expect(trace.anomalies).toHaveLength(0);
73
+ });
74
+
75
+ it("leaves derived metrics null when an endpoint checkpoint is missing", () => {
76
+ const tracer = new EndToEndLatencyTracer();
77
+ // No `audio-first-played`, no `tts-first-audio-chunk`.
78
+ const offsets = { ...CANONICAL_OFFSETS };
79
+ delete (offsets as Partial<Record<VoiceCheckpoint, number>>)[
80
+ "tts-first-audio-chunk"
81
+ ];
82
+ delete (offsets as Partial<Record<VoiceCheckpoint, number>>)[
83
+ "audio-first-played"
84
+ ];
85
+ const turnId = fullTurn(tracer, 2_000_000, offsets);
86
+ const trace = tracer.endTurn(turnId);
87
+ if (!trace) throw new Error("expected trace");
88
+ expect(trace.complete).toBe(false);
89
+ expect(trace.missing).toEqual(
90
+ expect.arrayContaining(["tts-first-audio-chunk", "audio-first-played"]),
91
+ );
92
+ expect(trace.derived.ttftMs).toBe(1000); // still computable
93
+ expect(trace.derived.ttfaMs).toBeNull(); // depends on tts-first-audio-chunk
94
+ expect(trace.derived.ttapMs).toBeNull(); // depends on audio-first-played
95
+ expect(trace.derived.audioSinkLatencyMs).toBeNull();
96
+ });
97
+
98
+ it("flags a duplicate mark and keeps the first timestamp", () => {
99
+ const tracer = new EndToEndLatencyTracer();
100
+ const turnId = tracer.beginTurn({});
101
+ tracer.mark(turnId, "vad-trigger", 100);
102
+ tracer.mark(turnId, "vad-trigger", 999); // duplicate
103
+ tracer.mark(turnId, "llm-first-token", 600);
104
+ const trace = tracer.endTurn(turnId);
105
+ if (!trace) throw new Error("expected trace");
106
+ expect(trace.anomalies.some((a) => a.includes("duplicate"))).toBe(true);
107
+ expect(trace.derived.ttftMs).toBe(500); // 100 → 600, not 999 → 600
108
+ });
109
+
110
+ it("flags an out-of-order checkpoint without reordering", () => {
111
+ const tracer = new EndToEndLatencyTracer();
112
+ const turnId = tracer.beginTurn({});
113
+ tracer.mark(turnId, "vad-trigger", 100);
114
+ tracer.mark(turnId, "llm-first-token", 500);
115
+ // asr-final ordered before llm-first-token but timestamped after it.
116
+ tracer.mark(turnId, "asr-final", 700);
117
+ const trace = tracer.endTurn(turnId);
118
+ if (!trace) throw new Error("expected trace");
119
+ expect(trace.anomalies.some((a) => a.includes("clock skew"))).toBe(true);
120
+ expect(trace.derived.llmFirstTokenAfterAsrMs).toBe(-200); // 700 → 500 recorded as-is
121
+ });
122
+
123
+ it("ignores marks for unknown / closed turns", () => {
124
+ const tracer = new EndToEndLatencyTracer();
125
+ const turnId = tracer.beginTurn({});
126
+ tracer.mark(turnId, "vad-trigger", 1);
127
+ tracer.endTurn(turnId);
128
+ // Late mark — must not throw, must not resurrect the turn.
129
+ tracer.mark(turnId, "llm-first-token", 5);
130
+ tracer.mark("does-not-exist", "vad-trigger", 5);
131
+ expect(tracer.recentTraces()).toHaveLength(1);
132
+ });
133
+
134
+ it("evicts the oldest trace when the ring is full", () => {
135
+ const tracer = new EndToEndLatencyTracer({ ringCapacity: 3 });
136
+ for (let i = 0; i < 5; i += 1) {
137
+ const turnId = tracer.beginTurn({ roomId: `room-${i}` });
138
+ tracer.mark(turnId, "vad-trigger", i * 1000);
139
+ tracer.mark(turnId, "llm-first-token", i * 1000 + 100);
140
+ tracer.endTurn(turnId);
141
+ }
142
+ const traces = tracer.recentTraces();
143
+ expect(traces).toHaveLength(3);
144
+ expect(traces.map((t) => t.roomId)).toEqual(["room-2", "room-3", "room-4"]);
145
+ });
146
+
147
+ it("evicts the oldest *open* turn past maxOpenTurns", () => {
148
+ const tracer = new EndToEndLatencyTracer({ maxOpenTurns: 2 });
149
+ const a = tracer.beginTurn({ roomId: "a" });
150
+ tracer.mark(a, "vad-trigger", 10);
151
+ tracer.beginTurn({ roomId: "b" });
152
+ tracer.beginTurn({ roomId: "c" }); // forces eviction of `a`
153
+ expect(tracer.openTurnCount).toBe(2);
154
+ // `a` was emitted with whatever it had.
155
+ const traces = tracer.recentTraces();
156
+ expect(traces.some((t) => t.roomId === "a")).toBe(true);
157
+ // A mark on the evicted turn is now ignored.
158
+ tracer.mark(a, "llm-first-token", 50);
159
+ expect(
160
+ tracer.recentTraces().find((t) => t.roomId === "a")?.missing,
161
+ ).toContain("llm-first-token");
162
+ });
163
+
164
+ it("builds histograms with nearest-rank percentiles", () => {
165
+ const tracer = new EndToEndLatencyTracer();
166
+ // Five turns with ttftMs of 100, 200, 300, 400, 500.
167
+ for (const v of [100, 200, 300, 400, 500]) {
168
+ const turnId = tracer.beginTurn({});
169
+ tracer.mark(turnId, "vad-trigger", 0);
170
+ tracer.mark(turnId, "llm-first-token", v);
171
+ tracer.endTurn(turnId);
172
+ }
173
+ const h = tracer.histogramSummaries();
174
+ expect(h.ttftMs.count).toBe(5);
175
+ expect(h.ttftMs.min).toBe(100);
176
+ expect(h.ttftMs.max).toBe(500);
177
+ expect(h.ttftMs.p50).toBe(300);
178
+ expect(h.ttftMs.p90).toBe(500);
179
+ expect(h.ttftMs.p99).toBe(500);
180
+ expect(h.ttftMs.mean).toBe(300);
181
+ // A metric with no samples is all-null, not zero.
182
+ expect(h.ttfaMs.count).toBe(0);
183
+ expect(h.ttfaMs.p50).toBeNull();
184
+ });
185
+
186
+ it("only feeds non-null derived metrics into the histograms", () => {
187
+ const tracer = new EndToEndLatencyTracer();
188
+ const turnId = tracer.beginTurn({});
189
+ tracer.mark(turnId, "vad-trigger", 0);
190
+ // No llm-first-token → ttftMs is null and must not become a 0 sample.
191
+ tracer.mark(turnId, "asr-first-partial", 150);
192
+ tracer.endTurn(turnId);
193
+ const h = tracer.histogramSummaries();
194
+ expect(h.ttftMs.count).toBe(0);
195
+ expect(h.asrFirstPartialMs.count).toBe(1);
196
+ expect(h.asrFirstPartialMs.p50).toBe(150);
197
+ });
198
+
199
+ it("bridges a VadEventSource onto vad-trigger / vad-speech-start", () => {
200
+ const listeners = new Set<VadEventListener>();
201
+ const source = {
202
+ onVadEvent(l: VadEventListener) {
203
+ listeners.add(l);
204
+ return () => listeners.delete(l);
205
+ },
206
+ };
207
+ const emit = (e: VadEvent) => {
208
+ for (const l of listeners) l(e);
209
+ };
210
+ const tracer = new EndToEndLatencyTracer();
211
+ let openedTurnId: string | null = null;
212
+ const unsub = tracer.bindVadDetector(source, {
213
+ roomId: "roomVad",
214
+ onTurnOpen: (id) => {
215
+ openedTurnId = id;
216
+ },
217
+ });
218
+ emit({ type: "speech-start", timestampMs: 4_242, probability: 0.9 });
219
+ expect(openedTurnId).not.toBeNull();
220
+ if (!openedTurnId) return;
221
+ // The bridge recorded vad-trigger + vad-speech-start at the event ts.
222
+ const peek = tracer.peekTurn(openedTurnId);
223
+ expect(peek?.checkpoints.map((c) => c.name)).toEqual([
224
+ "vad-trigger",
225
+ "vad-speech-start",
226
+ ]);
227
+ expect(peek?.t0EpochMs).toBe(4_242);
228
+ // speech-active / speech-end do not open new turns.
229
+ emit({
230
+ type: "speech-active",
231
+ timestampMs: 4_500,
232
+ probability: 0.95,
233
+ speechDurationMs: 258,
234
+ });
235
+ expect(tracer.openTurnCount).toBe(1);
236
+ unsub();
237
+ emit({ type: "speech-start", timestampMs: 9_000, probability: 0.8 });
238
+ // After unsubscribe, no new turn.
239
+ expect(tracer.openTurnCount).toBe(1);
240
+ });
241
+
242
+ it("buildVoiceLatencyDevPayload exposes traces + histograms + metadata", () => {
243
+ const tracer = new EndToEndLatencyTracer();
244
+ const turnId = fullTurn(tracer, 5_000_000, CANONICAL_OFFSETS, "roomP");
245
+ tracer.endTurn(turnId);
246
+ const payload = buildVoiceLatencyDevPayload(tracer, 10);
247
+ expect(payload.checkpoints).toEqual(VOICE_CHECKPOINTS);
248
+ expect(payload.derivedKeys).toEqual(LATENCY_DERIVED_KEYS);
249
+ expect(payload.openTurnCount).toBe(0);
250
+ expect(payload.traces).toHaveLength(1);
251
+ expect(payload.traces[0]?.roomId).toBe("roomP");
252
+ expect(payload.histograms.ttftMs.count).toBe(1);
253
+ expect(typeof payload.generatedAtEpochMs).toBe("number");
254
+ });
255
+
256
+ it("reset() clears traces, histograms, and open turns", () => {
257
+ const tracer = new EndToEndLatencyTracer();
258
+ const t = fullTurn(tracer, 1, CANONICAL_OFFSETS);
259
+ tracer.endTurn(t);
260
+ tracer.beginTurn({ roomId: "still-open" });
261
+ tracer.reset();
262
+ expect(tracer.recentTraces()).toHaveLength(0);
263
+ expect(tracer.openTurnCount).toBe(0);
264
+ expect(tracer.histogramSummaries().ttftMs.count).toBe(0);
265
+ });
266
+ });