@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (881)
  1. package/README.md +84 -10
  2. package/dist/actions/generate-media.d.ts.map +1 -0
  3. package/dist/actions/identify-speaker.d.ts.map +1 -0
  4. package/dist/actions/transcription-control.d.ts.map +1 -0
  5. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  6. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  7. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  8. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  10. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  12. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  14. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  16. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  18. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  20. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  22. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  23. package/dist/backends/apple-foundation.d.ts +56 -0
  24. package/dist/backends/apple-foundation.d.ts.map +1 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +39647 -0
  27. package/dist/index.js.map +217 -0
  28. package/{src → dist}/local-inference-routes.d.ts +9 -0
  29. package/dist/local-inference-routes.d.ts.map +1 -0
  30. package/dist/provider.d.ts.map +1 -0
  31. package/{src → dist}/routes/compat-helpers.d.ts +1 -1
  32. package/dist/routes/compat-helpers.d.ts.map +1 -0
  33. package/dist/routes/family-member-route.d.ts.map +1 -0
  34. package/{src → dist}/routes/index.d.ts +1 -0
  35. package/dist/routes/index.d.ts.map +1 -0
  36. package/dist/routes/index.js +42040 -0
  37. package/dist/routes/index.js.map +236 -0
  38. package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
  39. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  40. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  41. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  42. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  43. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  44. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  45. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  46. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  47. package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
  48. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  49. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  50. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  51. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  52. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  53. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  54. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  55. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  56. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  57. package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
  58. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  59. package/{src → dist}/runtime/index.d.ts +1 -1
  60. package/dist/runtime/index.d.ts.map +1 -0
  61. package/dist/runtime/index.js +38768 -0
  62. package/dist/runtime/index.js.map +217 -0
  63. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  64. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  65. package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
  66. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  67. package/{src → dist}/services/active-model.d.ts +28 -0
  68. package/dist/services/active-model.d.ts.map +1 -0
  69. package/dist/services/asr-provenance.d.ts +5 -0
  70. package/dist/services/asr-provenance.d.ts.map +1 -0
  71. package/{src → dist}/services/assignments.d.ts +16 -3
  72. package/dist/services/assignments.d.ts.map +1 -0
  73. package/dist/services/backend-selector.d.ts +55 -0
  74. package/dist/services/backend-selector.d.ts.map +1 -0
  75. package/{src → dist}/services/backend.d.ts +110 -16
  76. package/dist/services/backend.d.ts.map +1 -0
  77. package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
  78. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  79. package/dist/services/bundled-models.d.ts.map +1 -0
  80. package/dist/services/cache-bridge.d.ts.map +1 -0
  81. package/dist/services/catalog.d.ts +10 -0
  82. package/dist/services/catalog.d.ts.map +1 -0
  83. package/dist/services/checkpoint-client.d.ts.map +1 -0
  84. package/dist/services/checkpoint-manager.d.ts +217 -0
  85. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  86. package/dist/services/cloud-fallback.d.ts.map +1 -0
  87. package/dist/services/context-fit.d.ts +36 -0
  88. package/dist/services/context-fit.d.ts.map +1 -0
  89. package/dist/services/conversation-registry.d.ts.map +1 -0
  90. package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
  91. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  92. package/dist/services/device-bridge.d.ts.map +1 -0
  93. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  94. package/{src → dist}/services/device-tier.d.ts +19 -1
  95. package/dist/services/device-tier.d.ts.map +1 -0
  96. package/{src → dist}/services/downloader.d.ts +16 -4
  97. package/dist/services/downloader.d.ts.map +1 -0
  98. package/{src → dist}/services/engine.d.ts +43 -4
  99. package/dist/services/engine.d.ts.map +1 -0
  100. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  101. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  102. package/dist/services/external-scanner.d.ts.map +1 -0
  103. package/dist/services/ffi-llm-mock.d.ts +90 -0
  104. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  105. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  106. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  107. package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
  108. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  109. package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
  110. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  111. package/dist/services/gpu-autotune.d.ts +150 -0
  112. package/dist/services/gpu-autotune.d.ts.map +1 -0
  113. package/dist/services/gpu-detect.d.ts.map +1 -0
  114. package/dist/services/handler-registry.d.ts.map +1 -0
  115. package/dist/services/hardware.d.ts.map +1 -0
  116. package/dist/services/image-description-runtime.d.ts.map +1 -0
  117. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  118. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  119. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  120. package/dist/services/imagegen/errors.d.ts.map +1 -0
  121. package/dist/services/imagegen/index.d.ts.map +1 -0
  122. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  123. package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
  124. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  125. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  126. package/dist/services/imagegen/types.d.ts.map +1 -0
  127. package/{src → dist}/services/index.d.ts +3 -1
  128. package/dist/services/index.d.ts.map +1 -0
  129. package/dist/services/index.js +39453 -0
  130. package/dist/services/index.js.map +227 -0
  131. package/dist/services/inference-capabilities.d.ts.map +1 -0
  132. package/dist/services/inference-telemetry.d.ts.map +1 -0
  133. package/dist/services/ios-llama-streaming.d.ts +119 -0
  134. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  135. package/dist/services/kv-spill.d.ts.map +1 -0
  136. package/dist/services/latency-trace.d.ts.map +1 -0
  137. package/dist/services/lib-target.d.ts +55 -0
  138. package/dist/services/lib-target.d.ts.map +1 -0
  139. package/dist/services/live-signals.d.ts +86 -0
  140. package/dist/services/live-signals.d.ts.map +1 -0
  141. package/dist/services/llama-server-metrics.d.ts +114 -0
  142. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  143. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  144. package/dist/services/load-args.d.ts.map +1 -0
  145. package/dist/services/manifest/index.d.ts +4 -0
  146. package/dist/services/manifest/index.d.ts.map +1 -0
  147. package/{src → dist}/services/manifest/schema.d.ts +196 -6
  148. package/dist/services/manifest/schema.d.ts.map +1 -0
  149. package/{src → dist}/services/manifest/types.d.ts +3 -1
  150. package/dist/services/manifest/types.d.ts.map +1 -0
  151. package/dist/services/manifest/validator.d.ts.map +1 -0
  152. package/{src → dist}/services/memory-arbiter.d.ts +33 -3
  153. package/dist/services/memory-arbiter.d.ts.map +1 -0
  154. package/dist/services/memory-benchmark.d.ts +76 -0
  155. package/dist/services/memory-benchmark.d.ts.map +1 -0
  156. package/{src → dist}/services/memory-monitor.d.ts +6 -0
  157. package/dist/services/memory-monitor.d.ts.map +1 -0
  158. package/dist/services/memory-pressure.d.ts.map +1 -0
  159. package/dist/services/mtp-doctor.d.ts.map +1 -0
  160. package/dist/services/network-policy.d.ts.map +1 -0
  161. package/dist/services/paths.d.ts.map +1 -0
  162. package/dist/services/planner-skeleton.d.ts.map +1 -0
  163. package/dist/services/providers.d.ts.map +1 -0
  164. package/dist/services/ram-budget.d.ts.map +1 -0
  165. package/dist/services/readiness.d.ts.map +1 -0
  166. package/dist/services/recommendation.d.ts.map +1 -0
  167. package/{src → dist}/services/registry.d.ts +11 -13
  168. package/dist/services/registry.d.ts.map +1 -0
  169. package/{src → dist}/services/router-handler.d.ts +2 -2
  170. package/dist/services/router-handler.d.ts.map +1 -0
  171. package/{src → dist}/services/routing-policy.d.ts +32 -9
  172. package/dist/services/routing-policy.d.ts.map +1 -0
  173. package/dist/services/routing-preferences.d.ts.map +1 -0
  174. package/dist/services/runtime-target.d.ts.map +1 -0
  175. package/{src → dist}/services/service.d.ts +1 -1
  176. package/dist/services/service.d.ts.map +1 -0
  177. package/dist/services/session-pool.d.ts.map +1 -0
  178. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  179. package/dist/services/structured-output/index.d.ts +2 -0
  180. package/dist/services/structured-output/index.d.ts.map +1 -0
  181. package/dist/services/structured-output.d.ts.map +1 -0
  182. package/dist/services/system-memory.d.ts.map +1 -0
  183. package/{src → dist}/services/types.d.ts +1 -1
  184. package/dist/services/types.d.ts.map +1 -0
  185. package/dist/services/verify-on-device.d.ts.map +1 -0
  186. package/dist/services/verify.d.ts.map +1 -0
  187. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  188. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  189. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  190. package/dist/services/vision/hash.d.ts.map +1 -0
  191. package/{src → dist}/services/vision/index.d.ts +1 -1
  192. package/dist/services/vision/index.d.ts.map +1 -0
  193. package/dist/services/vision/llama-server.d.ts.map +1 -0
  194. package/{src → dist}/services/vision/types.d.ts +13 -4
  195. package/dist/services/vision/types.d.ts.map +1 -0
  196. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  197. package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
  198. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  199. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  200. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  201. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  202. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  203. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  204. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  205. package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
  206. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  207. package/dist/services/voice/barge-in.d.ts.map +1 -0
  208. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  209. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  210. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  211. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  212. package/dist/services/voice/corpus-augment.d.ts +111 -0
  213. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  214. package/dist/services/voice/corpus-generator.d.ts +134 -0
  215. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  216. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  217. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  218. package/dist/services/voice/e2e-harness.d.ts +297 -0
  219. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  220. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  221. package/dist/services/voice/echo-delay.d.ts +67 -0
  222. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  223. package/dist/services/voice/echo-metrics.d.ts +7 -0
  224. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  225. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  226. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  227. package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
  228. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  229. package/dist/services/voice/embedding-server.d.ts +37 -0
  230. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  231. package/{src → dist}/services/voice/embedding.d.ts +2 -3
  232. package/dist/services/voice/embedding.d.ts.map +1 -0
  233. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  234. package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
  235. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  236. package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
  237. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  238. package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
  239. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  240. package/{src → dist}/services/voice/errors.d.ts +1 -1
  241. package/dist/services/voice/errors.d.ts.map +1 -0
  242. package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
  243. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  244. package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
  245. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  246. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  247. package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
  248. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  249. package/{src → dist}/services/voice/index.d.ts +8 -3
  250. package/dist/services/voice/index.d.ts.map +1 -0
  251. package/dist/services/voice/kokoro/index.d.ts +24 -0
  252. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  253. package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
  254. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  255. package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
  256. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  257. package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
  258. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  259. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  260. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  261. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  262. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  263. package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
  264. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  265. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  266. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  267. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  268. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  269. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  270. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  271. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  272. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  273. package/dist/services/voice/metric-math.d.ts +10 -0
  274. package/dist/services/voice/metric-math.d.ts.map +1 -0
  275. package/{src → dist}/services/voice/mic-source.d.ts +1 -1
  276. package/dist/services/voice/mic-source.d.ts.map +1 -0
  277. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  278. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  279. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  280. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  281. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  282. package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
  283. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  284. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  285. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  286. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  287. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  288. package/dist/services/voice/pipeline.d.ts.map +1 -0
  289. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  290. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  291. package/dist/services/voice/profile-store.d.ts.map +1 -0
  292. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  293. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  294. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  295. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  296. package/dist/services/voice/scheduler.d.ts.map +1 -0
  297. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  298. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  299. package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
  300. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  301. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  302. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  303. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  304. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  305. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  306. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  307. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  308. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  309. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  310. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  311. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  312. package/{src → dist}/services/voice/transcriber.d.ts +4 -4
  313. package/dist/services/voice/transcriber.d.ts.map +1 -0
  314. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  315. package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
  316. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  317. package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
  318. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  319. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  320. package/{src → dist}/services/voice/types.d.ts +6 -6
  321. package/dist/services/voice/types.d.ts.map +1 -0
  322. package/{src → dist}/services/voice/vad.d.ts +6 -5
  323. package/dist/services/voice/vad.d.ts.map +1 -0
  324. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  325. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  326. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  327. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  328. package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
  329. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  330. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  331. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  332. package/dist/services/voice/voice-scenario.d.ts +131 -0
  333. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  334. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  335. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  336. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  337. package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
  338. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  339. package/dist/services/voice/wake-word.d.ts.map +1 -0
  340. package/dist/services/voice/wav-codec.d.ts +11 -0
  341. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  342. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  343. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  344. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  345. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  346. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  347. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  348. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  349. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  350. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  351. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  352. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  353. package/dist/services/voice-model-updater.d.ts.map +1 -0
  354. package/dist/services/voice-prewarm.d.ts.map +1 -0
  355. package/dist/voice-workbench.d.ts +18 -0
  356. package/dist/voice-workbench.d.ts.map +1 -0
  357. package/dist/voice-workbench.js +5259 -0
  358. package/dist/voice-workbench.js.map +34 -0
  359. package/package.json +28 -9
  360. package/registry-entry.json +137 -0
  361. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  362. package/src/adapters/capacitor-llama/environment.ts +1 -1
  363. package/src/adapters/capacitor-llama/index.ts +28 -4
  364. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  365. package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
  366. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  367. package/src/backends/apple-foundation.ts +1 -1
  368. package/src/local-inference-routes.test.ts +57 -11
  369. package/src/local-inference-routes.ts +90 -8
  370. package/src/provider.ts +32 -3
  371. package/src/routes/compat-helpers.ts +2 -1
  372. package/src/routes/index.ts +1 -0
  373. package/src/routes/live-diarization-route.test.ts +134 -0
  374. package/src/routes/live-diarization-route.ts +79 -3
  375. package/src/routes/local-inference-asr-route.test.ts +43 -2
  376. package/src/routes/local-inference-asr-route.ts +7 -4
  377. package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
  378. package/src/routes/local-inference-asr-transcribe.ts +1 -1
  379. package/src/routes/local-inference-compat-routes.test.ts +3 -3
  380. package/src/routes/local-inference-compat-routes.ts +23 -56
  381. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  382. package/src/routes/native-pcm-turn-route.ts +121 -0
  383. package/src/routes/transcripts-routes.test.ts +51 -0
  384. package/src/routes/transcripts-routes.ts +35 -3
  385. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  386. package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
  387. package/src/runtime/ensure-local-inference-handler.ts +203 -11
  388. package/src/runtime/index.ts +4 -1
  389. package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
  390. package/src/runtime/mobile-local-inference-gate.ts +60 -5
  391. package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
  392. package/src/runtime/voice-entity-binding.ts +46 -6
  393. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  394. package/src/services/README.md +2 -2
  395. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  396. package/src/services/active-model-context-fit.test.ts +125 -0
  397. package/src/services/active-model.ts +211 -8
  398. package/src/services/asr-provenance.ts +68 -0
  399. package/src/services/assignment-validation.test.ts +118 -0
  400. package/src/services/assignments.test.ts +26 -0
  401. package/src/services/assignments.ts +52 -4
  402. package/src/services/backend.test.ts +84 -0
  403. package/src/services/backend.ts +198 -19
  404. package/src/services/bionic-host-loader.test.ts +94 -1
  405. package/src/services/bionic-host-loader.ts +72 -0
  406. package/src/services/cache-bridge.test.ts +7 -7
  407. package/src/services/catalog.test.ts +32 -11
  408. package/src/services/catalog.ts +6 -0
  409. package/src/services/cloud-fallback.ts +1 -1
  410. package/src/services/context-fit.test.ts +121 -0
  411. package/src/services/context-fit.ts +113 -0
  412. package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
  413. package/src/services/device-tier.test.ts +89 -2
  414. package/src/services/device-tier.ts +103 -11
  415. package/src/services/downloader.test.ts +199 -58
  416. package/src/services/downloader.ts +141 -27
  417. package/src/services/engine-direct-bundle.test.ts +38 -6
  418. package/src/services/engine.ts +291 -104
  419. package/src/services/ensure-local-artifacts.ts +1 -1
  420. package/src/services/ffi-llm-streaming-abi.ts +6 -3
  421. package/src/services/ffi-streaming-backend.ts +44 -8
  422. package/src/services/ffi-streaming-runner.test.ts +163 -3
  423. package/src/services/ffi-streaming-runner.ts +54 -1
  424. package/src/services/ffi-unload-ordering.test.ts +5 -1
  425. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  426. package/src/services/hardware.test.ts +7 -2
  427. package/src/services/hardware.ts +28 -0
  428. package/src/services/imagegen/backend-selector.test.ts +190 -0
  429. package/src/services/imagegen/sd-cpp.ts +6 -9
  430. package/src/services/index.ts +18 -0
  431. package/src/services/ios-llama-streaming.ts +1 -1
  432. package/src/services/kv-spill.ts +6 -5
  433. package/src/services/lib-target.test.ts +145 -0
  434. package/src/services/lib-target.ts +102 -0
  435. package/src/services/live-signals.test.ts +132 -0
  436. package/src/services/live-signals.ts +177 -0
  437. package/src/services/llama-server-metrics.test.ts +168 -0
  438. package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
  439. package/src/services/manifest/index.ts +6 -0
  440. package/src/services/manifest/manifest.test.ts +156 -54
  441. package/src/services/manifest/schema.ts +160 -52
  442. package/src/services/manifest/types.ts +6 -0
  443. package/src/services/manifest/validator.ts +91 -25
  444. package/src/services/memory-arbiter.test.ts +139 -0
  445. package/src/services/memory-arbiter.ts +81 -15
  446. package/src/services/memory-benchmark.test.ts +91 -0
  447. package/src/services/memory-benchmark.ts +354 -0
  448. package/src/services/memory-monitor.test.ts +24 -0
  449. package/src/services/memory-monitor.ts +12 -0
  450. package/src/services/mtp-doctor.ts +10 -2
  451. package/src/services/network-policy.ts +5 -5
  452. package/src/services/ram-budget-cache.test.ts +2 -1
  453. package/src/services/ram-budget.ts +0 -0
  454. package/src/services/recommendation.test.ts +216 -0
  455. package/src/services/registry.ts +25 -19
  456. package/src/services/required-kernels-gate.test.ts +64 -0
  457. package/src/services/router-handler.ts +43 -24
  458. package/src/services/routing-policy.test.ts +211 -23
  459. package/src/services/routing-policy.ts +92 -22
  460. package/src/services/service.test.ts +3 -3
  461. package/src/services/service.ts +22 -7
  462. package/src/services/transcription-priority.test.ts +2 -2
  463. package/src/services/types.ts +4 -0
  464. package/src/services/verify-on-device.test.ts +2 -2
  465. package/src/services/vision/hash.ts +1 -1
  466. package/src/services/vision/index.ts +2 -2
  467. package/src/services/vision/llama-server.ts +1 -1
  468. package/src/services/vision/types.ts +13 -4
  469. package/src/services/vision-embedding-cache.ts +1 -1
  470. package/src/services/voice/VOICE_WORKBENCH.md +71 -26
  471. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  472. package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
  473. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
  474. package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
  475. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  476. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  477. package/src/services/voice/asr-timed.real.test.ts +6 -8
  478. package/src/services/voice/audio-frame-consumer.test.ts +327 -1
  479. package/src/services/voice/audio-frame-consumer.ts +165 -5
  480. package/src/services/voice/barge-in.ts +2 -3
  481. package/src/services/voice/corpus-augment.test.ts +276 -0
  482. package/src/services/voice/corpus-augment.ts +451 -0
  483. package/src/services/voice/corpus-generator.test.ts +201 -0
  484. package/src/services/voice/corpus-generator.ts +413 -0
  485. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  486. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  487. package/src/services/voice/diarization-error-rate.ts +249 -0
  488. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  489. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  490. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  491. package/src/services/voice/e2e-harness.test.ts +2 -2
  492. package/src/services/voice/e2e-harness.ts +175 -16
  493. package/src/services/voice/echo-delay.test.ts +118 -0
  494. package/src/services/voice/echo-delay.ts +135 -0
  495. package/src/services/voice/echo-metrics.test.ts +17 -0
  496. package/src/services/voice/echo-metrics.ts +20 -0
  497. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  498. package/src/services/voice/echo-reference-buffer.ts +165 -0
  499. package/src/services/voice/eliza1-eot-scorer.ts +22 -22
  500. package/src/services/voice/embedding.ts +2 -3
  501. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  502. package/src/services/voice/engine-bridge.ts +151 -110
  503. package/src/services/voice/eot-classifier-ggml.ts +42 -39
  504. package/src/services/voice/eot-classifier.test.ts +98 -0
  505. package/src/services/voice/eot-classifier.ts +11 -122
  506. package/src/services/voice/errors.ts +2 -0
  507. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  508. package/src/services/voice/expressive-tags.test.ts +102 -0
  509. package/src/services/voice/expressive-tags.ts +8 -8
  510. package/src/services/voice/ffi-bindings.test.ts +10 -3
  511. package/src/services/voice/ffi-bindings.ts +177 -15
  512. package/src/services/voice/fused-eot-scorer.ts +17 -13
  513. package/src/services/voice/index.ts +33 -12
  514. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
  515. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
  516. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
  517. package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
  518. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
  519. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
  520. package/src/services/voice/kokoro/pick-runtime.ts +1 -1
  521. package/src/services/voice/kokoro/runtime-selection.ts +28 -201
  522. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  523. package/src/services/voice/live-diarization-session.ts +335 -2
  524. package/src/services/voice/metric-math.test.ts +61 -0
  525. package/src/services/voice/metric-math.ts +25 -0
  526. package/src/services/voice/mic-source.ts +1 -1
  527. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  528. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  529. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  530. package/src/services/voice/partial-stabilizer.ts +1 -1
  531. package/src/services/voice/pipeline.ts +3 -4
  532. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  533. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  534. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  535. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  536. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  537. package/src/services/voice/self-voice-imprint.ts +102 -0
  538. package/src/services/voice/shared-resources.ts +23 -0
  539. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  540. package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
  541. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  542. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  543. package/src/services/voice/transcriber.ts +4 -4
  544. package/src/services/voice/transcript-service.test.ts +58 -0
  545. package/src/services/voice/transcript-service.ts +64 -0
  546. package/src/services/voice/transcript-store.test.ts +36 -0
  547. package/src/services/voice/transcript-store.ts +32 -0
  548. package/src/services/voice/types.ts +7 -7
  549. package/src/services/voice/vad.test.ts +33 -15
  550. package/src/services/voice/vad.ts +25 -20
  551. package/src/services/voice/voice-budget.test.ts +0 -3
  552. package/src/services/voice/voice-budget.ts +6 -6
  553. package/src/services/voice/voice-duet.test.ts +1 -1
  554. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  555. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  556. package/src/services/voice/voice-preload-predictor.ts +113 -0
  557. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  558. package/src/services/voice/voice-preset-format.test.ts +75 -0
  559. package/src/services/voice/voice-preset-format.ts +17 -4
  560. package/src/services/voice/voice-scenario.test.ts +159 -0
  561. package/src/services/voice/voice-scenario.ts +133 -7
  562. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  563. package/src/services/voice/voice-workbench-report.ts +58 -17
  564. package/src/services/voice/wake-word-ggml.ts +12 -13
  565. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  566. package/src/services/voice/wav-codec.test.ts +32 -0
  567. package/src/services/voice/wav-codec.ts +101 -0
  568. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  569. package/src/services/voice/workbench-entrypoint.ts +88 -0
  570. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  571. package/src/services/voice/workbench-headless-runner.ts +396 -0
  572. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  573. package/src/services/voice/workbench-logic-services.ts +184 -0
  574. package/src/services/voice/workbench-real-services.ts +629 -0
  575. package/src/services/voice/workbench-scenarios.ts +407 -0
  576. package/src/services/voice-prewarm.ts +1 -1
  577. package/src/voice-workbench.ts +71 -0
  578. package/src/actions/generate-media.d.ts.map +0 -1
  579. package/src/actions/identify-speaker.d.ts.map +0 -1
  580. package/src/actions/transcription-control.d.ts.map +0 -1
  581. package/src/index.d.ts.map +0 -1
  582. package/src/local-inference-routes.d.ts.map +0 -1
  583. package/src/provider.d.ts.map +0 -1
  584. package/src/routes/compat-helpers.d.ts.map +0 -1
  585. package/src/routes/family-member-route.d.ts.map +0 -1
  586. package/src/routes/index.d.ts.map +0 -1
  587. package/src/routes/live-diarization-route.d.ts.map +0 -1
  588. package/src/routes/local-inference-asr-route.d.ts.map +0 -1
  589. package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
  590. package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
  591. package/src/routes/local-inference-tts-route.d.ts.map +0 -1
  592. package/src/routes/transcript-audio-store.d.ts.map +0 -1
  593. package/src/routes/transcripts-routes.d.ts.map +0 -1
  594. package/src/routes/voice-first-run-routes.d.ts.map +0 -1
  595. package/src/routes/voice-models-routes.d.ts.map +0 -1
  596. package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
  597. package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
  598. package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
  599. package/src/runtime/embedding-manager-support.d.ts.map +0 -1
  600. package/src/runtime/embedding-presets.d.ts.map +0 -1
  601. package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
  602. package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
  603. package/src/runtime/index.d.ts.map +0 -1
  604. package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
  605. package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
  606. package/src/runtime/voice-entity-binding.d.ts.map +0 -1
  607. package/src/services/active-model.d.ts.map +0 -1
  608. package/src/services/assignments.d.ts.map +0 -1
  609. package/src/services/backend.d.ts.map +0 -1
  610. package/src/services/bionic-host-loader.d.ts.map +0 -1
  611. package/src/services/bundled-models.d.ts.map +0 -1
  612. package/src/services/cache-bridge.d.ts.map +0 -1
  613. package/src/services/catalog.d.ts +0 -10
  614. package/src/services/catalog.d.ts.map +0 -1
  615. package/src/services/checkpoint-client.d.ts.map +0 -1
  616. package/src/services/cloud-fallback.d.ts.map +0 -1
  617. package/src/services/conversation-registry.d.ts.map +0 -1
  618. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
  619. package/src/services/device-bridge.d.ts.map +0 -1
  620. package/src/services/device-resource-metrics.d.ts.map +0 -1
  621. package/src/services/device-tier.d.ts.map +0 -1
  622. package/src/services/downloader.d.ts.map +0 -1
  623. package/src/services/engine.d.ts.map +0 -1
  624. package/src/services/external-scanner.d.ts.map +0 -1
  625. package/src/services/ffi-streaming-backend.d.ts.map +0 -1
  626. package/src/services/ffi-streaming-runner.d.ts.map +0 -1
  627. package/src/services/gpu-detect.d.ts.map +0 -1
  628. package/src/services/handler-registry.d.ts.map +0 -1
  629. package/src/services/hardware.d.ts.map +0 -1
  630. package/src/services/hf-search.d.ts +0 -26
  631. package/src/services/hf-search.d.ts.map +0 -1
  632. package/src/services/hf-search.test.ts +0 -69
  633. package/src/services/hf-search.ts +0 -420
  634. package/src/services/image-description-runtime.d.ts.map +0 -1
  635. package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
  636. package/src/services/imagegen/backend-selector.d.ts.map +0 -1
  637. package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
  638. package/src/services/imagegen/errors.d.ts.map +0 -1
  639. package/src/services/imagegen/index.d.ts.map +0 -1
  640. package/src/services/imagegen/mflux.d.ts.map +0 -1
  641. package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
  642. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
  643. package/src/services/imagegen/types.d.ts.map +0 -1
  644. package/src/services/index.d.ts.map +0 -1
  645. package/src/services/inference-capabilities.d.ts.map +0 -1
  646. package/src/services/inference-telemetry.d.ts.map +0 -1
  647. package/src/services/kv-spill.d.ts.map +0 -1
  648. package/src/services/latency-trace.d.ts.map +0 -1
  649. package/src/services/llm-streaming-binding.d.ts.map +0 -1
  650. package/src/services/load-args.d.ts.map +0 -1
  651. package/src/services/manifest/index.d.ts +0 -4
  652. package/src/services/manifest/index.d.ts.map +0 -1
  653. package/src/services/manifest/schema.d.ts.map +0 -1
  654. package/src/services/manifest/types.d.ts.map +0 -1
  655. package/src/services/manifest/validator.d.ts.map +0 -1
  656. package/src/services/memory-arbiter.d.ts.map +0 -1
  657. package/src/services/memory-monitor.d.ts.map +0 -1
  658. package/src/services/memory-pressure.d.ts.map +0 -1
  659. package/src/services/mtp-doctor.d.ts.map +0 -1
  660. package/src/services/network-policy.d.ts.map +0 -1
  661. package/src/services/paths.d.ts.map +0 -1
  662. package/src/services/planner-skeleton.d.ts.map +0 -1
  663. package/src/services/providers.d.ts.map +0 -1
  664. package/src/services/ram-budget.d.ts.map +0 -1
  665. package/src/services/readiness.d.ts.map +0 -1
  666. package/src/services/recommendation.d.ts.map +0 -1
  667. package/src/services/registry.d.ts.map +0 -1
  668. package/src/services/router-handler.d.ts.map +0 -1
  669. package/src/services/routing-policy.d.ts.map +0 -1
  670. package/src/services/routing-preferences.d.ts.map +0 -1
  671. package/src/services/runtime-target.d.ts.map +0 -1
  672. package/src/services/service.d.ts.map +0 -1
  673. package/src/services/session-pool.d.ts.map +0 -1
  674. package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
  675. package/src/services/structured-output.d.ts.map +0 -1
  676. package/src/services/system-memory.d.ts.map +0 -1
  677. package/src/services/types.d.ts.map +0 -1
  678. package/src/services/verify-on-device.d.ts.map +0 -1
  679. package/src/services/verify.d.ts.map +0 -1
  680. package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
  681. package/src/services/vision/capacitor-llama.d.ts.map +0 -1
  682. package/src/services/vision/cloud-fallback.d.ts.map +0 -1
  683. package/src/services/vision/hash.d.ts.map +0 -1
  684. package/src/services/vision/index.d.ts.map +0 -1
  685. package/src/services/vision/llama-server.d.ts.map +0 -1
  686. package/src/services/vision/types.d.ts.map +0 -1
  687. package/src/services/vision/vast-fallback.d.ts.map +0 -1
  688. package/src/services/vision-embedding-cache.d.ts.map +0 -1
  689. package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
  690. package/src/services/voice/barge-in.d.ts.map +0 -1
  691. package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
  692. package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
  693. package/src/services/voice/eager-context-builder.d.ts.map +0 -1
  694. package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
  695. package/src/services/voice/embedding.d.ts.map +0 -1
  696. package/src/services/voice/emotion-attribution.d.ts.map +0 -1
  697. package/src/services/voice/engine-bridge.d.ts.map +0 -1
  698. package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
  699. package/src/services/voice/eot-classifier.d.ts.map +0 -1
  700. package/src/services/voice/errors.d.ts.map +0 -1
  701. package/src/services/voice/expressive-tags.d.ts.map +0 -1
  702. package/src/services/voice/ffi-bindings.d.ts.map +0 -1
  703. package/src/services/voice/first-line-cache.d.ts.map +0 -1
  704. package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
  705. package/src/services/voice/index.d.ts.map +0 -1
  706. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
  707. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
  708. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
  709. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
  710. package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
  711. package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
  712. package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
  713. package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
  714. package/src/services/voice/kokoro/types.d.ts.map +0 -1
  715. package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
  716. package/src/services/voice/kokoro/voices.d.ts.map +0 -1
  717. package/src/services/voice/lifecycle.d.ts.map +0 -1
  718. package/src/services/voice/live-diarization-session.d.ts +0 -96
  719. package/src/services/voice/live-diarization-session.d.ts.map +0 -1
  720. package/src/services/voice/mic-source.d.ts.map +0 -1
  721. package/src/services/voice/optimistic-policy.d.ts.map +0 -1
  722. package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
  723. package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
  724. package/src/services/voice/phrase-cache.d.ts.map +0 -1
  725. package/src/services/voice/phrase-chunker.d.ts.map +0 -1
  726. package/src/services/voice/pipeline-impls.d.ts.map +0 -1
  727. package/src/services/voice/pipeline.d.ts.map +0 -1
  728. package/src/services/voice/prefill-client.d.ts.map +0 -1
  729. package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
  730. package/src/services/voice/profile-store.d.ts.map +0 -1
  731. package/src/services/voice/ring-buffer.d.ts.map +0 -1
  732. package/src/services/voice/rollback-queue.d.ts.map +0 -1
  733. package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
  734. package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
  735. package/src/services/voice/scheduler.d.ts.map +0 -1
  736. package/src/services/voice/shared-resources.d.ts.map +0 -1
  737. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
  738. package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
  739. package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
  740. package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
  741. package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
  742. package/src/services/voice/speaker/encoder.d.ts.map +0 -1
  743. package/src/services/voice/speaker-imprint.d.ts.map +0 -1
  744. package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
  745. package/src/services/voice/system-audio-sink.d.ts.map +0 -1
  746. package/src/services/voice/transcriber.d.ts.map +0 -1
  747. package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
  748. package/src/services/voice/transcript-service.d.ts.map +0 -1
  749. package/src/services/voice/transcript-store.d.ts.map +0 -1
  750. package/src/services/voice/turn-controller.d.ts.map +0 -1
  751. package/src/services/voice/types.d.ts.map +0 -1
  752. package/src/services/voice/vad.d.ts.map +0 -1
  753. package/src/services/voice/voice-budget.d.ts.map +0 -1
  754. package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
  755. package/src/services/voice/voice-preset-format.d.ts.map +0 -1
  756. package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
  757. package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
  758. package/src/services/voice/voice-settings.d.ts +0 -82
  759. package/src/services/voice/voice-settings.d.ts.map +0 -1
  760. package/src/services/voice/voice-settings.ts +0 -172
  761. package/src/services/voice/voice-state-machine.d.ts.map +0 -1
  762. package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
  763. package/src/services/voice/wake-word.d.ts.map +0 -1
  764. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
  765. package/src/services/voice-model-updater.d.ts.map +0 -1
  766. package/src/services/voice-prewarm.d.ts.map +0 -1
  767. /package/{src → dist}/actions/generate-media.d.ts +0 -0
  768. /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
  769. /package/{src → dist}/actions/transcription-control.d.ts +0 -0
  770. /package/{src → dist}/index.d.ts +0 -0
  771. /package/{src → dist}/provider.d.ts +0 -0
  772. /package/{src → dist}/routes/family-member-route.d.ts +0 -0
  773. /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
  774. /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
  775. /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
  776. /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
  777. /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
  778. /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
  779. /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
  780. /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
  781. /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
  782. /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
  783. /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
  784. /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
  785. /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
  786. /package/{src → dist}/services/bundled-models.d.ts +0 -0
  787. /package/{src → dist}/services/cache-bridge.d.ts +0 -0
  788. /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
  789. /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
  790. /package/{src → dist}/services/conversation-registry.d.ts +0 -0
  791. /package/{src → dist}/services/device-bridge.d.ts +0 -0
  792. /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
  793. /package/{src → dist}/services/external-scanner.d.ts +0 -0
  794. /package/{src → dist}/services/gpu-detect.d.ts +0 -0
  795. /package/{src → dist}/services/handler-registry.d.ts +0 -0
  796. /package/{src → dist}/services/hardware.d.ts +0 -0
  797. /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
  798. /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
  799. /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
  800. /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
  801. /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
  802. /package/{src → dist}/services/imagegen/index.d.ts +0 -0
  803. /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
  804. /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
  805. /package/{src → dist}/services/imagegen/types.d.ts +0 -0
  806. /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
  807. /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
  808. /package/{src → dist}/services/kv-spill.d.ts +0 -0
  809. /package/{src → dist}/services/latency-trace.d.ts +0 -0
  810. /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
  811. /package/{src → dist}/services/load-args.d.ts +0 -0
  812. /package/{src → dist}/services/manifest/validator.d.ts +0 -0
  813. /package/{src → dist}/services/memory-pressure.d.ts +0 -0
  814. /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
  815. /package/{src → dist}/services/network-policy.d.ts +0 -0
  816. /package/{src → dist}/services/paths.d.ts +0 -0
  817. /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
  818. /package/{src → dist}/services/providers.d.ts +0 -0
  819. /package/{src → dist}/services/ram-budget.d.ts +0 -0
  820. /package/{src → dist}/services/readiness.d.ts +0 -0
  821. /package/{src → dist}/services/recommendation.d.ts +0 -0
  822. /package/{src → dist}/services/routing-preferences.d.ts +0 -0
  823. /package/{src → dist}/services/runtime-target.d.ts +0 -0
  824. /package/{src → dist}/services/session-pool.d.ts +0 -0
  825. /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
  826. /package/{src → dist}/services/structured-output.d.ts +0 -0
  827. /package/{src → dist}/services/system-memory.d.ts +0 -0
  828. /package/{src → dist}/services/verify-on-device.d.ts +0 -0
  829. /package/{src → dist}/services/verify.d.ts +0 -0
  830. /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
  831. /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
  832. /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
  833. /package/{src → dist}/services/vision/hash.d.ts +0 -0
  834. /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
  835. /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
  836. /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
  837. /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
  838. /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
  839. /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
  840. /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
  841. /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
  842. /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
  843. /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
  844. /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
  845. /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
  846. /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
  847. /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
  848. /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
  849. /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
  850. /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
  851. /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
  852. /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
  853. /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
  854. /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
  855. /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
  856. /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
  857. /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
  858. /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
  859. /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
  860. /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
  861. /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
  862. /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
  863. /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
  864. /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
  865. /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
  866. /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
  867. /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
  868. /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
  869. /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
  870. /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
  871. /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
  872. /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
  873. /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
  874. /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
  875. /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
  876. /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
  877. /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
  878. /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
  879. /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
  880. /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
  881. /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
@@ -83,10 +83,10 @@ export interface FfiBackendSession {
83
83
  } | null;
84
84
  /**
85
85
  * Absolute path to a *separate* MTP drafter GGUF resolved during load.
86
- * `null` means same-file MTP: the NextN head is embedded in the main
87
- * text GGUF and the native runner activates `--spec-type draft-mtp`
88
- * with no `-md`. Speculative decoding is governed by `mtp`, not by the
89
- * presence of this path.
86
+ * `null` means embedded-draft-head MTP: the draft head is embedded in
87
+ * the main text GGUF and the native runner activates `--spec-type
88
+ * draft-mtp` with no `-md`. Speculative decoding is governed by `mtp`,
89
+ * not by the presence of this path.
90
90
  */
91
91
  readonly draftModelPath: string | null;
92
92
  /**
@@ -97,13 +97,14 @@ export interface FfiBackendSession {
97
97
  readonly mmprojPath: string | null;
98
98
  /**
99
99
  * Per-load runtime config the fused libelizainference path applies at its
100
- * first `llmStreamOpen` (gpuLayers + KV-cache quant types). The desktop
100
+ * first `llmStreamOpen` (context size, gpuLayers + KV-cache quant types). The desktop
101
101
  * libllama runtime applies these at `loadModel()` instead and leaves this
102
102
  * `null` — the backend forwards them into the runner's per-call config only
103
103
  * when present, so the fused path mirrors the libllama load decision without
104
104
  * the libllama path double-applying them.
105
105
  */
106
106
  readonly loadConfig?: {
107
+ contextSize?: number;
107
108
  gpuLayers?: number;
108
109
  cacheTypeK?: string | null;
109
110
  cacheTypeV?: string | null;
@@ -195,7 +196,9 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
195
196
  gpuLayers: loadConfig?.gpuLayers,
196
197
  cacheTypeK: loadConfig?.cacheTypeK,
197
198
  cacheTypeV: loadConfig?.cacheTypeV,
199
+ contextSize: loadConfig?.contextSize,
198
200
  signal: args.signal,
201
+ maxTokensPerStep: args.maxTokensPerStep,
199
202
  onTextChunk: args.onTextChunk,
200
203
  onVerifierEvent: args.onVerifierEvent,
201
204
  });
@@ -280,14 +283,39 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
280
283
  gpuLayers: loadConfig?.gpuLayers,
281
284
  cacheTypeK: loadConfig?.cacheTypeK,
282
285
  cacheTypeV: loadConfig?.cacheTypeV,
286
+ contextSize: loadConfig?.contextSize,
283
287
  });
284
288
  return true;
285
289
  }
286
290
 
291
+ currentRuntimeLoadConfig() {
292
+ if (!this.session) return null;
293
+ const loadConfig = this.session.loadConfig;
294
+ return {
295
+ modelId: null,
296
+ modelPath: this.loadedPath,
297
+ contextSize: loadConfig?.contextSize ?? null,
298
+ cacheTypeK: loadConfig?.cacheTypeK ?? null,
299
+ cacheTypeV: loadConfig?.cacheTypeV ?? null,
300
+ gpuLayers:
301
+ typeof loadConfig?.gpuLayers === "number" ? loadConfig.gpuLayers : null,
302
+ parallel: this.parallelSlots(),
303
+ binaryPath: null,
304
+ backend: this.id,
305
+ mtp: this.session.mtp
306
+ ? {
307
+ specType: "draft-mtp" as const,
308
+ draftMin: this.session.mtp.draftMin,
309
+ draftMax: this.session.mtp.draftMax,
310
+ }
311
+ : null,
312
+ };
313
+ }
314
+
287
315
  /**
288
316
  * True when Eliza-1 native MTP is active for the loaded target model.
289
- * Covers both shapes: same-file MTP (NextN head embedded in the text
290
- * GGUF, `draftModelPath` null) and separate-drafter MTP.
317
+ * Covers both shapes: embedded-draft-head MTP (draft head embedded in
318
+ * the text GGUF, `draftModelPath` null) and separate-drafter MTP.
291
319
  */
292
320
  mtpEnabled(): boolean {
293
321
  return Boolean(this.session?.mtp);
@@ -327,6 +355,10 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
327
355
  maxTokens?: number;
328
356
  temperature?: number;
329
357
  signal?: AbortSignal;
358
+ /** Per-token callback — when set + the runtime supports streaming vision,
359
+ * the description is decoded token-by-token through the same pipe as chat. */
360
+ onTextChunk?: (chunk: string) => void | Promise<void>;
361
+ maxTokensPerStep?: number;
330
362
  }): Promise<{ text: string; projectorMs?: number; decodeMs?: number }> {
331
363
  if (!this.session) {
332
364
  throw new Error(
@@ -342,7 +374,7 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
342
374
  // The runtime adapter has visionSupported() + describeImage(args).
343
375
  // We re-shape `bytes` → `imageBytes` and merge in the resolved
344
376
  // mmprojPath; the rest of args pass through unchanged.
345
- const runtime = this.runtime as unknown as {
377
+ const runtime = this.runtime as FfiBackendRuntime & {
346
378
  describeImage?: (args: {
347
379
  imageBytes: Uint8Array;
348
380
  mmprojPath: string;
@@ -350,6 +382,8 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
350
382
  maxTokens?: number;
351
383
  temperature?: number;
352
384
  signal?: AbortSignal;
385
+ onTextChunk?: (chunk: string) => void | Promise<void>;
386
+ maxTokensPerStep?: number;
353
387
  }) => Promise<{ text: string; projectorMs?: number; decodeMs?: number }>;
354
388
  };
355
389
  if (!runtime.describeImage) {
@@ -364,6 +398,8 @@ export class FfiStreamingBackend implements LocalInferenceBackend {
364
398
  maxTokens: args.maxTokens,
365
399
  temperature: args.temperature,
366
400
  signal: args.signal,
401
+ onTextChunk: args.onTextChunk,
402
+ maxTokensPerStep: args.maxTokensPerStep,
367
403
  });
368
404
  }
369
405
 
@@ -1,10 +1,63 @@
1
- import { describe, expect, it, vi } from "vitest";
2
- import { FfiStreamingRunner } from "./ffi-streaming-runner";
1
+ import { afterEach, describe, expect, it, vi } from "vitest";
2
+ import {
3
+ FfiStreamingRunner,
4
+ resolveMaxTokensPerStep,
5
+ } from "./ffi-streaming-runner";
3
6
  import type {
4
7
  LlmCtxHandle,
5
8
  LlmStreamingBinding,
6
9
  } from "./llm-streaming-binding";
7
- import type { LlmStreamHandle } from "./voice/ffi-bindings";
10
+ import type { LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
11
+
12
+ /**
13
+ * Build a binding whose `llmStreamNext` emits `steps.length` steps (the last
14
+ * with `done: true`), and that records the `maxTokensPerStep` passed on every
15
+ * call so tests can assert the resolved per-step cap.
16
+ */
17
+ function makeStepBinding(steps: string[]): {
18
+ binding: LlmStreamingBinding;
19
+ stepCaps: number[];
20
+ } {
21
+ const stream = 7n as LlmStreamHandle;
22
+ const stepCaps: number[] = [];
23
+ let i = 0;
24
+ const llmStreamNext = vi.fn(
25
+ (args: { maxTokensPerStep?: number }): LlmStreamStep => {
26
+ stepCaps.push(args.maxTokensPerStep ?? -1);
27
+ const text = steps[i] ?? "";
28
+ const done = i >= steps.length - 1;
29
+ i += 1;
30
+ return {
31
+ tokens: [i],
32
+ text,
33
+ done,
34
+ drafterDrafted: 0,
35
+ drafterAccepted: 0,
36
+ };
37
+ },
38
+ );
39
+ const binding: LlmStreamingBinding = {
40
+ llmStreamSupported: () => true,
41
+ llmStreamOpen: vi.fn().mockReturnValue(stream),
42
+ llmStreamPrefill: vi.fn(),
43
+ llmStreamNext,
44
+ llmStreamCancel: vi.fn(),
45
+ llmStreamClose: vi.fn(),
46
+ };
47
+ return { binding, stepCaps };
48
+ }
49
+
50
+ const BASE_ARGS = {
51
+ slotId: 0,
52
+ maxTokens: 64,
53
+ temperature: 0,
54
+ topP: 1,
55
+ topK: 0,
56
+ repeatPenalty: 1,
57
+ draftMin: 0,
58
+ draftMax: 0,
59
+ draftModelPath: null,
60
+ } as const;
8
61
 
9
62
  describe("FfiStreamingRunner prewarm", () => {
10
63
  it("treats maxTokens: 0 as prefill-only and never calls next-token generation", async () => {
@@ -38,10 +91,17 @@ describe("FfiStreamingRunner prewarm", () => {
38
91
  draftMin: 0,
39
92
  draftMax: 0,
40
93
  draftModelPath: null,
94
+ contextSize: 32_768,
41
95
  onTextChunk,
42
96
  });
43
97
 
44
98
  expect(binding.llmStreamOpen).toHaveBeenCalledTimes(1);
99
+ expect(binding.llmStreamOpen).toHaveBeenCalledWith({
100
+ ctx: 1n,
101
+ config: expect.objectContaining({
102
+ contextSize: 32_768,
103
+ }),
104
+ });
45
105
  expect(binding.llmStreamPrefill).toHaveBeenCalledWith({
46
106
  stream,
47
107
  tokens: promptTokens,
@@ -58,3 +118,103 @@ describe("FfiStreamingRunner prewarm", () => {
58
118
  });
59
119
  });
60
120
  });
121
+
122
+ describe("FfiStreamingRunner per-step granularity (#9174)", () => {
123
+ const ORIGINAL = process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
124
+ afterEach(() => {
125
+ if (ORIGINAL === undefined) {
126
+ delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
127
+ } else {
128
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = ORIGINAL;
129
+ }
130
+ });
131
+
132
+ it("defaults to a 32-token per-step cap when no override is set", async () => {
133
+ delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
134
+ const { binding, stepCaps } = makeStepBinding(["Hi ", "there"]);
135
+ const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
136
+ await runner.generateWithUsage({
137
+ ...BASE_ARGS,
138
+ promptTokens: new Int32Array([1]),
139
+ });
140
+ expect(stepCaps).toEqual([32, 32]);
141
+ });
142
+
143
+ it("forwards a per-call maxTokensPerStep to every llmStreamNext call", async () => {
144
+ delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
145
+ const { binding, stepCaps } = makeStepBinding(["a", "b", "c"]);
146
+ const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
147
+ await runner.generateWithUsage({
148
+ ...BASE_ARGS,
149
+ promptTokens: new Int32Array([1]),
150
+ maxTokensPerStep: 4,
151
+ });
152
+ expect(stepCaps).toEqual([4, 4, 4]);
153
+ });
154
+
155
+ it("honors the ELIZA_LOCAL_STREAM_TOKENS_PER_STEP env override", async () => {
156
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "8";
157
+ const { binding, stepCaps } = makeStepBinding(["x", "y"]);
158
+ const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
159
+ await runner.generateWithUsage({
160
+ ...BASE_ARGS,
161
+ promptTokens: new Int32Array([1]),
162
+ });
163
+ expect(stepCaps).toEqual([8, 8]);
164
+ });
165
+
166
+ it("lets a per-call override win over the env var", async () => {
167
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "8";
168
+ const { binding, stepCaps } = makeStepBinding(["x"]);
169
+ const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
170
+ await runner.generateWithUsage({
171
+ ...BASE_ARGS,
172
+ promptTokens: new Int32Array([1]),
173
+ maxTokensPerStep: 1,
174
+ });
175
+ expect(stepCaps).toEqual([1]);
176
+ });
177
+
178
+ it("clamps out-of-range per-call overrides into the supported window", async () => {
179
+ delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
180
+ const low = makeStepBinding(["x"]);
181
+ const runnerLow = new FfiStreamingRunner(low.binding, 1n as LlmCtxHandle);
182
+ await runnerLow.generateWithUsage({
183
+ ...BASE_ARGS,
184
+ promptTokens: new Int32Array([1]),
185
+ maxTokensPerStep: 0,
186
+ });
187
+ // 0 floors to the minimum of 1, never disables generation.
188
+ expect(low.stepCaps).toEqual([1]);
189
+
190
+ const high = makeStepBinding(["x"]);
191
+ const runnerHigh = new FfiStreamingRunner(high.binding, 1n as LlmCtxHandle);
192
+ await runnerHigh.generateWithUsage({
193
+ ...BASE_ARGS,
194
+ promptTokens: new Int32Array([1]),
195
+ maxTokensPerStep: 100_000,
196
+ });
197
+ expect(high.stepCaps).toEqual([512]);
198
+ });
199
+
200
+ describe("resolveMaxTokensPerStep", () => {
201
+ it("returns 32 when unset", () => {
202
+ delete process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP;
203
+ expect(resolveMaxTokensPerStep()).toBe(32);
204
+ });
205
+
206
+ it("parses and clamps a valid env value", () => {
207
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "16";
208
+ expect(resolveMaxTokensPerStep()).toBe(16);
209
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "9999";
210
+ expect(resolveMaxTokensPerStep()).toBe(512);
211
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "0";
212
+ expect(resolveMaxTokensPerStep()).toBe(1);
213
+ });
214
+
215
+ it("falls back to 32 on a non-numeric env value", () => {
216
+ process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP = "smooth";
217
+ expect(resolveMaxTokensPerStep()).toBe(32);
218
+ });
219
+ });
220
+ });
@@ -61,6 +61,12 @@ export interface FfiStreamingGenerateArgs {
61
61
  */
62
62
  cacheTypeK?: string | null;
63
63
  cacheTypeV?: string | null;
64
+ /**
65
+ * Runtime context window in tokens (ABI v9). Forwarded into the fused
66
+ * session config on `llmStreamOpen`; `undefined` keeps the native
67
+ * ELIZA_LLM_N_CTX/default fallback.
68
+ */
69
+ contextSize?: number;
64
70
  /**
65
71
  * GBNF grammar source forcing the structured-reply envelope. Passed to
66
72
  * the native session's `llmStreamOpen` config so sampling is
@@ -69,6 +75,16 @@ export interface FfiStreamingGenerateArgs {
69
75
  gbnfGrammar?: string | null;
70
76
  /** Cancellation signal — fires `llmStreamCancel` on the active session. */
71
77
  signal?: AbortSignal;
78
+ /**
79
+ * Per-step token cap for the native decode loop. Lower values make the
80
+ * local UI stream in finer-grained jumps (smoother token-by-token render)
81
+ * at the cost of more JS↔FFI round-trips per reply; higher values batch
82
+ * more tokens per step. When omitted, falls back to
83
+ * `resolveMaxTokensPerStep()` (env `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP`,
84
+ * else `DEFAULT_MAX_TOKENS_PER_STEP`). Clamped to
85
+ * `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
86
+ */
87
+ maxTokensPerStep?: number;
72
88
  /** Per-chunk text callback. */
73
89
  onTextChunk?: (chunk: string) => void | Promise<void>;
74
90
  /** Speculative accept/reject events from MTP verification. */
@@ -86,6 +102,37 @@ export interface FfiStreamingGenerateResult {
86
102
  /** Default per-step caps. Match upstream llama-server's `n_predict` chunk size. */
87
103
  const DEFAULT_MAX_TOKENS_PER_STEP = 32;
88
104
  const DEFAULT_MAX_TEXT_BYTES = 1024;
105
+ /**
106
+ * Sane bounds for the per-step token cap. The floor is 1 (true
107
+ * token-by-token); the ceiling guards against pathological values that would
108
+ * defeat streaming by emitting the whole reply in one step.
109
+ */
110
+ const MIN_MAX_TOKENS_PER_STEP = 1;
111
+ const MAX_MAX_TOKENS_PER_STEP = 512;
112
+
113
+ /** Clamp a caller-supplied per-step cap into the supported range. */
114
+ function clampMaxTokensPerStep(value: number): number {
115
+ if (!Number.isFinite(value)) return DEFAULT_MAX_TOKENS_PER_STEP;
116
+ return Math.min(
117
+ MAX_MAX_TOKENS_PER_STEP,
118
+ Math.max(MIN_MAX_TOKENS_PER_STEP, Math.trunc(value)),
119
+ );
120
+ }
121
+
122
+ /**
123
+ * Resolve the per-step token cap for the native decode loop. Override via the
124
+ * `ELIZA_LOCAL_STREAM_TOKENS_PER_STEP` env var (e.g. set to `8` for smoother
125
+ * local streaming, weighed against the extra JS↔FFI round-trips and the shared
126
+ * voice phrase-chunker). Falls back to `DEFAULT_MAX_TOKENS_PER_STEP` (32) when
127
+ * unset or invalid; clamped to `[MIN_MAX_TOKENS_PER_STEP, MAX_MAX_TOKENS_PER_STEP]`.
128
+ */
129
+ export function resolveMaxTokensPerStep(): number {
130
+ const raw = process.env.ELIZA_LOCAL_STREAM_TOKENS_PER_STEP?.trim();
131
+ if (!raw) return DEFAULT_MAX_TOKENS_PER_STEP;
132
+ const parsed = Number.parseInt(raw, 10);
133
+ if (!Number.isFinite(parsed)) return DEFAULT_MAX_TOKENS_PER_STEP;
134
+ return clampMaxTokensPerStep(parsed);
135
+ }
89
136
 
90
137
  /**
91
138
  * Backend used by the mobile and desktop FFI routes.
@@ -285,6 +332,7 @@ export class FfiStreamingRunner {
285
332
  gpuLayers: args.gpuLayers,
286
333
  cacheTypeK: args.cacheTypeK,
287
334
  cacheTypeV: args.cacheTypeV,
335
+ contextSize: args.contextSize,
288
336
  },
289
337
  });
290
338
 
@@ -307,6 +355,11 @@ export class FfiStreamingRunner {
307
355
  return;
308
356
  }
309
357
 
358
+ const maxTokensPerStep =
359
+ args.maxTokensPerStep !== undefined
360
+ ? clampMaxTokensPerStep(args.maxTokensPerStep)
361
+ : resolveMaxTokensPerStep();
362
+
310
363
  let tokenIndex = 0;
311
364
  while (true) {
312
365
  if (args.signal?.aborted) {
@@ -315,7 +368,7 @@ export class FfiStreamingRunner {
315
368
  }
316
369
  const step = this.ffi.llmStreamNext({
317
370
  stream,
318
- maxTokensPerStep: DEFAULT_MAX_TOKENS_PER_STEP,
371
+ maxTokensPerStep,
319
372
  maxTextBytes: DEFAULT_MAX_TEXT_BYTES,
320
373
  });
321
374
  onStep(step);
@@ -158,5 +158,9 @@ describe("DesktopFusedFfiBackendRuntime.release() ordering (#14)", () => {
158
158
  // is explicitly poisoned so a new native model is not allocated over a
159
159
  // failed cleanup state.
160
160
  await expect(runtime.acquire(PLAN)).rejects.toThrow(/restart required/i);
161
- });
161
+ // Heavy path (dynamic import + FFI acquire/release/acquire): fast in
162
+ // isolation but CPU-starved under the full 2122-test parallel suite, where
163
+ // it brushed the old 20s ceiling (20012ms). Headroom; a true hang still
164
+ // fails well within this bound.
165
+ }, 45_000);
162
166
  });
@@ -0,0 +1,144 @@
1
+ /**
2
+ * #8808 acceptance criterion 4 — fused eliza-1 no-regression.
3
+ *
4
+ * The local stack is Eliza-1 only (#8808 cutover), so serving is deterministic.
5
+ * This test pins the invariants:
6
+ * - `decideBackend` / `BackendDispatcher` route a `runtimeClass:"fused-eliza1"`
7
+ * model to the fused `llama-cpp` runtime,
8
+ * - the fused path retains its full-pipeline binding: the `BackendPlan` that
9
+ * reaches the fused backend still carries the catalog entry and the
10
+ * bundle-root override that `DesktopFusedFfiBackendRuntime.acquire()` reads
11
+ * to anchor the fused context.
12
+ *
13
+ * It complements `backend-runtime-class.test.ts` (which proves the binary
14
+ * routing) by asserting that the FULL fused load contract is forwarded intact.
15
+ */
16
+
17
+ import { describe, expect, it } from "vitest";
18
+
19
+ import {
20
+ BackendDispatcher,
21
+ type BackendPlan,
22
+ decideBackend,
23
+ type GenerateArgs,
24
+ type GenerateResult,
25
+ type LocalInferenceBackend,
26
+ } from "./backend";
27
+ import { findCatalogModel } from "./catalog";
28
+ import type { CatalogModel } from "./types";
29
+
30
+ const FUSED_TIER = findCatalogModel("eliza-1-4b") as CatalogModel;
31
+
32
+ function makeBackend(id: LocalInferenceBackend["id"]): LocalInferenceBackend & {
33
+ loaded: BackendPlan[];
34
+ } {
35
+ const loaded: BackendPlan[] = [];
36
+ return {
37
+ id,
38
+ loaded,
39
+ available: async () => true,
40
+ load: async (plan: BackendPlan) => {
41
+ loaded.push(plan);
42
+ },
43
+ unload: async () => {},
44
+ generate: async (_args: GenerateArgs): Promise<GenerateResult> => "ok",
45
+ hasLoadedModel: () => loaded.length > 0,
46
+ currentModelPath: () => loaded.at(-1)?.modelPath ?? null,
47
+ };
48
+ }
49
+
50
+ describe("fused eliza-1 no-regression (C4)", () => {
51
+ it("the catalog tier under test really is a fused-eliza1 tier", () => {
52
+ expect(FUSED_TIER).toBeTruthy();
53
+ expect(FUSED_TIER.runtimeClass).toBe("fused-eliza1");
54
+ expect(FUSED_TIER.runtime?.mtp).toBeUndefined();
55
+ });
56
+
57
+ it("decideBackend routes a fused Eliza-1 tier to the fused llama-cpp runtime", () => {
58
+ const decision = decideBackend({
59
+ override: "auto",
60
+ catalog: FUSED_TIER,
61
+ llamaCppAvailable: true,
62
+ });
63
+ expect(decision.backend).toBe("llama-cpp");
64
+ });
65
+
66
+ it("decideBackend routes everything to llama-cpp — the stack is Eliza-1 only", () => {
67
+ // Post-#8808 cutover: there is no generic-gguf backend; every model
68
+ // (even an unknown catalog entry) routes to the fused llama-cpp runtime.
69
+ const decision = decideBackend({
70
+ override: "auto",
71
+ catalog: undefined,
72
+ llamaCppAvailable: true,
73
+ });
74
+ expect(decision.backend).toBe("llama-cpp");
75
+ });
76
+
77
+ it("dispatcher forwards the fused full-pipeline binding (catalog + bundleRoot) to the fused backend", async () => {
78
+ const ffi = makeBackend("llama-cpp");
79
+ const dispatcher = new BackendDispatcher(
80
+ ffi,
81
+ () => true,
82
+ () => null,
83
+ );
84
+
85
+ const bundleRoot = "/models/eliza-1-4b";
86
+ const plan: BackendPlan = {
87
+ modelPath: `${bundleRoot}/text/eliza-1-4b-128k.gguf`,
88
+ modelId: "eliza-1-4b",
89
+ catalog: FUSED_TIER,
90
+ runtimeClass: "fused-eliza1",
91
+ overrides: {
92
+ bundleRoot,
93
+ draftModelPath: `${bundleRoot}/text/eliza-1-4b-mtp.gguf`,
94
+ gpuLayers: "max",
95
+ cacheTypeK: "tbq4_0",
96
+ cacheTypeV: "tbq3_0",
97
+ },
98
+ };
99
+
100
+ await dispatcher.load(plan);
101
+
102
+ // Routed to the fused runtime.
103
+ expect(ffi.loaded).toHaveLength(1);
104
+ expect(dispatcher.activeBackendId()).toBe("llama-cpp");
105
+
106
+ // The full-pipeline binding survives dispatch: the fused backend receives
107
+ // the same catalog entry plus the bundle-root and explicit drafter
108
+ // overrides that anchor the fused context and preserve fork KV-cache
109
+ // kernel settings.
110
+ const forwarded = ffi.loaded[0];
111
+ expect(forwarded.catalog).toBe(FUSED_TIER);
112
+ expect(forwarded.catalog?.runtime?.mtp).toBeUndefined();
113
+ expect(forwarded.overrides?.bundleRoot).toBe(bundleRoot);
114
+ expect(forwarded.overrides?.draftModelPath).toBe(
115
+ `${bundleRoot}/text/eliza-1-4b-mtp.gguf`,
116
+ );
117
+ expect(forwarded.overrides?.cacheTypeK).toBe("tbq4_0");
118
+ expect(forwarded.overrides?.cacheTypeV).toBe("tbq3_0");
119
+ });
120
+
121
+ it("env-override=llama-cpp keeps a fused tier on the fused path", async () => {
122
+ const prev = process.env.ELIZA_INFERENCE_BACKEND;
123
+ process.env.ELIZA_INFERENCE_BACKEND = "llama-cpp";
124
+ try {
125
+ const ffi = makeBackend("llama-cpp");
126
+ const dispatcher = new BackendDispatcher(
127
+ ffi,
128
+ () => true,
129
+ () => null,
130
+ );
131
+ const decision = dispatcher.decide({
132
+ modelPath: "/models/eliza-1-4b/text/eliza-1-4b-128k.gguf",
133
+ modelId: "eliza-1-4b",
134
+ catalog: FUSED_TIER,
135
+ runtimeClass: "fused-eliza1",
136
+ });
137
+ expect(decision.backend).toBe("llama-cpp");
138
+ expect(decision.reason).toBe("env-override");
139
+ } finally {
140
+ if (prev === undefined) delete process.env.ELIZA_INFERENCE_BACKEND;
141
+ else process.env.ELIZA_INFERENCE_BACKEND = prev;
142
+ }
143
+ });
144
+ });
@@ -213,8 +213,13 @@ describe("probeHardware GPU detection", () => {
213
213
  expect(probe.gpu).not.toBeNull();
214
214
  expect(probe.gpu?.backend).toBe("cuda");
215
215
  expect(probe.gpu?.totalVramGb).toBeGreaterThanOrEqual(23);
216
- // A 24 GB discrete GPU must not be mis-tiered as a CPU box.
217
- expect(["MAX", "GOOD"]).toContain(classifyDeviceTier(probe).tier);
216
+ // A 24 GB discrete GPU must not be mis-tiered as a CPU box (POOR). MAX/GOOD
217
+ // additionally require >=16/>=8 GB *free* RAM at session start, which this test
218
+ // only mocks the GPU for; under parallel-suite memory pressure real free RAM
219
+ // can dip below those gates, legitimately tiering a 24 GB-GPU box to OKAY
220
+ // (still GPU-backed, not a CPU box). Assert "off CPU" (not POOR) -- the stated
221
+ // intent -- which is deterministic regardless of host free RAM.
222
+ expect(["MAX", "GOOD", "OKAY"]).toContain(classifyDeviceTier(probe).tier);
218
223
  });
219
224
 
220
225
  it("reports gpu:null when nvidia-smi is absent on a non-Apple host", async () => {
@@ -13,8 +13,10 @@
13
13
  import { execFileSync } from "node:child_process";
14
14
  import fs from "node:fs";
15
15
  import os from "node:os";
16
+ import path from "node:path";
16
17
  import { detectGpu } from "./gpu-detect";
17
18
  import type { Eliza1Backend, Eliza1DeviceCaps } from "./manifest";
19
+ import { elizaModelsDir } from "./paths";
18
20
  import type {
19
21
  CpuFeatureProbe,
20
22
  HardwareProbe,
@@ -29,6 +31,30 @@ function bytesToGb(bytes: number): number {
29
31
  return Math.round((bytes / BYTES_PER_GB) * 10) / 10;
30
32
  }
31
33
 
34
+ /**
35
+ * Free disk space (GB) on the volume that holds the models directory. Walks up
36
+ * to the nearest existing ancestor before `statfs` so a not-yet-created models
37
+ * dir still resolves to its parent volume. Returns `undefined` when the volume
38
+ * cannot be stat'd (the fit check then falls back to RAM-only / mobile
39
+ * storage), never throws.
40
+ */
41
+ async function probeFreeDiskGb(): Promise<number | undefined> {
42
+ try {
43
+ let dir = elizaModelsDir();
44
+ for (let i = 0; i < 12 && !fs.existsSync(dir); i += 1) {
45
+ const parent = path.dirname(dir);
46
+ if (parent === dir) break;
47
+ dir = parent;
48
+ }
49
+ const stats = await fs.promises.statfs(dir);
50
+ const available = stats.bavail * stats.bsize;
51
+ if (!Number.isFinite(available) || available < 0) return undefined;
52
+ return bytesToGb(available);
53
+ } catch {
54
+ return undefined;
55
+ }
56
+ }
57
+
32
58
  /**
33
59
  * Pick a default bucket based on total available memory and architecture.
34
60
  *
@@ -331,9 +357,11 @@ export async function probeHardware(): Promise<HardwareProbe> {
331
357
 
332
358
  const gpu = detectProbeGpu(appleSilicon, totalRamBytes, freeRamBytes);
333
359
  const totalRamGb = bytesToGb(totalRamBytes);
360
+ const freeDiskGb = await probeFreeDiskGb();
334
361
  return {
335
362
  totalRamGb,
336
363
  freeRamGb: bytesToGb(freeRamBytes),
364
+ ...(freeDiskGb !== undefined ? { freeDiskGb } : {}),
337
365
  gpu,
338
366
  cpuCores,
339
367
  cpuFeatures,