@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893):
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Conversation registry for the local-inference path.
3
+ *
4
+ * Today's slot allocation is purely a hash function: `deriveSlotId` maps a
5
+ * `promptCacheKey` (or any stable string) to `slot_id` in `[0, parallel)`.
6
+ * That works for one-shot calls but breaks for long agentic loops:
7
+ *
8
+ * - Two distinct conversations whose cache keys hash to the same slot
9
+ * evict each other's KV every turn (slot thrashing).
10
+ * - The current high-water mark of concurrent conversations is invisible,
11
+ * so `--parallel N` cannot be tuned to fit.
12
+ * - There is no notion of an explicit "I am still using this slot" lease,
13
+ * so eviction is purely best-effort.
14
+ *
15
+ * This registry keeps a per-conversation reservation. `openConversation`
16
+ * picks the lowest-loaded slot and pins the conversation to it; subsequent
17
+ * `generateInConversation` calls always land on the same slot. When the
18
+ * pool is full, slot reuse falls back to the same-as-before hash policy
19
+ * (two leases on the same slot still serialise correctly via the dispatcher's
20
+ * generation queue).
21
+ *
22
+ * The registry tracks the high-water mark of concurrently-open conversations
23
+ * so the engine can warn, or later restart llama-server with a higher
24
+ * --parallel, when the load outgrows the configured slot count.
25
+ */
26
+
27
+ import { createHash } from "node:crypto";
28
+
/**
 * Lease record returned by `ConversationRegistry.open`. Callers must not
 * interpret or rewrite its fields: the registry owns both the slot
 * assignment and the lifetime of the lease.
 */
export interface ConversationHandle {
	/** Conversation this lease was opened for. */
	readonly conversationId: string;
	/** Model the conversation is bound to; part of the registry lookup key. */
	readonly modelId: string;
	/**
	 * Slot the conversation is pinned to, in `[0, parallel)`, or `-1` when
	 * slot pinning is disabled (parallel <= 0). Both backends use it as the
	 * cache key:
	 *   - llama-server: sent as `slot_id` in the request payload.
	 *   - node-llama-cpp: combined with the conversation id to derive the
	 *     session-pool key, so identical conversations share a session.
	 *
	 * NOTE(review): `ConversationRegistry.open` in this file treats a
	 * non-positive `parallel` as 1 and pins slot 0, so it never emits `-1`
	 * itself — confirm whether another producer does.
	 */
	readonly slotId: number;
	/** Wall-clock time (ms) at which the handle was created. */
	readonly openedAtMs: number;
	/** Wall-clock time (ms) of the most recent open / lookup of this handle. */
	lastUsedMs: number;
	/** Idle time (ms) after which a sweep MAY close the handle automatically. */
	readonly ttlMs: number;
	/** Set once the handle has been closed or evicted; it must not be used afterwards. */
	closed: boolean;
}
53
+
/** Arguments accepted by `ConversationRegistry.open`. */
export interface OpenConversationArgs {
	/** Stable id of the conversation; must be non-empty. */
	conversationId: string;
	/** Id of the model serving the conversation; must be non-empty. */
	modelId: string;
	/** Number of slots on the running server (`--parallel N`). Defaults to 1. */
	parallel?: number;
	/**
	 * Idle time in ms after which `evictIdle` may close the handle.
	 * Defaults to 60 minutes: long enough for an LLM call to complete even
	 * with a slow drafter, short enough that a forgotten close is recovered
	 * within the long-cache window.
	 */
	ttlMs?: number;
}
67
+
/** Default idle TTL for a conversation handle: 60 minutes, in milliseconds. */
const DEFAULT_HANDLE_TTL_MS = 1000 * 60 * 60;
69
+
/**
 * In-memory registry of open conversation handles, keyed by
 * `(modelId, conversationId)`. It pins each conversation to a slot, keeps a
 * per-slot count of open handles, and records the largest number of handles
 * that were ever open at once.
 */
export class ConversationRegistry {
	/** Open handles keyed by the composite `modelId::conversationId` key. */
	private readonly handles = new Map<string, ConversationHandle>();
	/** Number of open handles per slot; slots with no handles have no entry. */
	private readonly slotLoad = new Map<number, number>();
	/** Largest number of simultaneously-open handles observed so far. */
	private highWaterMark = 0;

	/**
	 * Return the open handle for this conversation + model, creating one if
	 * none exists. Re-opening an existing handle only refreshes `lastUsedMs`,
	 * so callers may invoke this on every turn without leaking handles.
	 *
	 * @param args Conversation id, model id, and optional `parallel` / `ttlMs`.
	 *   A `parallel` that is missing, non-numeric, non-finite, or <= 0 is
	 *   treated as 1.
	 * @returns The existing or newly created handle.
	 * @throws Error when `conversationId` or `modelId` is empty.
	 */
	open(args: OpenConversationArgs): ConversationHandle {
		if (!args.conversationId) {
			throw new Error("[conversation-registry] conversationId is required");
		}
		if (!args.modelId) {
			throw new Error("[conversation-registry] modelId is required");
		}
		const key = this.compositeKey(args.conversationId, args.modelId);
		const existing = this.handles.get(key);
		if (existing && !existing.closed) {
			existing.lastUsedMs = Date.now();
			return existing;
		}

		// `Infinity` passes a bare `> 0` check and would make the slot scan
		// below loop forever, so only finite positive counts are honoured.
		const requested = args.parallel;
		const parallel =
			typeof requested === "number" &&
			Number.isFinite(requested) &&
			requested > 0
				? Math.floor(requested)
				: 1;
		const slotId = this.pickLowestLoadedSlot(parallel, args.conversationId);
		const now = Date.now();
		const handle: ConversationHandle = {
			conversationId: args.conversationId,
			modelId: args.modelId,
			slotId,
			openedAtMs: now,
			lastUsedMs: now,
			ttlMs: args.ttlMs ?? DEFAULT_HANDLE_TTL_MS,
			closed: false,
		};
		this.handles.set(key, handle);
		this.slotLoad.set(slotId, (this.slotLoad.get(slotId) ?? 0) + 1);
		if (this.handles.size > this.highWaterMark) {
			this.highWaterMark = this.handles.size;
		}
		return handle;
	}

	/**
	 * Look up an open handle by conversation + model.
	 *
	 * @returns The handle, or `null` when it was never opened or has been
	 *   closed. A successful lookup refreshes `lastUsedMs`, so reads count as
	 *   activity for `evictIdle`.
	 */
	get(conversationId: string, modelId: string): ConversationHandle | null {
		const handle = this.handles.get(this.compositeKey(conversationId, modelId));
		if (!handle || handle.closed) return null;
		handle.lastUsedMs = Date.now();
		return handle;
	}

	/**
	 * Close and forget a handle. Calling this for an unknown or already
	 * closed conversation is a no-op, so cleanup paths can call it
	 * unconditionally.
	 */
	close(conversationId: string, modelId: string): void {
		const key = this.compositeKey(conversationId, modelId);
		const handle = this.handles.get(key);
		if (!handle) return;
		this.dropHandle(key, handle);
	}

	/**
	 * Close every handle whose idle time (`now - lastUsedMs`) exceeds its TTL.
	 *
	 * @param now Reference time in ms; defaults to the current wall clock.
	 * @returns Conversation ids of the handles that were dropped, in the
	 *   order they were opened.
	 */
	evictIdle(now: number = Date.now()): string[] {
		const dropped: string[] = [];
		// Deleting the entry currently being visited is safe for a Map iterator.
		for (const [key, handle] of this.handles) {
			if (now - handle.lastUsedMs <= handle.ttlMs) continue;
			this.dropHandle(key, handle);
			dropped.push(handle.conversationId);
		}
		return dropped;
	}

	/** Copy of the list of currently-open handles (the handles themselves are shared). */
	snapshot(): readonly ConversationHandle[] {
		return [...this.handles.values()];
	}

	/** Largest concurrent open count seen since creation or the last reset. */
	highWater(): number {
		return this.highWaterMark;
	}

	/** Number of currently-open handles. */
	size(): number {
		return this.handles.size;
	}

	/**
	 * Suggested `--parallel` slot count: the high-water mark plus headroom of
	 * `max(2, ceil(25% of the high-water mark))`, but never less than
	 * `running`. When that target does not exceed `running`, `running` is
	 * returned unchanged, so callers can detect "no resize needed" with a
	 * plain equality check.
	 *
	 * @param running Slot count the server is currently configured with.
	 */
	recommendedParallel(running: number): number {
		const headroom = Math.max(2, Math.ceil(this.highWaterMark * 0.25));
		const desired = Math.max(1, this.highWaterMark + headroom);
		return Math.max(running, desired);
	}

	/**
	 * Test-only: mark every handle closed and clear all bookkeeping,
	 * including the high-water mark. Not part of the runtime contract.
	 */
	__resetForTests(): void {
		for (const handle of this.handles.values()) handle.closed = true;
		this.handles.clear();
		this.slotLoad.clear();
		this.highWaterMark = 0;
	}

	/**
	 * Choose the slot for a new handle.
	 *
	 *   - `parallel <= 1`: always slot 0.
	 *   - Otherwise the lowest-numbered slot with no open handles, if any.
	 *   - When every slot already has at least one handle, the slot is derived
	 *     from a SHA-256 hash of the conversation id (relative load is not
	 *     considered), so the same conversation lands on the same slot when
	 *     reopened against a full pool.
	 */
	private pickLowestLoadedSlot(
		parallel: number,
		conversationId: string,
	): number {
		if (parallel <= 1) return 0;
		// Loaded slots always hold a count >= 1 and empty slots have no entry,
		// so "least loaded with load 0" is simply the first slot without one.
		for (let slot = 0; slot < parallel; slot += 1) {
			if ((this.slotLoad.get(slot) ?? 0) === 0) return slot;
		}
		const digest = createHash("sha256").update(conversationId).digest();
		return digest.readUInt32BE(0) % parallel;
	}

	/** Mark a handle closed, remove it from the registry, and release its slot. */
	private dropHandle(key: string, handle: ConversationHandle): void {
		handle.closed = true;
		this.handles.delete(key);
		this.releaseSlot(handle.slotId);
	}

	/** Decrement a slot's handle count, removing the entry once it reaches zero. */
	private releaseSlot(slotId: number): void {
		const remaining = (this.slotLoad.get(slotId) ?? 0) - 1;
		if (remaining > 0) {
			this.slotLoad.set(slotId, remaining);
		} else {
			this.slotLoad.delete(slotId);
		}
	}

	/** Registry key for a conversation: model id and conversation id joined by `::`. */
	private compositeKey(conversationId: string, modelId: string): string {
		return `${modelId}::${conversationId}`;
	}
}
258
+
/**
 * Process-wide registry instance. The engine consults it on every generate,
 * and the conversation lifecycle API (`openConversation`,
 * `closeConversation`) is what mutates it.
 */
export const conversationRegistry: ConversationRegistry =
	new ConversationRegistry();
@@ -0,0 +1,431 @@
1
+ /**
2
+ * Desktop production `FfiBackendRuntime` over the FUSED `libelizainference` —
3
+ * the SOLE desktop text runtime now that libllama has been retired.
4
+ *
5
+ * Desktop text generation runs through the fused library: the same
6
+ * `eliza_inference_llm_stream_*` ABI (v9) the voice subsystem already loads,
7
+ * so text + voice share one native lib, one GGML pin, and one resident text
8
+ * model.
9
+ *
10
+ * - The fused lib's `eliza_inference_llm_stream_open` loads the bundle's text
11
+ * GGUF (`<bundleRoot>/text/*.gguf`) and applies MTP speculative
12
+ * decoding + KV-cache quant + per-load GPU layers natively (ABI v9). The
13
+ * path is gated on the capability probes
14
+ * (`llmStreamSupported && llmMtpSupported && llmKvQuantSupported`).
15
+ * - A fused lib that lacks MTP / KV-quant / native tokenize is REFUSED by
16
+ * `supported()` → the engine raises LocalInferenceUnavailable. There is no
17
+ * libllama fallback and never an unoptimized fused loop.
18
+ *
19
+ * Tokenization runs over the fused handle's resident text vocab via ABI-v9
20
+ * `eliza_inference_tokenize`: the fused `create()` + first `llmStreamOpen`
21
+ * already made the text vocab resident, so no second model is loaded.
22
+ * `tokenizeSupported()` gates this; a pre-v9 lib without the symbol is refused.
23
+ *
24
+ * Lifecycle: one fused context per loaded model; `acquire()` builds it,
25
+ * `release()` tears it down. A throwing native free poisons the runtime so no
26
+ * new allocation happens over leaked resources.
27
+ */
28
+
29
+ import fs from "node:fs";
30
+ import path from "node:path";
31
+
32
+ import { resolveStateDir } from "@elizaos/core";
33
+
34
+ import type { BackendPlan } from "./backend";
35
+ import type {
36
+ FfiBackendRuntime,
37
+ FfiBackendSession,
38
+ } from "./ffi-streaming-backend";
39
+ import { FfiStreamingRunner } from "./ffi-streaming-runner";
40
+ import { wrapElizaInferenceFfi } from "./llm-streaming-binding";
41
+ import type { ElizaInferenceContextHandle } from "./voice/ffi-bindings";
42
+ import {
43
+ type ElizaInferenceFfi,
44
+ loadElizaInferenceFfi,
45
+ } from "./voice/ffi-bindings";
46
+
47
/**
 * Throw when `signal` has already been aborted; otherwise do nothing.
 *
 * The signal's own `reason` is rethrown when it is an `Error`; any other
 * reason is replaced by a `DOMException` named `AbortError`.
 */
function throwIfAborted(signal: AbortSignal | undefined): void {
  if (signal?.aborted !== true) return;
  const { reason } = signal;
  if (reason instanceof Error) throw reason;
  throw new DOMException("Aborted", "AbortError");
}
53
+
54
/**
 * Filenames the fused library may carry on the current platform, in the
 * order they should be tried. Kept in step with
 * `samantha-preset-regenerator.ts::libraryFilenames` so the runtime and the
 * voice regenerator resolve the same artifact.
 */
function fusedLibraryFilenames(): string[] {
  switch (process.platform) {
    case "darwin":
      return ["libelizainference.dylib"];
    case "win32":
      return ["elizainference.dll", "libelizainference.dll"];
    default:
      // Every other platform uses the ELF shared-object name.
      return ["libelizainference.so"];
  }
}
66
+
67
/**
 * Locate the fused `libelizainference` on disk.
 *
 * Search order:
 *   1. `ELIZA_INFERENCE_LIBRARY` — used verbatim when that path exists.
 *   2. `<bundleRoot>/lib/<name>` — the bundle-local lib (skipped when
 *      `bundleRoot` is null or empty).
 *   3. The directory containing `ELIZA_INFERENCE_LIBRARY`, when the variable
 *      is set but the exact path does not exist.
 *   4. `ELIZA_INFERENCE_LIB_DIR/<name>` — an explicit lib directory.
 *   5. `<stateDir>/local-inference/lib/<name>` — the default staging dir
 *      written by `scripts/stage-desktop-fused-lib.mjs`, so a staged desktop
 *      build is found with no env wiring.
 *
 * `<name>` ranges over the platform's candidate filenames, tried in order
 * inside each directory before moving on to the next directory.
 *
 * @param bundleRoot Bundle root to search first, or null to skip it.
 * @param env Environment to read the overrides from (defaults to
 *   `process.env`).
 * @returns The first existing candidate path, or null when none exists —
 *   `supported()` then reports unavailable and the engine raises
 *   LocalInferenceUnavailable.
 */
export function resolveFusedLibraryPath(
  bundleRoot: string | null,
  env: NodeJS.ProcessEnv = process.env,
): string | null {
  const explicitLib = env.ELIZA_INFERENCE_LIBRARY?.trim();
  if (explicitLib && fs.existsSync(explicitLib)) return explicitLib;

  const searchDirs: string[] = [];
  if (bundleRoot) searchDirs.push(path.join(bundleRoot, "lib"));
  if (explicitLib) searchDirs.push(path.dirname(explicitLib));
  const explicitDir = env.ELIZA_INFERENCE_LIB_DIR?.trim();
  if (explicitDir) searchDirs.push(explicitDir);
  searchDirs.push(path.join(resolveStateDir(env), "local-inference", "lib"));

  const filenames = fusedLibraryFilenames();
  for (const dir of searchDirs) {
    const found = filenames
      .map((name) => path.join(dir, name))
      .find((candidate) => fs.existsSync(candidate));
    if (found !== undefined) return found;
  }
  return null;
}
98
+
99
/**
 * Work out the bundle root — the directory the fused `create()` anchors at —
 * for a BackendPlan.
 *
 * An explicit, non-empty `overrides.bundleRoot` wins. Otherwise the model is
 * taken to live at `<bundleRoot>/text/<file>.gguf`, so the root is two
 * directory levels above `modelPath`.
 */
function bundleRootForPlan(plan: BackendPlan): string {
  const explicitRoot = plan.overrides?.bundleRoot;
  if (explicitRoot) return explicitRoot;
  const textDir = path.dirname(plan.modelPath);
  return path.dirname(textDir);
}
109
+
110
/**
 * Everything the runtime needs to remember about its one live session so it
 * can serve vision calls and tear the native side down again.
 */
interface ActiveFusedSession {
  /** Bindings of the loaded fused library; closed when the session is released. */
  ffi: ElizaInferenceFfi;
  /** Native context handle returned by `ffi.create(bundleRoot)`; destroyed on release. */
  ctx: ElizaInferenceContextHandle;
  /** The session object handed back to the caller of `acquire()`. */
  session: FfiBackendSession;
}
115
+
116
/**
 * Desktop `FfiBackendRuntime` backed by the fused `libelizainference`.
 *
 * The runtime holds at most one live native session. `acquire()` loads the
 * library, creates a context anchored at the plan's bundle root and returns
 * the session; `release()` destroys the context and closes the library.
 * Whenever a native free throws, the runtime is marked poisoned and every
 * later `acquire()` is refused, so nothing new is allocated over leaked
 * resources.
 */
export class DesktopFusedFfiBackendRuntime implements FfiBackendRuntime {
  /** The single live native session, or null when nothing is acquired. */
  private active: ActiveFusedSession | null = null;
  /** Error from a failed native cleanup; while set, `acquire()` refuses to run. */
  private poisonedError: Error | null = null;
  /** Cached `supported()` result so the engine gate and the dispatcher agree. */
  private supportedCache: boolean | null = null;

  /**
   * Viable only when:
   *   - bun:ffi resolves on the current runtime,
   *   - the fused dylib is present AND reports ABI-v9 capability: the
   *     streaming-LLM surface, MTP, KV-cache quant, AND native
   *     tokenization (`eliza_inference_tokenize`).
   * A pre-v9 fused lib reports the probes as unsupported → refused, and the
   * engine raises LocalInferenceUnavailable. libllama has been retired; there
   * is no fallback runtime and no tokenizer sidecar.
   *
   * The result is computed once and cached until `resetSupportedCache()`.
   */
  supported(): boolean {
    if (this.supportedCache !== null) return this.supportedCache;
    this.supportedCache = this.computeSupported();
    return this.supportedCache;
  }

  /** Clear the cached `supported()` result (tests / lib swaps). */
  resetSupportedCache(): void {
    this.supportedCache = null;
  }

  /**
   * Run the capability probes behind `supported()`. Returns false instead of
   * throwing for every expected failure: bun:ffi missing, library not found,
   * dlopen or ABI mismatch, or any probe reporting unsupported.
   */
  private computeSupported(): boolean {
    try {
      require.resolve("bun:ffi");
    } catch {
      return false;
    }
    // No plan is available here, so only the env-configured and staged
    // locations are searched (no bundle-local lib dir).
    const libPath = resolveFusedLibraryPath(null);
    if (!libPath) return false;
    // Load the lib and probe the ABI-v9 LLM capabilities. This dlopen is
    // cheap (no model load); we close it immediately after probing.
    let ffi: ElizaInferenceFfi | null = null;
    try {
      ffi = loadElizaInferenceFfi(libPath);
      const llmOk =
        typeof ffi.llmStreamSupported === "function" &&
        ffi.llmStreamSupported() === true &&
        typeof ffi.llmMtpSupported === "function" &&
        ffi.llmMtpSupported() === true &&
        typeof ffi.llmKvQuantSupported === "function" &&
        ffi.llmKvQuantSupported() === true;
      if (!llmOk) return false;
      // Native tokenization over the fused handle's resident text vocab
      // (ABI v9) is required: libllama has been retired, so there is no
      // tokenizer sidecar. A pre-v9 fused lib without `eliza_inference_tokenize`
      // is refused → the engine raises LocalInferenceUnavailable.
      const fusedTokenize =
        typeof ffi.tokenizeSupported === "function" &&
        ffi.tokenizeSupported() === true;
      if (!fusedTokenize) return false;
      return true;
    } catch {
      // dlopen / ABI-mismatch / non-Bun runtime → not viable.
      return false;
    } finally {
      ffi?.close();
    }
  }

  /**
   * Load the fused library, create the bundle context and build the session
   * for `plan`.
   *
   * @throws When the runtime is poisoned, when a session is already live,
   *   when no fused library can be found, or when any native setup step
   *   fails. On a setup failure the context and library opened by this call
   *   are torn down again before the error propagates.
   */
  async acquire(plan: BackendPlan): Promise<FfiBackendSession> {
    if (this.poisonedError) {
      throw new Error(
        `[desktop-fused-ffi-runtime] native cleanup previously failed; restart required before acquiring a new session: ${this.poisonedError.message}`,
      );
    }
    if (this.active) {
      throw new Error(
        "[desktop-fused-ffi-runtime] acquire() called with a live session; release() first",
      );
    }
    const bundleRoot = bundleRootForPlan(plan);
    const libPath = resolveFusedLibraryPath(bundleRoot);
    if (!libPath) {
      throw new Error(
        `[desktop-fused-ffi-runtime] fused libelizainference not found for bundle ${bundleRoot}. ` +
          "Dispatcher should not have routed here; check supported().",
      );
    }

    // 1. Fused lib + bundle context for the generation path. `create()`
    //    anchors at the bundle root; the first `llmStreamOpen` loads
    //    `<bundleRoot>/text/*.gguf` and applies gpuLayers + KV-cache quant
    //    from the session config (threaded via loadConfig below).
    const ffi = loadElizaInferenceFfi(libPath);
    let ctx: ElizaInferenceContextHandle;
    try {
      ctx = ffi.create(bundleRoot);
    } catch (err) {
      ffi.close();
      throw err;
    }

    // 2. Everything after `create()` runs under one guard so that ANY failure
    //    (missing tokenizer, a throwing probe, binding or runner
    //    construction) releases the context and the library instead of
    //    leaking them.
    try {
      const session = this.buildSession(ffi, ctx, plan);
      this.active = { ffi, ctx, session };
      return session;
    } catch (err) {
      this.disposeAfterFailedAcquire(ffi, ctx);
      throw err;
    }
  }

  /**
   * Assemble the `FfiBackendSession` over an already-created context.
   * Performs no cleanup itself; the caller tears down on a throw.
   *
   * @throws When the fused lib does not expose native tokenization.
   */
  private buildSession(
    ffi: ElizaInferenceFfi,
    ctx: ElizaInferenceContextHandle,
    plan: BackendPlan,
  ): FfiBackendSession {
    // Tokenization over the fused handle's resident text vocab via ABI-v9
    // `eliza_inference_tokenize` — no second model load. `supported()`
    // already refused a pre-v9 lib, so the symbol is present here; this
    // guard turns any surprise absence into a loud failure (the session is
    // torn down by the caller) rather than a silent tokenizer gap.
    const fusedTokenizeFn = ffi.tokenize;
    if (
      typeof ffi.tokenizeSupported !== "function" ||
      ffi.tokenizeSupported() !== true ||
      typeof fusedTokenizeFn !== "function"
    ) {
      throw new Error(
        "[desktop-fused-ffi-runtime] fused lib lacks eliza_inference_tokenize (pre-v9). " +
          "libllama has been retired; rebuild the fused lib with the v9 tokenizer ABI.",
      );
    }
    const tokenizeFn = (prompt: string): Int32Array =>
      fusedTokenizeFn({ ctx, text: prompt });

    const binding = wrapElizaInferenceFfi(ffi);
    const runner = new FfiStreamingRunner(binding, ctx);
    const overrides = plan.overrides;
    return {
      binding,
      ctx,
      runner,
      tokenize: (prompt) => tokenizeFn(prompt),
      mtp: plan.catalog?.runtime?.mtp ?? null,
      draftModelPath: overrides?.draftModelPath ?? null,
      mmprojPath: overrides?.mmprojPath ?? null,
      // The fused path applies these at its first `llmStreamOpen`:
      // context size, gpuLayers, and KV-cache quant types from the
      // session config.
      loadConfig: {
        contextSize:
          typeof overrides?.contextSize === "number"
            ? overrides.contextSize
            : undefined,
        gpuLayers:
          typeof overrides?.gpuLayers === "number"
            ? overrides.gpuLayers
            : undefined,
        cacheTypeK: overrides?.cacheTypeK ?? null,
        cacheTypeV: overrides?.cacheTypeV ?? null,
      },
    };
  }

  /**
   * Best-effort teardown for an `acquire()` that failed after `create()`.
   * Mirrors `release()`: a throwing native free poisons the runtime. The
   * cleanup error is recorded rather than thrown so the caller can rethrow
   * the original setup failure; it resurfaces on the next `acquire()`.
   */
  private disposeAfterFailedAcquire(
    ffi: ElizaInferenceFfi,
    ctx: ElizaInferenceContextHandle,
  ): void {
    try {
      ffi.destroy(ctx);
      ffi.close();
    } catch (cleanupErr) {
      this.poisonedError =
        cleanupErr instanceof Error
          ? cleanupErr
          : new Error(String(cleanupErr));
    }
  }

  /** Number of parallel generation slots this runtime offers (always 1). */
  parallelSlots(): number {
    // The fused runtime holds one resident text context per loaded model;
    // multi-slot parallelism is not exposed by the fused ABI.
    return 1;
  }

  /**
   * Whether the LIVE session can describe images through the fused
   * `eliza_inference_describe_image`. Mirrors the FfiStreamingBackend gate:
   * true only when a session is bound and the fused lib exposes vision.
   */
  visionSupported(): boolean {
    if (!this.active) return false;
    return (
      typeof this.active.ffi.visionSupported === "function" &&
      this.active.ffi.visionSupported() === true &&
      typeof this.active.ffi.describeImage === "function"
    );
  }

  /**
   * Whether the LIVE session can STREAM a vision describe token-by-token
   * through `eliza_inference_describe_image_stream_open` + the existing
   * `llmStreamNext` loop (ABI v13). A <=v12 lib reports false and the handler
   * uses the buffered one-shot `describeImage` path.
   */
  visionStreamSupported(): boolean {
    if (!this.active) return false;
    const { ffi } = this.active;
    return (
      typeof ffi.visionStreamSupported === "function" &&
      ffi.visionStreamSupported() === true &&
      typeof ffi.describeImageStreamOpen === "function" &&
      typeof ffi.llmStreamNext === "function" &&
      typeof ffi.llmStreamClose === "function"
    );
  }

  /**
   * Vision describe through the fused mmproj path. Reuses the mtmd machinery
   * linked for ASR over the bundle's text model + the passed mmproj projector.
   * The `FfiStreamingBackend` forwards `describeImage`/`visionSupported` to this
   * runtime by duck-typing.
   *
   * When `onTextChunk` is supplied AND the fused lib exposes ABI-v13 streaming
   * vision, the description is decoded token-by-token: `describeImageStreamOpen`
   * primes a stream with the image+prompt KV and the EXISTING `llmStreamNext`
   * loop pulls tokens — the same machinery that streams chat text, so vision
   * flows into the dashboard through one pipe. Otherwise it falls back to the
   * buffered one-shot `eliza_inference_describe_image`.
   *
   * `maxTokens` and `maxTokensPerStep` only take effect on the streaming
   * path; `temperature` is accepted but not forwarded by this method. An
   * already-aborted `signal` is rejected before any native call on both
   * paths; mid-flight cancellation is only possible while streaming.
   *
   * @returns The description text plus the wall-clock decode time in ms.
   * @throws When no session is live, when the fused lib lacks vision, or
   *   when `signal` is aborted.
   */
  async describeImage(args: {
    imageBytes: Uint8Array;
    mmprojPath: string;
    prompt?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
    onTextChunk?: (chunk: string) => void | Promise<void>;
    maxTokensPerStep?: number;
  }): Promise<{ text: string; projectorMs?: number; decodeMs?: number }> {
    if (!this.active) {
      throw new Error(
        "[desktop-fused-ffi-runtime] describeImage before acquire — no session",
      );
    }
    const { ffi, ctx } = this.active;
    if (
      typeof ffi.visionSupported !== "function" ||
      ffi.visionSupported() !== true ||
      typeof ffi.describeImage !== "function"
    ) {
      throw new Error(
        "[desktop-fused-ffi-runtime] describeImage: fused lib was built without " +
          "vision (eliza_inference_vision_supported() == 0). Rebuild the fused " +
          "lib with -DELIZA_ENABLE_VISION=ON (verify-fused-symbols requires it).",
      );
    }

    // Token-by-token streaming path (ABI v13): open a vision stream and drive
    // the shared `llmStreamNext` loop, surfacing each decoded piece through
    // `onTextChunk` so the description renders as it generates. The typeof
    // checks repeat what `visionStreamSupported()` verified so the optional
    // members are narrowed for the calls below.
    if (
      typeof args.onTextChunk === "function" &&
      this.visionStreamSupported() &&
      typeof ffi.describeImageStreamOpen === "function" &&
      typeof ffi.llmStreamNext === "function" &&
      typeof ffi.llmStreamClose === "function"
    ) {
      throwIfAborted(args.signal);
      const startedAt = Date.now();
      const stream = ffi.describeImageStreamOpen({
        ctx,
        imageBytes: args.imageBytes,
        mmprojPath: args.mmprojPath,
        prompt: args.prompt,
      });
      let full = "";
      let generated = 0;
      // JS-side token budget: the native ELIZA_VISION_MAX_TOKENS env does not
      // reliably reach the loaded DLL's getenv across runtimes, so cap here.
      const tokenBudget =
        typeof args.maxTokens === "number" && args.maxTokens > 0
          ? args.maxTokens
          : 256;
      // Fine-grained by default so the description renders token-by-token
      // in the dashboard rather than in coarse ~32-token jumps (matches
      // the tuned chat default). Callers may override per request.
      const stepSize = args.maxTokensPerStep ?? 8;
      try {
        for (;;) {
          if (args.signal?.aborted) {
            ffi.llmStreamCancel?.(stream);
            throwIfAborted(args.signal);
          }
          // Never request more than the budget still allows; otherwise the
          // final step could overshoot `maxTokens` by up to a whole step.
          // `remaining` is always positive here because the loop exits as
          // soon as the budget is reached.
          const remaining = tokenBudget - generated;
          const step = ffi.llmStreamNext({
            stream,
            maxTokensPerStep: Math.min(stepSize, Math.ceil(remaining)),
          });
          if (step.text.length > 0) {
            full += step.text;
            await args.onTextChunk(step.text);
          }
          generated += step.tokens.length;
          if (step.done || generated >= tokenBudget) break;
        }
      } finally {
        // Always release the native stream, including on abort or when the
        // chunk callback throws.
        ffi.llmStreamClose(stream);
      }
      return { text: full, decodeMs: Date.now() - startedAt };
    }

    // Buffered one-shot path. The native call cannot be interrupted, so the
    // only useful abort check is the one made before it starts.
    throwIfAborted(args.signal);
    const startedAt = Date.now();
    const text = ffi.describeImage({
      ctx,
      imageBytes: args.imageBytes,
      mmprojPath: args.mmprojPath,
      prompt: args.prompt,
    });
    return { text, decodeMs: Date.now() - startedAt };
  }

  /**
   * Tear down the live session, if any: destroy the native context, then
   * close the library. A no-op when nothing is acquired.
   *
   * @throws Rethrows a failing native free after recording it, which
   *   poisons the runtime for all later `acquire()` calls.
   */
  async release(): Promise<void> {
    if (!this.active) return;
    const { ffi, ctx } = this.active;
    // Free the native handles. A throwing free poisons the runtime so a new
    // model cannot be allocated over leaked resources. Clear `active` in the
    // finally so a throwing free can't wedge the live-session guard.
    try {
      ffi.destroy(ctx);
      ffi.close();
    } catch (err) {
      this.poisonedError = err instanceof Error ? err : new Error(String(err));
      throw err;
    } finally {
      this.active = null;
    }
  }
}
424
+
425
+ /**
426
+ * Process singleton — the engine wires this as the sole `FfiBackendRuntime` for
427
+ * the dispatcher's `"llama-cpp"` slot. The ABI-v9 capability probes in
428
+ * `supported()` gate whether the fused lib serves text at all.
429
+ */
430
+ export const desktopFusedFfiBackendRuntime =
431
+ new DesktopFusedFfiBackendRuntime();