@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (893)
  1. package/LICENSE +21 -0
  2. package/README.md +157 -0
  3. package/dist/actions/generate-media.d.ts +59 -0
  4. package/dist/actions/generate-media.d.ts.map +1 -0
  5. package/dist/actions/identify-speaker.d.ts +23 -0
  6. package/dist/actions/identify-speaker.d.ts.map +1 -0
  7. package/dist/actions/transcription-control.d.ts +29 -0
  8. package/dist/actions/transcription-control.d.ts.map +1 -0
  9. package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
  10. package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
  11. package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
  12. package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
  13. package/dist/adapters/capacitor-llama/index.d.ts +18 -0
  14. package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
  15. package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
  16. package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
  17. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
  18. package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
  19. package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
  20. package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
  21. package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
  22. package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
  23. package/dist/adapters/capacitor-llama/types.d.ts +338 -0
  24. package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
  25. package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
  26. package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
  27. package/dist/backends/apple-foundation.d.ts +56 -0
  28. package/dist/backends/apple-foundation.d.ts.map +1 -0
  29. package/dist/index.d.ts +8 -37
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +38979 -430
  32. package/dist/index.js.map +217 -0
  33. package/dist/local-inference-routes.d.ts +47 -0
  34. package/dist/local-inference-routes.d.ts.map +1 -0
  35. package/dist/provider.d.ts +21 -0
  36. package/dist/provider.d.ts.map +1 -0
  37. package/dist/routes/compat-helpers.d.ts +18 -0
  38. package/dist/routes/compat-helpers.d.ts.map +1 -0
  39. package/dist/routes/family-member-route.d.ts +62 -0
  40. package/dist/routes/family-member-route.d.ts.map +1 -0
  41. package/dist/routes/index.d.ts +20 -0
  42. package/dist/routes/index.d.ts.map +1 -0
  43. package/dist/routes/index.js +42040 -0
  44. package/dist/routes/index.js.map +236 -0
  45. package/dist/routes/live-diarization-route.d.ts +33 -0
  46. package/dist/routes/live-diarization-route.d.ts.map +1 -0
  47. package/dist/routes/local-inference-asr-route.d.ts +4 -0
  48. package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
  49. package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
  50. package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  51. package/dist/routes/local-inference-compat-routes.d.ts +16 -0
  52. package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
  53. package/dist/routes/local-inference-tts-route.d.ts +7 -0
  54. package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/dist/routes/native-pcm-turn-route.d.ts +3 -0
  56. package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
  57. package/dist/routes/transcript-audio-store.d.ts +15 -0
  58. package/dist/routes/transcript-audio-store.d.ts.map +1 -0
  59. package/dist/routes/transcripts-routes.d.ts +44 -0
  60. package/dist/routes/transcripts-routes.d.ts.map +1 -0
  61. package/dist/routes/voice-first-run-routes.d.ts +62 -0
  62. package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
  63. package/dist/routes/voice-models-routes.d.ts +62 -0
  64. package/dist/routes/voice-models-routes.d.ts.map +1 -0
  65. package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
  66. package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  67. package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
  68. package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
  69. package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/dist/runtime/embedding-manager-support.d.ts +77 -0
  72. package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
  73. package/dist/runtime/embedding-presets.d.ts +16 -0
  74. package/dist/runtime/embedding-presets.d.ts.map +1 -0
  75. package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
  76. package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
  77. package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
  78. package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  79. package/dist/runtime/index.d.ts +15 -0
  80. package/dist/runtime/index.d.ts.map +1 -0
  81. package/dist/runtime/index.js +38768 -0
  82. package/dist/runtime/index.js.map +217 -0
  83. package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
  84. package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  85. package/dist/runtime/voice-entity-binding.d.ts +113 -0
  86. package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
  87. package/dist/services/active-model.d.ts +310 -0
  88. package/dist/services/active-model.d.ts.map +1 -0
  89. package/dist/services/asr-provenance.d.ts +5 -0
  90. package/dist/services/asr-provenance.d.ts.map +1 -0
  91. package/dist/services/assignments.d.ts +84 -0
  92. package/dist/services/assignments.d.ts.map +1 -0
  93. package/dist/services/backend-selector.d.ts +55 -0
  94. package/dist/services/backend-selector.d.ts.map +1 -0
  95. package/dist/services/backend.d.ts +440 -0
  96. package/dist/services/backend.d.ts.map +1 -0
  97. package/dist/services/bionic-host-loader.d.ts +67 -0
  98. package/dist/services/bionic-host-loader.d.ts.map +1 -0
  99. package/dist/services/bundled-models.d.ts +34 -0
  100. package/dist/services/bundled-models.d.ts.map +1 -0
  101. package/dist/services/cache-bridge.d.ts +206 -0
  102. package/dist/services/cache-bridge.d.ts.map +1 -0
  103. package/dist/services/catalog.d.ts +10 -0
  104. package/dist/services/catalog.d.ts.map +1 -0
  105. package/dist/services/checkpoint-client.d.ts +109 -0
  106. package/dist/services/checkpoint-client.d.ts.map +1 -0
  107. package/dist/services/checkpoint-manager.d.ts +217 -0
  108. package/dist/services/checkpoint-manager.d.ts.map +1 -0
  109. package/dist/services/cloud-fallback.d.ts +102 -0
  110. package/dist/services/cloud-fallback.d.ts.map +1 -0
  111. package/dist/services/context-fit.d.ts +36 -0
  112. package/dist/services/context-fit.d.ts.map +1 -0
  113. package/dist/services/conversation-registry.d.ts +142 -0
  114. package/dist/services/conversation-registry.d.ts.map +1 -0
  115. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
  116. package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  117. package/dist/services/device-bridge.d.ts +188 -0
  118. package/dist/services/device-bridge.d.ts.map +1 -0
  119. package/dist/services/device-resource-metrics.d.ts +149 -0
  120. package/dist/services/device-resource-metrics.d.ts.map +1 -0
  121. package/dist/services/device-tier.d.ts +133 -0
  122. package/dist/services/device-tier.d.ts.map +1 -0
  123. package/dist/services/downloader.d.ts +94 -0
  124. package/dist/services/downloader.d.ts.map +1 -0
  125. package/dist/services/engine.d.ts +579 -0
  126. package/dist/services/engine.d.ts.map +1 -0
  127. package/dist/services/ensure-local-artifacts.d.ts +82 -0
  128. package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
  129. package/dist/services/external-scanner.d.ts +17 -0
  130. package/dist/services/external-scanner.d.ts.map +1 -0
  131. package/dist/services/ffi-llm-mock.d.ts +90 -0
  132. package/dist/services/ffi-llm-mock.d.ts.map +1 -0
  133. package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
  134. package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
  135. package/dist/services/ffi-streaming-backend.d.ts +201 -0
  136. package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
  137. package/dist/services/ffi-streaming-runner.d.ts +146 -0
  138. package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
  139. package/dist/services/gpu-autotune.d.ts +150 -0
  140. package/dist/services/gpu-autotune.d.ts.map +1 -0
  141. package/dist/services/gpu-detect.d.ts +56 -0
  142. package/dist/services/gpu-detect.d.ts.map +1 -0
  143. package/dist/services/handler-registry.d.ts +72 -0
  144. package/dist/services/handler-registry.d.ts.map +1 -0
  145. package/dist/services/hardware.d.ts +63 -0
  146. package/dist/services/hardware.d.ts.map +1 -0
  147. package/dist/services/image-description-runtime.d.ts +14 -0
  148. package/dist/services/image-description-runtime.d.ts.map +1 -0
  149. package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
  150. package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  151. package/dist/services/imagegen/backend-selector.d.ts +118 -0
  152. package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
  153. package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
  154. package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  155. package/dist/services/imagegen/errors.d.ts +16 -0
  156. package/dist/services/imagegen/errors.d.ts.map +1 -0
  157. package/dist/services/imagegen/index.d.ts +58 -0
  158. package/dist/services/imagegen/index.d.ts.map +1 -0
  159. package/dist/services/imagegen/mflux.d.ts +74 -0
  160. package/dist/services/imagegen/mflux.d.ts.map +1 -0
  161. package/dist/services/imagegen/sd-cpp.d.ts +181 -0
  162. package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
  163. package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  164. package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  165. package/dist/services/imagegen/types.d.ts +181 -0
  166. package/dist/services/imagegen/types.d.ts.map +1 -0
  167. package/dist/services/index.d.ts +31 -0
  168. package/dist/services/index.d.ts.map +1 -0
  169. package/dist/services/index.js +39453 -0
  170. package/dist/services/index.js.map +227 -0
  171. package/dist/services/inference-capabilities.d.ts +132 -0
  172. package/dist/services/inference-capabilities.d.ts.map +1 -0
  173. package/dist/services/inference-telemetry.d.ts +59 -0
  174. package/dist/services/inference-telemetry.d.ts.map +1 -0
  175. package/dist/services/ios-llama-streaming.d.ts +119 -0
  176. package/dist/services/ios-llama-streaming.d.ts.map +1 -0
  177. package/dist/services/kv-spill.d.ts +189 -0
  178. package/dist/services/kv-spill.d.ts.map +1 -0
  179. package/dist/services/latency-trace.d.ts +346 -0
  180. package/dist/services/latency-trace.d.ts.map +1 -0
  181. package/dist/services/lib-target.d.ts +55 -0
  182. package/dist/services/lib-target.d.ts.map +1 -0
  183. package/dist/services/live-signals.d.ts +86 -0
  184. package/dist/services/live-signals.d.ts.map +1 -0
  185. package/dist/services/llama-server-metrics.d.ts +114 -0
  186. package/dist/services/llama-server-metrics.d.ts.map +1 -0
  187. package/dist/services/llm-streaming-binding.d.ts +96 -0
  188. package/dist/services/llm-streaming-binding.d.ts.map +1 -0
  189. package/dist/services/load-args.d.ts +82 -0
  190. package/dist/services/load-args.d.ts.map +1 -0
  191. package/dist/services/manifest/index.d.ts +4 -0
  192. package/dist/services/manifest/index.d.ts.map +1 -0
  193. package/dist/services/manifest/schema.d.ts +903 -0
  194. package/dist/services/manifest/schema.d.ts.map +1 -0
  195. package/dist/services/manifest/types.d.ts +32 -0
  196. package/dist/services/manifest/types.d.ts.map +1 -0
  197. package/dist/services/manifest/validator.d.ts +66 -0
  198. package/dist/services/manifest/validator.d.ts.map +1 -0
  199. package/dist/services/memory-arbiter.d.ts +348 -0
  200. package/dist/services/memory-arbiter.d.ts.map +1 -0
  201. package/dist/services/memory-benchmark.d.ts +76 -0
  202. package/dist/services/memory-benchmark.d.ts.map +1 -0
  203. package/dist/services/memory-monitor.d.ts +128 -0
  204. package/dist/services/memory-monitor.d.ts.map +1 -0
  205. package/dist/services/memory-pressure.d.ts +130 -0
  206. package/dist/services/memory-pressure.d.ts.map +1 -0
  207. package/dist/services/mtp-doctor.d.ts +13 -0
  208. package/dist/services/mtp-doctor.d.ts.map +1 -0
  209. package/dist/services/network-policy.d.ts +127 -0
  210. package/dist/services/network-policy.d.ts.map +1 -0
  211. package/dist/services/paths.d.ts +6 -0
  212. package/dist/services/paths.d.ts.map +1 -0
  213. package/dist/services/planner-skeleton.d.ts +124 -0
  214. package/dist/services/planner-skeleton.d.ts.map +1 -0
  215. package/dist/services/providers.d.ts +38 -0
  216. package/dist/services/providers.d.ts.map +1 -0
  217. package/dist/services/ram-budget.d.ts +110 -0
  218. package/dist/services/ram-budget.d.ts.map +1 -0
  219. package/dist/services/readiness.d.ts +9 -0
  220. package/dist/services/readiness.d.ts.map +1 -0
  221. package/dist/services/recommendation.d.ts +111 -0
  222. package/dist/services/recommendation.d.ts.map +1 -0
  223. package/dist/services/registry.d.ts +33 -0
  224. package/dist/services/registry.d.ts.map +1 -0
  225. package/dist/services/router-handler.d.ts +92 -0
  226. package/dist/services/router-handler.d.ts.map +1 -0
  227. package/dist/services/routing-policy.d.ts +92 -0
  228. package/dist/services/routing-policy.d.ts.map +1 -0
  229. package/dist/services/routing-preferences.d.ts +8 -0
  230. package/dist/services/routing-preferences.d.ts.map +1 -0
  231. package/dist/services/runtime-target.d.ts +98 -0
  232. package/dist/services/runtime-target.d.ts.map +1 -0
  233. package/dist/services/service.d.ts +128 -0
  234. package/dist/services/service.d.ts.map +1 -0
  235. package/dist/services/session-pool.d.ts +72 -0
  236. package/dist/services/session-pool.d.ts.map +1 -0
  237. package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
  238. package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
  239. package/dist/services/structured-output/index.d.ts +2 -0
  240. package/dist/services/structured-output/index.d.ts.map +1 -0
  241. package/dist/services/structured-output.d.ts +311 -0
  242. package/dist/services/structured-output.d.ts.map +1 -0
  243. package/dist/services/system-memory.d.ts +33 -0
  244. package/dist/services/system-memory.d.ts.map +1 -0
  245. package/dist/services/types.d.ts +19 -0
  246. package/dist/services/types.d.ts.map +1 -0
  247. package/dist/services/verify-on-device.d.ts +34 -0
  248. package/dist/services/verify-on-device.d.ts.map +1 -0
  249. package/dist/services/verify.d.ts +8 -0
  250. package/dist/services/verify.d.ts.map +1 -0
  251. package/dist/services/vision/aosp-unavailable.d.ts +115 -0
  252. package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
  253. package/dist/services/vision/capacitor-llama.d.ts +99 -0
  254. package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
  255. package/dist/services/vision/cloud-fallback.d.ts +47 -0
  256. package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
  257. package/dist/services/vision/hash.d.ts +71 -0
  258. package/dist/services/vision/hash.d.ts.map +1 -0
  259. package/dist/services/vision/index.d.ts +95 -0
  260. package/dist/services/vision/index.d.ts.map +1 -0
  261. package/dist/services/vision/llama-server.d.ts +73 -0
  262. package/dist/services/vision/llama-server.d.ts.map +1 -0
  263. package/dist/services/vision/types.d.ts +162 -0
  264. package/dist/services/vision/types.d.ts.map +1 -0
  265. package/dist/services/vision/vast-fallback.d.ts +18 -0
  266. package/dist/services/vision/vast-fallback.d.ts.map +1 -0
  267. package/dist/services/vision-embedding-cache.d.ts +98 -0
  268. package/dist/services/vision-embedding-cache.d.ts.map +1 -0
  269. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
  270. package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
  271. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
  272. package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
  273. package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
  274. package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
  275. package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
  276. package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
  277. package/dist/services/voice/barge-in.d.ts +112 -0
  278. package/dist/services/voice/barge-in.d.ts.map +1 -0
  279. package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
  280. package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
  281. package/dist/services/voice/checkpoint-manager.d.ts +199 -0
  282. package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
  283. package/dist/services/voice/checkpoint-policy.d.ts +178 -0
  284. package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
  285. package/dist/services/voice/corpus-augment.d.ts +111 -0
  286. package/dist/services/voice/corpus-augment.d.ts.map +1 -0
  287. package/dist/services/voice/corpus-generator.d.ts +134 -0
  288. package/dist/services/voice/corpus-generator.d.ts.map +1 -0
  289. package/dist/services/voice/diarization-error-rate.d.ts +40 -0
  290. package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
  291. package/dist/services/voice/e2e-harness.d.ts +297 -0
  292. package/dist/services/voice/e2e-harness.d.ts.map +1 -0
  293. package/dist/services/voice/eager-context-builder.d.ts +170 -0
  294. package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
  295. package/dist/services/voice/echo-delay.d.ts +67 -0
  296. package/dist/services/voice/echo-delay.d.ts.map +1 -0
  297. package/dist/services/voice/echo-metrics.d.ts +7 -0
  298. package/dist/services/voice/echo-metrics.d.ts.map +1 -0
  299. package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
  300. package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
  301. package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
  302. package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  303. package/dist/services/voice/embedding-server.d.ts +37 -0
  304. package/dist/services/voice/embedding-server.d.ts.map +1 -0
  305. package/dist/services/voice/embedding.d.ts +132 -0
  306. package/dist/services/voice/embedding.d.ts.map +1 -0
  307. package/dist/services/voice/emotion-attribution.d.ts +68 -0
  308. package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
  309. package/dist/services/voice/engine-bridge.d.ts +762 -0
  310. package/dist/services/voice/engine-bridge.d.ts.map +1 -0
  311. package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
  312. package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  313. package/dist/services/voice/eot-classifier.d.ts +211 -0
  314. package/dist/services/voice/eot-classifier.d.ts.map +1 -0
  315. package/dist/services/voice/errors.d.ts +20 -0
  316. package/dist/services/voice/errors.d.ts.map +1 -0
  317. package/dist/services/voice/expressive-tags.d.ts +158 -0
  318. package/dist/services/voice/expressive-tags.d.ts.map +1 -0
  319. package/dist/services/voice/ffi-bindings.d.ts +696 -0
  320. package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
  321. package/dist/services/voice/first-line-cache.d.ts +181 -0
  322. package/dist/services/voice/first-line-cache.d.ts.map +1 -0
  323. package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
  324. package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
  325. package/dist/services/voice/index.d.ts +96 -0
  326. package/dist/services/voice/index.d.ts.map +1 -0
  327. package/dist/services/voice/kokoro/index.d.ts +24 -0
  328. package/dist/services/voice/kokoro/index.d.ts.map +1 -0
  329. package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
  330. package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  331. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  332. package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  333. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  334. package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  335. package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  336. package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  337. package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
  338. package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
  339. package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
  340. package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  341. package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
  342. package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  343. package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
  344. package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  345. package/dist/services/voice/kokoro/types.d.ts +82 -0
  346. package/dist/services/voice/kokoro/types.d.ts.map +1 -0
  347. package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
  348. package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  349. package/dist/services/voice/kokoro/voices.d.ts +30 -0
  350. package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
  351. package/dist/services/voice/lifecycle.d.ts +135 -0
  352. package/dist/services/voice/lifecycle.d.ts.map +1 -0
  353. package/dist/services/voice/live-diarization-session.d.ts +196 -0
  354. package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
  355. package/dist/services/voice/metric-math.d.ts +10 -0
  356. package/dist/services/voice/metric-math.d.ts.map +1 -0
  357. package/dist/services/voice/mic-source.d.ts +136 -0
  358. package/dist/services/voice/mic-source.d.ts.map +1 -0
  359. package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
  360. package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
  361. package/dist/services/voice/optimistic-policy.d.ts +109 -0
  362. package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
  363. package/dist/services/voice/optimistic-rollback.d.ts +151 -0
  364. package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
  365. package/dist/services/voice/partial-stabilizer.d.ts +73 -0
  366. package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
  367. package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
  368. package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  369. package/dist/services/voice/phrase-cache.d.ts +76 -0
  370. package/dist/services/voice/phrase-cache.d.ts.map +1 -0
  371. package/dist/services/voice/phrase-chunker.d.ts +62 -0
  372. package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
  373. package/dist/services/voice/pipeline-impls.d.ts +151 -0
  374. package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
  375. package/dist/services/voice/pipeline.d.ts +216 -0
  376. package/dist/services/voice/pipeline.d.ts.map +1 -0
  377. package/dist/services/voice/prefill-client.d.ts +123 -0
  378. package/dist/services/voice/prefill-client.d.ts.map +1 -0
  379. package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
  380. package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  381. package/dist/services/voice/profile-store.d.ts +248 -0
  382. package/dist/services/voice/profile-store.d.ts.map +1 -0
  383. package/dist/services/voice/ring-buffer.d.ts +40 -0
  384. package/dist/services/voice/ring-buffer.d.ts.map +1 -0
  385. package/dist/services/voice/rollback-queue.d.ts +24 -0
  386. package/dist/services/voice/rollback-queue.d.ts.map +1 -0
  387. package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
  388. package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  389. package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
  390. package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  391. package/dist/services/voice/scheduler.d.ts +146 -0
  392. package/dist/services/voice/scheduler.d.ts.map +1 -0
  393. package/dist/services/voice/self-voice-imprint.d.ts +33 -0
  394. package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
  395. package/dist/services/voice/shared-resources.d.ts +204 -0
  396. package/dist/services/voice/shared-resources.d.ts.map +1 -0
  397. package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  398. package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  399. package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
  400. package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  401. package/dist/services/voice/speaker/diarizer.d.ts +75 -0
  402. package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
  403. package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
  404. package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  405. package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
  406. package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  407. package/dist/services/voice/speaker/encoder.d.ts +37 -0
  408. package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
  409. package/dist/services/voice/speaker-imprint.d.ts +83 -0
  410. package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
  411. package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
  412. package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
  413. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
  414. package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
  415. package/dist/services/voice/system-audio-sink.d.ts +73 -0
  416. package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
  417. package/dist/services/voice/transcriber.d.ts +244 -0
  418. package/dist/services/voice/transcriber.d.ts.map +1 -0
  419. package/dist/services/voice/transcript-knowledge.d.ts +37 -0
  420. package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
  421. package/dist/services/voice/transcript-service.d.ts +60 -0
  422. package/dist/services/voice/transcript-service.d.ts.map +1 -0
  423. package/dist/services/voice/transcript-store.d.ts +64 -0
  424. package/dist/services/voice/transcript-store.d.ts.map +1 -0
  425. package/dist/services/voice/turn-controller.d.ts +183 -0
  426. package/dist/services/voice/turn-controller.d.ts.map +1 -0
  427. package/dist/services/voice/types.d.ts +643 -0
  428. package/dist/services/voice/types.d.ts.map +1 -0
  429. package/dist/services/voice/vad.d.ts +283 -0
  430. package/dist/services/voice/vad.d.ts.map +1 -0
  431. package/dist/services/voice/voice-budget.d.ts +241 -0
  432. package/dist/services/voice/voice-budget.d.ts.map +1 -0
  433. package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
  434. package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  435. package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
  436. package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
  437. package/dist/services/voice/voice-preset-format.d.ts +158 -0
  438. package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
  439. package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
  440. package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
  441. package/dist/services/voice/voice-profile-routes.d.ts +83 -0
  442. package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
  443. package/dist/services/voice/voice-scenario.d.ts +131 -0
  444. package/dist/services/voice/voice-scenario.d.ts.map +1 -0
  445. package/dist/services/voice/voice-state-machine.d.ts +364 -0
  446. package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
  447. package/dist/services/voice/voice-workbench-report.d.ts +117 -0
  448. package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
  449. package/dist/services/voice/wake-word-ggml.d.ts +100 -0
  450. package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
  451. package/dist/services/voice/wake-word.d.ts +255 -0
  452. package/dist/services/voice/wake-word.d.ts.map +1 -0
  453. package/dist/services/voice/wav-codec.d.ts +11 -0
  454. package/dist/services/voice/wav-codec.d.ts.map +1 -0
  455. package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
  456. package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
  457. package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
  458. package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
  459. package/dist/services/voice/workbench-logic-services.d.ts +36 -0
  460. package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
  461. package/dist/services/voice/workbench-real-services.d.ts +17 -0
  462. package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
  463. package/dist/services/voice/workbench-scenarios.d.ts +24 -0
  464. package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
  465. package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  466. package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  467. package/dist/services/voice-model-updater.d.ts +240 -0
  468. package/dist/services/voice-model-updater.d.ts.map +1 -0
  469. package/dist/services/voice-prewarm.d.ts +3 -0
  470. package/dist/services/voice-prewarm.d.ts.map +1 -0
  471. package/dist/voice-workbench.d.ts +18 -0
  472. package/dist/voice-workbench.d.ts.map +1 -0
  473. package/dist/voice-workbench.js +5259 -0
  474. package/dist/voice-workbench.js.map +34 -0
  475. package/package.json +101 -15
  476. package/registry-entry.json +137 -0
  477. package/src/actions/generate-media.ts +647 -0
  478. package/src/actions/identify-speaker.ts +171 -0
  479. package/src/actions/transcription-control.test.ts +100 -0
  480. package/src/actions/transcription-control.ts +127 -0
  481. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  482. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  483. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  484. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  485. package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
  486. package/src/adapters/capacitor-llama/environment.ts +71 -0
  487. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  488. package/src/adapters/capacitor-llama/index.ts +831 -0
  489. package/src/adapters/capacitor-llama/loader.ts +109 -0
  490. package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
  491. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  492. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  493. package/src/adapters/capacitor-llama/types.ts +374 -0
  494. package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
  495. package/src/backends/apple-foundation.ts +127 -0
  496. package/src/index.ts +62 -0
  497. package/src/local-inference-routes.test.ts +390 -0
  498. package/src/local-inference-routes.ts +1625 -0
  499. package/src/provider.ts +1111 -0
  500. package/src/routes/compat-helpers.ts +275 -0
  501. package/src/routes/family-member-route.ts +353 -0
  502. package/src/routes/index.ts +61 -0
  503. package/src/routes/live-diarization-route.test.ts +347 -0
  504. package/src/routes/live-diarization-route.ts +198 -0
  505. package/src/routes/local-inference-asr-route.test.ts +246 -0
  506. package/src/routes/local-inference-asr-route.ts +166 -0
  507. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  508. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  509. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  510. package/src/routes/local-inference-compat-routes.ts +775 -0
  511. package/src/routes/local-inference-tts-route.test.ts +179 -0
  512. package/src/routes/local-inference-tts-route.ts +230 -0
  513. package/src/routes/native-pcm-turn-route.test.ts +136 -0
  514. package/src/routes/native-pcm-turn-route.ts +121 -0
  515. package/src/routes/transcript-audio-store.ts +27 -0
  516. package/src/routes/transcripts-routes.test.ts +195 -0
  517. package/src/routes/transcripts-routes.ts +191 -0
  518. package/src/routes/voice-first-run-routes.ts +524 -0
  519. package/src/routes/voice-models-routes.ts +554 -0
  520. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  521. package/src/routes/voice-profiles-management-routes.ts +476 -0
  522. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  523. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  524. package/src/runtime/bionic-wire-encoding.test.ts +147 -0
  525. package/src/runtime/capacitor-llama.d.ts +25 -0
  526. package/src/runtime/embedding-manager-support.ts +497 -0
  527. package/src/runtime/embedding-presets.ts +81 -0
  528. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  529. package/src/runtime/embedding-warmup-policy.ts +48 -0
  530. package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
  531. package/src/runtime/ensure-local-inference-handler.ts +1640 -0
  532. package/src/runtime/index.ts +36 -0
  533. package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
  534. package/src/runtime/mobile-local-inference-gate.ts +99 -0
  535. package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
  536. package/src/runtime/voice-entity-binding.ts +368 -0
  537. package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
  538. package/src/services/README.md +71 -0
  539. package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
  540. package/src/services/__tests__/backend-selector.test.ts +101 -0
  541. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  542. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  543. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  544. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  545. package/src/services/__tests__/runtime-target.test.ts +176 -0
  546. package/src/services/active-model-context-fit.test.ts +125 -0
  547. package/src/services/active-model-switch-rollback.test.ts +183 -0
  548. package/src/services/active-model.ts +1416 -0
  549. package/src/services/asr-provenance.ts +68 -0
  550. package/src/services/assignment-validation.test.ts +118 -0
  551. package/src/services/assignments.test.ts +106 -0
  552. package/src/services/assignments.ts +278 -0
  553. package/src/services/backend-selector.ts +95 -0
  554. package/src/services/backend.test.ts +84 -0
  555. package/src/services/backend.ts +791 -0
  556. package/src/services/bionic-host-loader.test.ts +226 -0
  557. package/src/services/bionic-host-loader.ts +252 -0
  558. package/src/services/bundled-models.ts +129 -0
  559. package/src/services/cache-bridge.test.ts +516 -0
  560. package/src/services/cache-bridge.ts +423 -0
  561. package/src/services/catalog.test.ts +259 -0
  562. package/src/services/catalog.ts +33 -0
  563. package/src/services/checkpoint-client.ts +258 -0
  564. package/src/services/checkpoint-manager.ts +474 -0
  565. package/src/services/cloud-fallback.ts +230 -0
  566. package/src/services/context-fit.test.ts +121 -0
  567. package/src/services/context-fit.ts +113 -0
  568. package/src/services/conversation-registry.test.ts +235 -0
  569. package/src/services/conversation-registry.ts +264 -0
  570. package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
  571. package/src/services/device-bridge.ts +1237 -0
  572. package/src/services/device-resource-metrics.test.ts +98 -0
  573. package/src/services/device-resource-metrics.ts +346 -0
  574. package/src/services/device-tier.test.ts +458 -0
  575. package/src/services/device-tier.ts +502 -0
  576. package/src/services/downloader.test.ts +888 -0
  577. package/src/services/downloader.ts +1039 -0
  578. package/src/services/engine-direct-bundle.test.ts +90 -0
  579. package/src/services/engine-streaming.test.ts +80 -0
  580. package/src/services/engine.ts +2096 -0
  581. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  582. package/src/services/ensure-local-artifacts.test.ts +368 -0
  583. package/src/services/ensure-local-artifacts.ts +351 -0
  584. package/src/services/external-scanner.ts +312 -0
  585. package/src/services/ffi-llm-mock.ts +354 -0
  586. package/src/services/ffi-llm-streaming-abi.ts +445 -0
  587. package/src/services/ffi-streaming-backend.ts +418 -0
  588. package/src/services/ffi-streaming-runner.test.ts +220 -0
  589. package/src/services/ffi-streaming-runner.ts +407 -0
  590. package/src/services/ffi-unload-ordering.test.ts +166 -0
  591. package/src/services/fused-eliza1-no-regression.test.ts +144 -0
  592. package/src/services/gpu-autotune.ts +534 -0
  593. package/src/services/gpu-detect.ts +139 -0
  594. package/src/services/handler-registry.ts +240 -0
  595. package/src/services/hardware.test.ts +236 -0
  596. package/src/services/hardware.ts +438 -0
  597. package/src/services/image-description-runtime.test.ts +61 -0
  598. package/src/services/image-description-runtime.ts +118 -0
  599. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  600. package/src/services/imagegen/backend-selector.test.ts +190 -0
  601. package/src/services/imagegen/backend-selector.ts +277 -0
  602. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  603. package/src/services/imagegen/errors.ts +40 -0
  604. package/src/services/imagegen/index.ts +144 -0
  605. package/src/services/imagegen/mflux.ts +313 -0
  606. package/src/services/imagegen/sd-cpp.ts +715 -0
  607. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  608. package/src/services/imagegen/types.ts +193 -0
  609. package/src/services/index.ts +229 -0
  610. package/src/services/inference-capabilities.test.ts +75 -0
  611. package/src/services/inference-capabilities.ts +204 -0
  612. package/src/services/inference-telemetry.ts +143 -0
  613. package/src/services/ios-llama-streaming.ts +248 -0
  614. package/src/services/kv-spill.test.ts +222 -0
  615. package/src/services/kv-spill.ts +357 -0
  616. package/src/services/latency-trace.test.ts +266 -0
  617. package/src/services/latency-trace.ts +844 -0
  618. package/src/services/lib-target.test.ts +145 -0
  619. package/src/services/lib-target.ts +102 -0
  620. package/src/services/live-signals.test.ts +132 -0
  621. package/src/services/live-signals.ts +177 -0
  622. package/src/services/llama-server-metrics.test.ts +168 -0
  623. package/src/services/llama-server-metrics.ts +304 -0
  624. package/src/services/llm-streaming-binding.ts +136 -0
  625. package/src/services/load-args.ts +81 -0
  626. package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
  627. package/src/services/manifest/index.ts +72 -0
  628. package/src/services/manifest/manifest.test.ts +791 -0
  629. package/src/services/manifest/schema.ts +761 -0
  630. package/src/services/manifest/types.ts +61 -0
  631. package/src/services/manifest/validator.ts +633 -0
  632. package/src/services/memory-arbiter.test.ts +558 -0
  633. package/src/services/memory-arbiter.ts +991 -0
  634. package/src/services/memory-benchmark.test.ts +91 -0
  635. package/src/services/memory-benchmark.ts +354 -0
  636. package/src/services/memory-monitor.test.ts +232 -0
  637. package/src/services/memory-monitor.ts +309 -0
  638. package/src/services/memory-pressure.ts +414 -0
  639. package/src/services/mtp-doctor.ts +86 -0
  640. package/src/services/network-policy.ts +346 -0
  641. package/src/services/paths.ts +25 -0
  642. package/src/services/planner-skeleton.ts +175 -0
  643. package/src/services/providers.ts +507 -0
  644. package/src/services/ram-budget-cache.test.ts +164 -0
  645. package/src/services/ram-budget.ts +309 -0
  646. package/src/services/readiness.test.ts +87 -0
  647. package/src/services/readiness.ts +238 -0
  648. package/src/services/recommendation.test.ts +216 -0
  649. package/src/services/recommendation.ts +671 -0
  650. package/src/services/registry.ts +157 -0
  651. package/src/services/required-kernels-gate.test.ts +64 -0
  652. package/src/services/router-handler.test.ts +45 -0
  653. package/src/services/router-handler.ts +426 -0
  654. package/src/services/routing-policy.test.ts +352 -0
  655. package/src/services/routing-policy.ts +367 -0
  656. package/src/services/routing-preferences.ts +17 -0
  657. package/src/services/runtime-target.ts +154 -0
  658. package/src/services/service.test.ts +223 -0
  659. package/src/services/service.ts +750 -0
  660. package/src/services/session-pool.ts +153 -0
  661. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  662. package/src/services/structured-output/deterministic-repair.ts +443 -0
  663. package/src/services/structured-output/index.ts +4 -0
  664. package/src/services/structured-output.test.ts +483 -0
  665. package/src/services/structured-output.ts +712 -0
  666. package/src/services/system-memory.test.ts +47 -0
  667. package/src/services/system-memory.ts +67 -0
  668. package/src/services/transcription-priority.test.ts +211 -0
  669. package/src/services/types.ts +59 -0
  670. package/src/services/verify-on-device.test.ts +87 -0
  671. package/src/services/verify-on-device.ts +127 -0
  672. package/src/services/verify.ts +13 -0
  673. package/src/services/vision/aosp-unavailable.ts +163 -0
  674. package/src/services/vision/capacitor-llama.ts +255 -0
  675. package/src/services/vision/cloud-fallback.test.ts +243 -0
  676. package/src/services/vision/cloud-fallback.ts +268 -0
  677. package/src/services/vision/fallback-chain.test.ts +86 -0
  678. package/src/services/vision/hash.ts +157 -0
  679. package/src/services/vision/index.ts +251 -0
  680. package/src/services/vision/llama-server.ts +177 -0
  681. package/src/services/vision/types.ts +163 -0
  682. package/src/services/vision/vast-fallback.ts +127 -0
  683. package/src/services/vision-embedding-cache.ts +189 -0
  684. package/src/services/voice/VOICE_WORKBENCH.md +133 -0
  685. package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
  686. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  687. package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
  688. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  689. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  690. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  691. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  692. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  693. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  694. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  695. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  696. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  697. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  698. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  699. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  700. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  701. package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
  702. package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
  703. package/src/services/voice/asr-timed.real.test.ts +139 -0
  704. package/src/services/voice/audio-frame-consumer.test.ts +669 -0
  705. package/src/services/voice/audio-frame-consumer.ts +651 -0
  706. package/src/services/voice/barge-in.test.ts +244 -0
  707. package/src/services/voice/barge-in.ts +335 -0
  708. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  709. package/src/services/voice/cancellation-coordinator.ts +269 -0
  710. package/src/services/voice/checkpoint-manager.ts +401 -0
  711. package/src/services/voice/checkpoint-policy.ts +336 -0
  712. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  713. package/src/services/voice/corpus-augment.test.ts +276 -0
  714. package/src/services/voice/corpus-augment.ts +451 -0
  715. package/src/services/voice/corpus-generator.test.ts +201 -0
  716. package/src/services/voice/corpus-generator.ts +413 -0
  717. package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
  718. package/src/services/voice/diarization-error-rate.test.ts +100 -0
  719. package/src/services/voice/diarization-error-rate.ts +249 -0
  720. package/src/services/voice/e2e-harness.der.test.ts +94 -0
  721. package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
  722. package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
  723. package/src/services/voice/e2e-harness.test.ts +182 -0
  724. package/src/services/voice/e2e-harness.ts +902 -0
  725. package/src/services/voice/eager-context-builder.ts +262 -0
  726. package/src/services/voice/echo-delay.test.ts +118 -0
  727. package/src/services/voice/echo-delay.ts +135 -0
  728. package/src/services/voice/echo-metrics.test.ts +17 -0
  729. package/src/services/voice/echo-metrics.ts +20 -0
  730. package/src/services/voice/echo-reference-buffer.test.ts +86 -0
  731. package/src/services/voice/echo-reference-buffer.ts +165 -0
  732. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  733. package/src/services/voice/embedding-server.ts +200 -0
  734. package/src/services/voice/embedding.test.ts +131 -0
  735. package/src/services/voice/embedding.ts +242 -0
  736. package/src/services/voice/emotion-attribution.test.ts +129 -0
  737. package/src/services/voice/emotion-attribution.ts +361 -0
  738. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  739. package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
  740. package/src/services/voice/engine-bridge.test.ts +384 -0
  741. package/src/services/voice/engine-bridge.ts +2343 -0
  742. package/src/services/voice/eot-classifier-ggml.ts +569 -0
  743. package/src/services/voice/eot-classifier.test.ts +98 -0
  744. package/src/services/voice/eot-classifier.ts +422 -0
  745. package/src/services/voice/errors.ts +34 -0
  746. package/src/services/voice/expressive-tags.asr.test.ts +77 -0
  747. package/src/services/voice/expressive-tags.test.ts +102 -0
  748. package/src/services/voice/expressive-tags.ts +405 -0
  749. package/src/services/voice/ffi-bindings.test.ts +735 -0
  750. package/src/services/voice/ffi-bindings.ts +3387 -0
  751. package/src/services/voice/first-line-cache.ts +725 -0
  752. package/src/services/voice/fused-eot-scorer.ts +139 -0
  753. package/src/services/voice/index.ts +502 -0
  754. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
  755. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
  756. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  757. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  758. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  759. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  760. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  761. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
  762. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  763. package/src/services/voice/kokoro/index.ts +79 -0
  764. package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
  765. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  766. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  767. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  768. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  769. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  770. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  771. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  772. package/src/services/voice/kokoro/runtime-selection.ts +64 -0
  773. package/src/services/voice/kokoro/types.ts +95 -0
  774. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  775. package/src/services/voice/kokoro/voices.ts +64 -0
  776. package/src/services/voice/lifecycle.test.ts +315 -0
  777. package/src/services/voice/lifecycle.ts +301 -0
  778. package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
  779. package/src/services/voice/live-diarization-session.ts +622 -0
  780. package/src/services/voice/metric-math.test.ts +61 -0
  781. package/src/services/voice/metric-math.ts +25 -0
  782. package/src/services/voice/mic-source.test.ts +210 -0
  783. package/src/services/voice/mic-source.ts +503 -0
  784. package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
  785. package/src/services/voice/nlms-echo-canceller.ts +317 -0
  786. package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
  787. package/src/services/voice/optimistic-policy.test.ts +101 -0
  788. package/src/services/voice/optimistic-policy.ts +192 -0
  789. package/src/services/voice/optimistic-rollback.ts +343 -0
  790. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  791. package/src/services/voice/partial-stabilizer.ts +140 -0
  792. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  793. package/src/services/voice/phrase-cache.test.ts +242 -0
  794. package/src/services/voice/phrase-cache.ts +186 -0
  795. package/src/services/voice/phrase-chunker.test.ts +239 -0
  796. package/src/services/voice/phrase-chunker.ts +281 -0
  797. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  798. package/src/services/voice/pipeline-impls.test.ts +292 -0
  799. package/src/services/voice/pipeline-impls.ts +315 -0
  800. package/src/services/voice/pipeline.ts +504 -0
  801. package/src/services/voice/prefill-client.ts +316 -0
  802. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  803. package/src/services/voice/profile-store.ts +887 -0
  804. package/src/services/voice/real-audio-decode.test.ts +148 -0
  805. package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
  806. package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
  807. package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
  808. package/src/services/voice/ring-buffer.test.ts +129 -0
  809. package/src/services/voice/ring-buffer.ts +123 -0
  810. package/src/services/voice/rollback-queue.ts +74 -0
  811. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  812. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  813. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  814. package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
  815. package/src/services/voice/scheduler.t2.test.ts +141 -0
  816. package/src/services/voice/scheduler.ts +927 -0
  817. package/src/services/voice/self-voice-imprint.test.ts +59 -0
  818. package/src/services/voice/self-voice-imprint.ts +102 -0
  819. package/src/services/voice/shared-resources.ts +343 -0
  820. package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
  821. package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
  822. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  823. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  824. package/src/services/voice/speaker/diarizer.ts +218 -0
  825. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  826. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  827. package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
  828. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  829. package/src/services/voice/speaker/encoder.ts +105 -0
  830. package/src/services/voice/speaker-imprint.test.ts +185 -0
  831. package/src/services/voice/speaker-imprint.ts +312 -0
  832. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  833. package/src/services/voice/speaker-preset-cache.ts +195 -0
  834. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  835. package/src/services/voice/system-audio-sink.test.ts +29 -0
  836. package/src/services/voice/system-audio-sink.ts +366 -0
  837. package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
  838. package/src/services/voice/transcriber.test.ts +392 -0
  839. package/src/services/voice/transcriber.ts +704 -0
  840. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  841. package/src/services/voice/transcript-knowledge.ts +75 -0
  842. package/src/services/voice/transcript-service.test.ts +195 -0
  843. package/src/services/voice/transcript-service.ts +205 -0
  844. package/src/services/voice/transcript-store.test.ts +189 -0
  845. package/src/services/voice/transcript-store.ts +164 -0
  846. package/src/services/voice/turn-controller.test.ts +575 -0
  847. package/src/services/voice/turn-controller.ts +596 -0
  848. package/src/services/voice/types.ts +699 -0
  849. package/src/services/voice/vad.test.ts +498 -0
  850. package/src/services/voice/vad.ts +832 -0
  851. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  852. package/src/services/voice/voice-budget.test.ts +415 -0
  853. package/src/services/voice/voice-budget.ts +635 -0
  854. package/src/services/voice/voice-duet.test.ts +375 -0
  855. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  856. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  857. package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
  858. package/src/services/voice/voice-preload-predictor.test.ts +130 -0
  859. package/src/services/voice/voice-preload-predictor.ts +113 -0
  860. package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
  861. package/src/services/voice/voice-preset-format.test.ts +75 -0
  862. package/src/services/voice/voice-preset-format.ts +713 -0
  863. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  864. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  865. package/src/services/voice/voice-profile-artifact.ts +518 -0
  866. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  867. package/src/services/voice/voice-profile-routes.ts +425 -0
  868. package/src/services/voice/voice-scenario.test.ts +159 -0
  869. package/src/services/voice/voice-scenario.ts +280 -0
  870. package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
  871. package/src/services/voice/voice-state-machine.ts +727 -0
  872. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  873. package/src/services/voice/voice-workbench-report.ts +367 -0
  874. package/src/services/voice/voice-workbench.test.ts +158 -0
  875. package/src/services/voice/voice.test.ts +1070 -0
  876. package/src/services/voice/wake-word-ggml.ts +319 -0
  877. package/src/services/voice/wake-word.test.ts +298 -0
  878. package/src/services/voice/wake-word.ts +554 -0
  879. package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
  880. package/src/services/voice/wav-codec.test.ts +32 -0
  881. package/src/services/voice/wav-codec.ts +101 -0
  882. package/src/services/voice/workbench-entrypoint.test.ts +55 -0
  883. package/src/services/voice/workbench-entrypoint.ts +88 -0
  884. package/src/services/voice/workbench-headless-runner.test.ts +162 -0
  885. package/src/services/voice/workbench-headless-runner.ts +396 -0
  886. package/src/services/voice/workbench-logic-services.test.ts +225 -0
  887. package/src/services/voice/workbench-logic-services.ts +184 -0
  888. package/src/services/voice/workbench-real-services.ts +629 -0
  889. package/src/services/voice/workbench-scenarios.ts +407 -0
  890. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  891. package/src/services/voice-model-updater.ts +724 -0
  892. package/src/services/voice-prewarm.ts +51 -0
  893. package/src/voice-workbench.ts +71 -0
@@ -0,0 +1,791 @@
1
+ /**
2
+ * Local-inference backend interface and dispatcher.
3
+ *
4
+ * Both shipping backends are served by the SAME in-process FFI
5
+ * `libelizainference` library behind the SAME streaming symbols — the
6
+ * difference is which in-process runtime the FFI's `llm_backend_select` drives:
7
+ *
8
+ * - `llama-cpp` → the optimized in-process FFI llama.cpp path (the default).
9
+ * MTP, n-gram drafter, lookahead, `-ot` MoE offload, TurboQuant KV
10
+ * cache, mlock/no-mmap/mmproj, etc. all live here. Serves the `.gguf`.
11
+ * - `litert-lm` → the in-process LiteRT-LM backend (Android NPU / GPU
12
+ * delegate, gated `-DELIZA_ENABLE_LITERT`). Serves a `.litertlm` text
13
+ * artifact staged under `<bundleRoot>/text/`. The dispatcher passes
14
+ * `ELIZA_LLM_BACKEND=litert-lm` through the load; the C-side
15
+ * `llm_backend_select` reads it (and probes `text/*.litertlm`) and routes
16
+ * to the LiteRT factory. See `tools/omnivoice/src/llm-backend.h`.
17
+ *
18
+ * The dispatcher decides which one to use per-load based on:
19
+ *
20
+ * 1. `ELIZA_INFERENCE_BACKEND` env override — `llama-cpp` / `litert-lm` /
21
+ * `auto`. A `litert-lm` force is honoured only when the build/platform
22
+ * supports LiteRT and the bundle ships a `.litertlm` (else hard error).
23
+ * 2. A `.litertlm` text artifact in the bundle AND LiteRT support on this
24
+ * build/platform → `litert-lm`. GGUF stays the default whenever the
25
+ * LiteRT artifact or the build support is absent.
26
+ * 3. Catalog `runtime.optimizations.requiresKernel` — if any specialised
27
+ * llama.cpp kernel is required (e.g. `turbo3`), the
28
+ * dispatcher picks `llama-cpp`. Legacy bindings cannot
29
+ * provide these kernels at all.
30
+ * 4. Default: optimized llama.cpp FFI.
31
+ *
32
+ * The dispatcher does NOT own backend internals. It owns selection only,
33
+ * plus a small load-state
34
+ * cache so callers can swap models without touching either backend
35
+ * directly.
36
+ */
37
+
38
+ import { findCatalogModel } from "./catalog";
39
+ import type { StructuredGenerateParams } from "./structured-output";
40
+ import type { CatalogModel, LocalRuntimeKernel } from "./types";
41
+ import type { VerifierStreamEvent } from "./voice/types";
42
+
43
+ /**
44
+ * Per-load runtime overrides forwarded by the dispatcher to whichever
45
+ * backend handles the load. Mirror of the relevant fields on
46
+ * `LocalInferenceLoadArgs` from `active-model.ts` — kept inline here so
47
+ * `backend.ts` stays free of cross-file circular imports (active-model
48
+ * imports engine, engine imports backend).
49
+ */
50
+ export interface BackendLoadOverrides {
51
+ contextSize?: number;
52
+ cacheTypeK?: string;
53
+ cacheTypeV?: string;
54
+ gpuLayers?: number | "auto" | "max";
55
+ kvOffload?: "cpu" | "gpu" | "split" | { gpuLayers: number };
56
+ flashAttention?: boolean;
57
+ mmap?: boolean;
58
+ mlock?: boolean;
59
+ useGpu?: boolean;
60
+ /** Absolute path to a multimodal projector GGUF passed to the FFI runtime. */
61
+ mmprojPath?: string;
62
+ /** Absolute path to the MTP drafter GGUF passed to the FFI runtime. */
63
+ draftModelPath?: string;
64
+ /** Eliza-1 bundle root for direct bundle loads not present in the registry. */
65
+ bundleRoot?: string;
66
+ /** Manifest path for direct bundle loads not present in the registry. */
67
+ manifestPath?: string;
68
+ /**
69
+ * Absolute path to a `.litertlm` LiteRT-LM text artifact staged under
70
+ * `<bundleRoot>/text/`, when the bundle ships one. Presence (plus LiteRT
71
+ * build/platform support) routes the load to the `litert-lm` backend; the
72
+ * `.gguf` `modelPath` stays the GGUF default otherwise.
73
+ */
74
+ litertModelPath?: string;
75
+ }
76
+
77
+ export interface BackendPlan {
78
+ /** Absolute path to the GGUF on disk. */
79
+ modelPath: string;
80
+ /**
81
+ * Catalog model id, when known. The dispatcher uses this to pull
82
+ * `runtime.optimizations` and `runtime.mtp` — without it, we can
83
+ * only honour the env override and fall back to the default `llama-cpp` path.
84
+ */
85
+ modelId?: string;
86
+ /** Catalog entry, when the caller already resolved it. */
87
+ catalog?: CatalogModel;
88
+ /**
89
+ * Per-load runtime overrides resolved by the active-model coordinator.
90
+ * The dispatcher passes these through verbatim to the chosen backend
91
+ * so the in-process binding can honour cache-type and contextSize
92
+ * requests instead of silently dropping them.
93
+ */
94
+ overrides?: BackendLoadOverrides;
95
+ }
96
+
97
+ export interface GenerateArgs extends StructuredGenerateParams {
98
+ prompt: string;
99
+ stopSequences?: string[];
100
+ /** Upper bound on output tokens; defaults to 2048. */
101
+ maxTokens?: number;
102
+ /** Sampling temperature in the range 0..1; defaults to 0.7. */
103
+ temperature?: number;
104
+ /** Nucleus sampling; defaults to 0.9. */
105
+ topP?: number;
106
+ /**
107
+ * Optional cache key from the runtime's `ProviderCachePlan`. Identical
108
+ * keys reuse the same KV cache prefix: the `llama-cpp` FFI backend derives
109
+ * a deterministic slot so requests with the same key land on the same
110
+ * persisted KV state. Empty / absent keys fall through to the historical
111
+ * stateless path.
112
+ */
113
+ cacheKey?: string;
114
+ /**
115
+ * Per-request abort signal. The `llama-cpp` FFI backend honours it
116
+ * cooperatively by cancelling the active FFI stream. Callers that want
117
+ * hard cancel for things like app pause / kill-switch pass the same signal
118
+ * here that they pass into `runtime.useModel`.
119
+ */
120
+ signal?: AbortSignal;
121
+ /**
122
+ * Optional per-request backend transport budget. This should be at least as
123
+ * long as the caller's user-visible generation timeout; shorter inner
124
+ * timeouts abort long local-prefill turns before the chat route can make the
125
+ * user-facing decision.
126
+ */
127
+ requestTimeoutMs?: number;
128
+ /**
129
+ * Incremental accepted text from the backend. The `llama-cpp` FFI backend
130
+ * calls this as accepted chunks arrive, per `llmStreamNext` step (it
131
+ * streams even when a `grammar` is set).
132
+ */
133
+ onTextChunk?: (chunk: string) => void | Promise<void>;
134
+ /**
135
+ * Max tokens the FFI backend decodes per `llmStreamNext` step — i.e. the
136
+ * granularity of `onTextChunk` emission. Smaller ⇒ smoother token-by-token
137
+ * streaming to the UI at the cost of more FFI round-trips per response.
138
+ * Unset ⇒ the backend default (coarse, throughput-tuned). The text/chat
139
+ * handler sets a small value for smooth streaming; voice leaves it unset.
140
+ */
141
+ maxTokensPerStep?: number;
142
+ /**
143
+ * Whether this generation is user-visible text and therefore eligible for
144
+ * voice-mode TTS. Internal JSON / planner calls must not be spoken.
145
+ */
146
+ voiceOutput?: "user-visible" | "internal";
147
+ /**
148
+ * Native verifier stream from speculative MTP. Exact accept/reject token
149
+ * ranges let voice TTS rollback avoid inferring state from text chunks.
150
+ */
151
+ onVerifierEvent?: (event: VerifierStreamEvent) => void | Promise<void>;
152
+ }
153
+
154
+ export type GenerateResult = string;
155
+
156
/**
 * Result of the usage-instrumented generation path
 * (`LocalInferenceBackend.generateWithUsage`): the generated text plus
 * optional accounting and timing metadata.
 */
export interface LocalGenerateWithUsageResult {
  /** The generated text. */
  text: string;
  /**
   * Token accounting for the turn. Every counter is optional, and the index
   * signature admits additional backend-specific keys.
   */
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
    [key: string]: unknown;
  };
  /** NOTE(review): presumably the parallel slot the request ran on — confirm against the backend. */
  slotId?: number;
  /** NOTE(review): looks like time-to-first-token in milliseconds, `null` when not measured — confirm. */
  firstTokenMs?: number | null;
  /** Per-turn MTP (speculative decoding) statistics, when the backend reports them. */
  mtpStats?: {
    drafted: number;
    accepted: number;
    /** NOTE(review): presumably accepted / drafted, `null` when the ratio is undefined — confirm. */
    acceptanceRate: number | null;
  };
}
172
+
173
/**
 * The in-process runtime that the FFI streaming pipe drives for a given load.
 *
 * - `llama-cpp` — the default GGUF path.
 * - `litert-lm` — the LiteRT-LM `.litertlm` path (same FFI symbols, selected
 *   via `ELIZA_LLM_BACKEND` + the C-side `llm_backend_select`).
 *
 * This is the dispatcher's *selection*. It is distinct from
 * `LocalInferenceBackend.id`, the implementation surface, which stays the
 * single fused FFI backend regardless of the runtime it drives.
 */
export type BackendId = "llama-cpp" | "litert-lm";
182
+
183
/**
 * Snapshot of a backend's current load configuration, as returned by
 * `LocalInferenceBackend.currentRuntimeLoadConfig` (context size, cache
 * types, parallel slots, binary path, ...).
 *
 * NOTE(review): nullable fields are presumably `null` when the value is
 * unknown or does not apply to the active runtime — confirm with the
 * implementing backend.
 */
export interface LocalRuntimeLoadConfig {
  modelId: string | null;
  modelPath: string | null;
  contextSize: number | null;
  /** KV cache type for keys. */
  cacheTypeK: string | null;
  /** KV cache type for values. */
  cacheTypeV: string | null;
  gpuLayers: number | null;
  /** Parallel slot count; the only field that is never `null`. */
  parallel: number;
  binaryPath: string | null;
  /** Selected in-process runtime (see `BackendId`). */
  backend: BackendId | null;
  /** MTP speculative-decoding settings, or `null`. */
  mtp: {
    specType: "draft-mtp";
    draftMin: number;
    draftMax: number;
  } | null;
}
199
+
200
/**
 * The backend contract every local-inference implementation satisfies.
 *
 * `available()` is a soft probe — it should NOT spawn anything; it just
 * reports whether the backend can be used at all (e.g. is the binding
 * loadable, is the binary on disk). Loading a specific model is `load()`.
 */
export interface LocalInferenceBackend {
  /** Identifier for the concrete backend implementation. */
  readonly id: "llama-cpp";
  /** Soft availability probe; see the interface description. */
  available(): Promise<boolean>;
  /** Load the model described by `plan`. */
  load(plan: BackendPlan): Promise<void>;
  /** Unload whatever is currently loaded. */
  unload(): Promise<void>;
  /** Generate text for `args` and resolve with the resulting string. */
  generate(args: GenerateArgs): Promise<GenerateResult>;
  /** Whether a model is currently loaded. */
  hasLoadedModel(): boolean;
  /** Path of the currently loaded model, or null. */
  currentModelPath(): string | null;

  // === Optional methods — backends that don't implement them are surfaced
  // === via `dispatcher.X?.()` calls in `engine.ts`, with safe fallback
  // === values for query methods and actionable throws for required ops.
  // ===
  // === These exist so engine.ts can drive every optimized llama.cpp-specific
  // === feature through the dispatcher and keep FFI as the single runtime
  // === implementation surface.

  /**
   * Usage-instrumented variant of `generate`. Returns Anthropic-shape
   * usage block plus per-turn MTP stats when available.
   */
  generateWithUsage?(
    args: GenerateArgs & { slotId?: number },
  ): Promise<LocalGenerateWithUsageResult>;

  /** Vision describe via mmproj. Requires an mmproj-loaded backend. */
  describeImage?(args: {
    bytes: Uint8Array;
    mimeType?: string;
    prompt?: string;
    maxTokens?: number;
    temperature?: number;
    signal?: AbortSignal;
    /** Per-token callback for streaming vision describe (ABI v13). When set and
     * the backend supports streaming, the description is decoded token-by-token
     * through the same pipe as chat text; otherwise the backend returns the
     * full description and ignores it. */
    onTextChunk?: (chunk: string) => void | Promise<void>;
    maxTokensPerStep?: number;
  }): Promise<{
    text: string;
    projectorMs?: number;
    decodeMs?: number;
  }>;

  /** Persist a slot's KV cache to disk under the conversation directory. */
  persistConversationKv?(conversationId: string, slotId: number): Promise<void>;

  /** Restore a slot's KV cache from disk into the running backend. */
  restoreConversationKv?(
    conversationId: string,
    slotId: number,
  ): Promise<boolean>;

  /**
   * Pre-decode `promptPrefix` into the named slot/cache key so the next
   * `generate` against the same key skips re-prefill. Returns false when
   * no warmup happened (already cached, no model loaded, etc).
   */
  prewarmConversation?(
    promptPrefix: string,
    opts: { slotId: number; cacheKey: string },
  ): Promise<boolean>;

  /**
   * Resize the backend's parallel slot pool. Returns true on a real
   * restart/resize, false when no resize was needed (target ≤ current, etc).
   */
  resizeParallel?(target: number): Promise<boolean>;

  /** Active parallel slot count. Default `1` on backends without pooling. */
  parallelSlots?(): number;

  /** True when native MTP speculative decoding is enabled. */
  mtpEnabled?(): boolean;

  /** Absolute path to the loaded mmproj (vision) GGUF, or null. */
  currentMmprojPath?(): string | null;

  /**
   * Snapshot of the backend's current load configuration (ctx, cache
   * types, parallel, binary path). Used by engine introspection +
   * /api/local-inference/active.
   */
  currentRuntimeLoadConfig?(): LocalRuntimeLoadConfig | null;
}
294
+
295
/**
 * Parsed value of the `ELIZA_INFERENCE_BACKEND` override (see
 * `readBackendOverride`). `"auto"` leaves the choice to `decideBackend`; the
 * other two values force the corresponding runtime.
 */
export type BackendOverride = "auto" | "llama-cpp" | "litert-lm";
296
+
297
/**
 * Name of the environment variable the C-side `llm_backend_select` reads to
 * HARD-select an in-process runtime. The dispatcher sets it to `litert-lm`
 * for a LiteRT load and clears it for a llama.cpp load, so a prior LiteRT
 * select never leaks into the next GGUF load. Mirrors
 * `tools/omnivoice/src/llm-backend.h`.
 */
export const ELIZA_LLM_BACKEND_ENV = "ELIZA_LLM_BACKEND" as const;
304
+
305
/**
 * Parse the `ELIZA_INFERENCE_BACKEND` environment variable.
 *
 * The value is trimmed and lower-cased before matching. `litert`, `litert_lm`
 * and `litert-lm` all select LiteRT; `llama-cpp` selects the GGUF path.
 *
 * @returns the requested override, or `"auto"` when the variable is unset,
 *   set to `auto`, or holds any unrecognized value.
 */
export function readBackendOverride(): BackendOverride {
  const requested = process.env.ELIZA_INFERENCE_BACKEND?.trim().toLowerCase();
  switch (requested) {
    case "llama-cpp":
      return "llama-cpp";
    case "litert-lm":
    case "litert":
    case "litert_lm":
      return "litert-lm";
    default:
      // Covers "auto", an unset variable, and anything unrecognized.
      return "auto";
  }
}
316
+
317
/**
 * Reports whether the LiteRT-LM in-process backend is usable on THIS
 * build/platform.
 *
 * The runtime authority is the C-side `LlmBackendFactory::available()` (it is
 * compiled in only under `-DELIZA_ENABLE_LITERT` and reports false when the
 * NPU/GPU delegate is absent). The TS dispatcher, however, has to decide
 * *before* the FFI load whether to route there at all, so this function gates
 * on the same signals the build/launcher exports:
 *
 * - `ELIZA_ENABLE_LITERT=1` — the explicit opt-in set by a LiteRT-enabled
 *   build (matches the `-DELIZA_ENABLE_LITERT` CMake gate).
 * - `ELIZA_PLATFORM=android` — the NPU/GPU-delegate target where a LiteRT
 *   `.litertlm` bundle is the on-device path.
 *
 * A bundle that ships a `.litertlm` but runs on a build without LiteRT support
 * loads the GGUF (`llama-cpp`) instead: the artifact is additive, never a
 * requirement.
 *
 * @param env environment to inspect; defaults to `process.env`.
 * @returns true only when one of the two signals is present, so GGUF stays the
 *   default everywhere LiteRT is not wired.
 */
export function litertBackendSupported(
  env: NodeJS.ProcessEnv = process.env,
): boolean {
  const explicitOptIn = envFlagIn(env, "ELIZA_ENABLE_LITERT");
  if (explicitOptIn) {
    return true;
  }
  const platform = env.ELIZA_PLATFORM?.trim().toLowerCase();
  return platform === "android";
}
341
+
342
/**
 * Reads `name` from `env` as a boolean flag.
 *
 * The value is trimmed and lower-cased; `1`, `true`, `yes` and `on` count as
 * enabled. Anything else — including an unset variable — is disabled.
 */
function envFlagIn(env: NodeJS.ProcessEnv, name: string): boolean {
  const value = env[name]?.trim().toLowerCase();
  return value === "1" || value === "true" || value === "yes" || value === "on";
}

/**
 * Same as {@link envFlagIn}, reading from the live `process.env`. Delegates so
 * the set of accepted truthy spellings is defined in exactly one place.
 */
function envFlag(name: string): boolean {
  return envFlagIn(process.env, name);
}
351
+
352
/**
 * Reports whether the opt-in "reduced-optimization local mode" is enabled,
 * i.e. whether `ELIZA_LOCAL_ALLOW_STOCK_KV` is set to a truthy flag value.
 *
 * This is the cross-platform escape hatch documented in
 * `docs/voice-interactive.md` and `packages/inference/AGENTS.md` §4. When the
 * installed llama.cpp runtime does not advertise the custom Eliza-1 KV kernels
 * (`turbo3`/`qjl_full`/`polarquant`/…) — the fork has not been built with
 * those kernels dispatched on this backend yet — setting
 * `ELIZA_LOCAL_ALLOW_STOCK_KV=1` lets the model load anyway with stock `f16`
 * KV cache instead of hard-refusing. The voice pipeline runs, just without the
 * KV-compression speedups on that backend, and a loud one-time warning is
 * emitted (see `warnReducedOptimizationLocalMode`).
 *
 * §3-vs-"works everywhere" reconciliation: AGENTS.md §3 says these kernels are
 * *mandatory* and there is *no* "fallback to unoptimized" path, while the
 * user's directive for SA-1 is "works everywhere regardless of GPU". The
 * reconciliation: the kernels DO build on every backend where they can be
 * dispatched (Metal, CUDA, Vulkan-source-patched, CPU SIMD TUs), and this
 * fallback is the *opt-in*, *loudly-warned*, *non-publishable* mode for the
 * backends where dispatch isn't wired yet. It is not a silent downgrade, and
 * `defaultEligible` bundles still require the verified kernels.
 */
export function localAllowStockKv(): boolean {
  const optedIn = envFlag("ELIZA_LOCAL_ALLOW_STOCK_KV");
  return optedIn;
}
375
+
376
/** Latch: flips to true once the reduced-optimization warning has been printed. */
let reducedModeWarned = false;

/**
 * Emit the reduced-optimization-local-mode warning via `console.warn`, at most
 * once per process (until the latch is reset).
 *
 * @param detail short explanation appended to the warning's headline.
 */
export function warnReducedOptimizationLocalMode(detail: string): void {
  if (!reducedModeWarned) {
    reducedModeWarned = true;
    const messageParts = [
      `\n[local-inference] ⚠️ REDUCED-OPTIMIZATION LOCAL MODE — ${detail}\n`,
      ` ELIZA_LOCAL_ALLOW_STOCK_KV=1 is set, so the model is loading with stock\n`,
      ` f16 KV cache instead of the Eliza-1 TurboQuant/QJL/PolarQuant KV kernels.\n`,
      ` The voice pipeline will run, but slower and using more memory than a build\n`,
      ` with the kernels dispatched (Metal: all 5; CUDA: ships them; Vulkan: source-\n`,
      ` patched; CPU: SIMD TUs). Rebuild the bundled llama.cpp FFI runtime\n`,
      ` to get the optimized path. This mode is NOT publishable and NOT a default.\n`,
    ];
    console.warn(messageParts.join(""));
  }
}

/** Reset the one-time warning latch (tests only). */
export function __resetReducedModeWarnedForTests(): void {
  reducedModeWarned = false;
}
395
+
396
/** Outcome of `decideBackend`: which runtime a load is routed to, and why. */
export interface BackendDecision {
  /**
   * In-process runtime the dispatcher routes this load to. `llama-cpp` (the
   * GGUF path) is the default; `litert-lm` is selected only when the bundle
   * ships a `.litertlm` AND the build/platform supports LiteRT (or it was
   * forced via `ELIZA_INFERENCE_BACKEND=litert-lm`). Both run through the same
   * fused `libelizainference` FFI — the selection only changes the env the
   * C-side `llm_backend_select` reads.
   */
  backend: BackendId;
  /**
   * Why this backend was chosen — for diagnostics and warnings.
   *
   * `decideBackend` emits `env-override` (forced via the env override),
   * `litert-artifact` (auto-routed to a staged `.litertlm`), `kernel-required`
   * (catalog declares required kernels) and `default`.
   * NOTE(review): `preferred-backend` is not produced by `decideBackend` —
   * confirm whether another producer still emits it.
   */
  reason:
    | "env-override"
    | "kernel-required"
    | "preferred-backend"
    | "litert-artifact"
    | "default";
  /**
   * Absolute path to the selected `.litertlm` artifact when `backend ===
   * "litert-lm"`, else undefined. The dispatcher exports
   * `ELIZA_LLM_BACKEND=litert-lm` for this load so the FFI picks the LiteRT
   * factory; the path is surfaced for diagnostics.
   */
  litertModelPath?: string;
  /** Required kernels declared by the catalog, when any. */
  kernels: LocalRuntimeKernel[];
  /**
   * Set when the dispatcher detected a kernel mismatch — the catalog model
   * declares `requiresKernel: [...]` but CAPABILITIES.json next to the
   * installed binary reports those kernels as unavailable. The dispatcher
   * still routes to optimized llama.cpp (the only backend that could satisfy
   * those kernels), but the load is expected to fail; the caller should
   * surface this to the operator with a clear "rebuild your binary"
   * message instead of letting the model silently misbehave.
   *
   * Undefined when the catalog requires no kernels or no capabilities probe
   * was available; an empty array means every required kernel is advertised.
   */
  unsatisfiedKernels?: LocalRuntimeKernel[];
}
433
+
434
/**
 * Pure backend-selection function; it spawns nothing, so it is easy to
 * unit-test.
 *
 * All inputs are resolved by the caller up front: the catalog entry, the
 * binary availability, the env override and, for LiteRT, the staged
 * `.litertlm` path plus the build/platform support flag.
 *
 * - `binaryKernels`, when present, is the parsed CAPABILITIES.json kernels map
 *   of the installed llama.cpp FFI runtime and is used to compute
 *   `unsatisfiedKernels`. `null` means the binary is older / has no
 *   capabilities probe; the model's declaration is then trusted and the load
 *   attempt is left to clarify.
 * - `litertModelPath` is the absolute path to a `.litertlm` text artifact when
 *   the bundle ships one (else undefined/null); `litertSupported` says whether
 *   this build/platform can run LiteRT ({@link litertBackendSupported}).
 *
 * Selection order:
 * 1. `override === "litert-lm"` forces LiteRT.
 * 2. `override === "llama-cpp"` forces the fused GGUF path.
 * 3. Otherwise LiteRT is picked only when BOTH an artifact and support are
 *    present; GGUF stays the default in every other case (`kernel-required`
 *    when the catalog declares kernels, else `default`).
 *
 * @throws Error when LiteRT is forced but unsupported, or forced with no
 *   `.litertlm` staged — a forced select never silently downgrades to GGUF.
 */
export function decideBackend(input: {
  override: BackendOverride;
  catalog: CatalogModel | undefined;
  llamaCppAvailable: boolean;
  binaryKernels?: Partial<Record<LocalRuntimeKernel | string, boolean>> | null;
  litertModelPath?: string | null;
  litertSupported?: boolean;
}): BackendDecision {
  const kernels = input.catalog?.runtime?.optimizations?.requiresKernel ?? [];
  // Kernel bookkeeping is identical on every outcome, so build it once.
  const kernelInfo = {
    kernels,
    unsatisfiedKernels: computeUnsatisfiedKernels(
      kernels,
      input.binaryKernels ?? null,
    ),
  };
  const litertModelPath = input.litertModelPath ?? undefined;
  const litertSupported = input.litertSupported ?? false;

  switch (input.override) {
    case "litert-lm": {
      // `ELIZA_INFERENCE_BACKEND=litert-lm` is a real select, not a hint: with
      // no staged `.litertlm`, or on a build without LiteRT support, it is an
      // error rather than a silent fall back to GGUF (Commandment 8 — don't
      // paper over a broken pipeline).
      const forcedPrefix =
        "[local-inference] ELIZA_INFERENCE_BACKEND=litert-lm forces the LiteRT-LM ";
      if (!litertSupported) {
        throw new Error(
          forcedPrefix +
            "backend, but this build/platform does not support it (set ELIZA_ENABLE_LITERT=1 " +
            "on a LiteRT-enabled build, or run on android). Use llama-cpp, or unset the override.",
        );
      }
      if (!litertModelPath) {
        throw new Error(
          forcedPrefix +
            "backend, but the bundle ships no .litertlm text artifact under text/. " +
            "Stage a .litertlm into the bundle, or use llama-cpp.",
        );
      }
      return {
        backend: "litert-lm",
        reason: "env-override",
        litertModelPath,
        ...kernelInfo,
      };
    }
    case "llama-cpp":
      // `ELIZA_INFERENCE_BACKEND=llama-cpp` forces the fused GGUF path.
      return { backend: "llama-cpp", reason: "env-override", ...kernelInfo };
    default:
      break;
  }

  // Auto: a shipped `.litertlm` on a LiteRT-capable build/platform goes to
  // LiteRT (the on-device NPU/GPU-delegate path).
  if (litertSupported && litertModelPath) {
    return {
      backend: "litert-lm",
      reason: "litert-artifact",
      litertModelPath,
      ...kernelInfo,
    };
  }

  // Everything else stays on GGUF; only the reported reason differs.
  return {
    backend: "llama-cpp",
    reason: kernels.length > 0 ? "kernel-required" : "default",
    ...kernelInfo,
  };
}

/**
 * Returns the subset of `required` kernels that the binary's CAPABILITIES.json
 * does not report as `true`.
 *
 * @returns `undefined` when nothing is required or when no probe is available
 *   (`binaryKernels` is null); otherwise the missing kernels in their original
 *   order, where an empty array means "all required kernels are satisfied".
 */
function computeUnsatisfiedKernels(
  required: LocalRuntimeKernel[],
  binaryKernels: Partial<Record<LocalRuntimeKernel | string, boolean>> | null,
): LocalRuntimeKernel[] | undefined {
  if (required.length === 0 || !binaryKernels) {
    return undefined;
  }
  const missing: LocalRuntimeKernel[] = [];
  for (const kernel of required) {
    // Only an explicit `true` counts as advertised; absent/false are missing.
    if (binaryKernels[kernel] !== true) {
      missing.push(kernel);
    }
  }
  return missing;
}
553
+
554
/**
 * Look up the catalog entry that belongs to a `BackendPlan`.
 *
 * A plan may already carry the entry (`plan.catalog`), reference it by
 * `plan.modelId` (resolved through `findCatalogModel`), or carry neither.
 *
 * @returns the entry, or `undefined` when the plan has neither field — the
 *   dispatcher then falls back to the default backend.
 */
export function resolveCatalogForPlan(
  plan: BackendPlan,
): CatalogModel | undefined {
  // An entry embedded in the plan always wins over an id lookup.
  if (plan.catalog) {
    return plan.catalog;
  }
  return plan.modelId ? findCatalogModel(plan.modelId) : undefined;
}
566
+
567
/**
 * Dispatcher that fronts the in-process FFI llama.cpp backend behind the
 * `LocalInferenceBackend` contract. Holds at most one active backend at a
 * time — load() unloads the previous backend before loading the new one if
 * they differ.
 */
export class BackendDispatcher implements LocalInferenceBackend {
  /**
   * Informational only: the active backend's id is what matters for
   * diagnostics, and is exposed through `activeBackendId()`.
   */
  readonly id = "llama-cpp" as const;

  /** Backend currently fronted by the dispatcher; null until `load()` runs. */
  private active: LocalInferenceBackend | null = null;

  /**
   * @param ffiStreaming the in-process FFI backend every load is routed to.
   * @param probeFfiAvailable synchronous probe for that backend; `load()`
   *   refuses to proceed when it returns false.
   * @param probeBinaryKernels optional capabilities probe returning the
   *   kernels map of the installed llama.cpp FFI runtime, or null when no
   *   probe is available. Used to flag `unsatisfiedKernels` in the
   *   BackendDecision before load(), so callers can give a clean "rebuild your
   *   fork binary" error instead of a kernel SIGSEGV at generation time.
   */
  constructor(
    private readonly ffiStreaming: LocalInferenceBackend,
    private readonly probeFfiAvailable: () => boolean,
    private readonly probeBinaryKernels?: () => Partial<
      Record<string, boolean>
    > | null,
  ) {}

  /** Soft availability probe, delegated to the FFI backend. */
  async available(): Promise<boolean> {
    return this.ffiStreaming.available();
  }

  /** Id of the active backend, or null when nothing has been loaded. */
  activeBackendId(): "llama-cpp" | null {
    return this.active?.id ?? null;
  }

  hasLoadedModel(): boolean {
    return this.active?.hasLoadedModel() ?? false;
  }

  currentModelPath(): string | null {
    return this.active?.currentModelPath() ?? null;
  }

  /**
   * Compute the backend decision for `plan` from the live environment and the
   * injected probes. Does not load anything, but propagates the errors
   * `decideBackend` throws for an unsatisfiable forced LiteRT select.
   */
  decide(plan: BackendPlan): BackendDecision {
    return decideBackend({
      override: readBackendOverride(),
      catalog: resolveCatalogForPlan(plan),
      llamaCppAvailable: this.probeFfiAvailable(),
      binaryKernels: this.probeBinaryKernels?.() ?? null,
      litertModelPath: plan.overrides?.litertModelPath ?? null,
      litertSupported: litertBackendSupported(),
    });
  }

  /**
   * Decide the runtime for `plan`, validate it, and load it through the FFI
   * backend.
   *
   * @throws Error when required kernels are not advertised (and
   *   `ELIZA_LOCAL_ALLOW_STOCK_KV` is not set) or when the FFI backend is
   *   unavailable. Both rejections happen before any process-global state is
   *   changed.
   */
  async load(plan: BackendPlan): Promise<void> {
    const decision = this.decide(plan);
    const effectivePlan = this.applyKernelContract(plan, decision);

    if (!this.probeFfiAvailable()) {
      throw new Error(
        "[local-inference] Optimized llama.cpp requires the in-process FFI backend. " +
          "Install/rebuild libelizainference with streaming-LLM + MTP support; " +
          "server backends are not supported.",
      );
    }

    // Tell the C-side `llm_backend_select` which in-process runtime to drive.
    // `litert-lm` sets the HARD select; the GGUF path clears it so a prior
    // LiteRT select never leaks into the next llama.cpp load. The FFI library
    // is the same singleton either way (`this.ffiStreaming`); only the env
    // (read at `_open`) changes which factory it picks. This happens only
    // after validation, so a rejected load never clobbers the select that
    // belongs to the model still loaded.
    if (decision.backend === "litert-lm") {
      process.env[ELIZA_LLM_BACKEND_ENV] = "litert-lm";
    } else {
      delete process.env[ELIZA_LLM_BACKEND_ENV];
    }

    const target = this.ffiStreaming;
    if (this.active && this.active !== target) {
      await this.active.unload();
    }
    this.active = target;
    await target.load(effectivePlan);
  }

  /** Unload the active backend, if any, and forget it. */
  async unload(): Promise<void> {
    const previous = this.active;
    this.active = null;
    if (previous) await previous.unload();
  }

  /** @throws Error when no backend has been loaded. */
  async generate(args: GenerateArgs): Promise<GenerateResult> {
    if (!this.active) {
      throw new Error(
        "[local-inference] No backend loaded. Call load() before generate().",
      );
    }
    return this.active.generate(args);
  }

  // === Forwarders for the optional methods on LocalInferenceBackend.
  // === Every async forwarder throws when no backend is loaded. When the
  // === active backend lacks the method:
  // ===   - generateWithUsage / describeImage throw an actionable error;
  // ===   - persistConversationKv is a no-op;
  // ===   - restoreConversationKv / prewarmConversation / resizeParallel
  // ===     resolve to false.
  // === Query getters never throw and return safe defaults that match the
  // === engine's existing guard expectations.

  async generateWithUsage(
    args: GenerateArgs & { slotId?: number },
  ): Promise<LocalGenerateWithUsageResult> {
    const backend = this.requireActive();
    if (!backend.generateWithUsage) {
      throw this.notSupported("generateWithUsage");
    }
    return backend.generateWithUsage(args);
  }

  async describeImage(
    args: Parameters<NonNullable<LocalInferenceBackend["describeImage"]>>[0],
  ): ReturnType<NonNullable<LocalInferenceBackend["describeImage"]>> {
    const backend = this.requireActive();
    if (!backend.describeImage) {
      throw this.notSupported(
        "describeImage",
        "vision describe requires an mmproj-loaded llama.cpp FFI runtime. Load an Eliza-1 bundle with its vision projector.",
      );
    }
    return backend.describeImage(args);
  }

  async persistConversationKv(
    conversationId: string,
    slotId: number,
  ): Promise<void> {
    const backend = this.requireActive();
    if (!backend.persistConversationKv) return;
    await backend.persistConversationKv(conversationId, slotId);
  }

  async restoreConversationKv(
    conversationId: string,
    slotId: number,
  ): Promise<boolean> {
    const backend = this.requireActive();
    if (!backend.restoreConversationKv) return false;
    return backend.restoreConversationKv(conversationId, slotId);
  }

  async prewarmConversation(
    promptPrefix: string,
    opts: { slotId: number; cacheKey: string },
  ): Promise<boolean> {
    const backend = this.requireActive();
    if (!backend.prewarmConversation) return false;
    return backend.prewarmConversation(promptPrefix, opts);
  }

  async resizeParallel(target: number): Promise<boolean> {
    const backend = this.requireActive();
    if (!backend.resizeParallel) return false;
    return backend.resizeParallel(target);
  }

  /** Active parallel slot count; `1` when unknown. */
  parallelSlots(): number {
    return this.active?.parallelSlots?.() ?? 1;
  }

  /** Whether native MTP is enabled; `false` when unknown. */
  mtpEnabled(): boolean {
    return this.active?.mtpEnabled?.() ?? false;
  }

  /** Path of the loaded mmproj GGUF; `null` when unknown. */
  currentMmprojPath(): string | null {
    return this.active?.currentMmprojPath?.() ?? null;
  }

  /** Load-configuration snapshot of the active backend; `null` when unknown. */
  currentRuntimeLoadConfig(): LocalRuntimeLoadConfig | null {
    return this.active?.currentRuntimeLoadConfig?.() ?? null;
  }

  /**
   * Enforce the llama.cpp kernel contract for `decision` and return the plan
   * that should actually be loaded.
   *
   * Kernel-mismatch enforcement is a llama.cpp-only contract — the LiteRT
   * `.litertlm` path uses none of the fork's KV kernels, so it is skipped
   * there. When kernels are missing and reduced-optimization local mode is
   * enabled, the one-time warning is emitted and any custom `cacheTypeK` /
   * `cacheTypeV` override is stripped so the FFI runtime uses f16.
   *
   * @throws Error when kernels are missing and the mode is not enabled.
   */
  private applyKernelContract(
    plan: BackendPlan,
    decision: BackendDecision,
  ): BackendPlan {
    const missingKernels = decision.unsatisfiedKernels ?? [];
    if (decision.backend !== "llama-cpp" || missingKernels.length === 0) {
      return plan;
    }

    const missing = missingKernels.join(", ");
    if (!localAllowStockKv()) {
      throw new Error(
        `[local-inference] Catalog model requires kernel(s) {${missing}}, but the installed llama.cpp FFI runtime does not advertise them. Rebuild the bundled runtime for this target, pick a different model, or set ELIZA_LOCAL_ALLOW_STOCK_KV=1 to load with stock f16 KV (reduced-optimization local mode — loud warning, not publishable).`,
      );
    }

    // Reduced-optimization local mode: the build hasn't dispatched these
    // kernels on this backend yet, but the user opted into running with stock
    // f16 KV instead of hard-refusing. Warn loudly exactly once.
    warnReducedOptimizationLocalMode(
      `catalog model requires kernel(s) {${missing}}, not advertised by the installed llama.cpp FFI runtime`,
    );

    const overrides = plan.overrides;
    if (
      !overrides ||
      (overrides.cacheTypeK === undefined && overrides.cacheTypeV === undefined)
    ) {
      return plan;
    }
    // Drop only the cache-type overrides; every other override is preserved.
    const { cacheTypeK: _k, cacheTypeV: _v, ...rest } = overrides;
    return { ...plan, overrides: { ...rest } };
  }

  /**
   * Return the active backend.
   *
   * @throws Error when no backend has been loaded.
   */
  private requireActive(): LocalInferenceBackend {
    if (!this.active) {
      throw new Error(
        "[local-inference] No backend loaded. Call load() first.",
      );
    }
    return this.active;
  }

  /** Build the "active backend does not implement X" error. */
  private notSupported(method: string, detail?: string): Error {
    const base = `[local-inference] Active backend (${this.active?.id ?? "<none>"}) does not implement ${method}.`;
    return new Error(detail ? `${base} ${detail}` : base);
  }
}