@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,382 @@
1
+ /**
2
+ * In-process FFI streaming backend adapter.
3
+ *
4
+ * Implements `LocalInferenceBackend` as the optimized in-process
5
+ * llama.cpp path used by Eliza-1 on desktop and mobile.
6
+ *
7
+ * What this class deliberately does NOT do:
8
+ * - Own the FFI context. The runtime provider passed to this class owns
9
+ * native load/unload and hands back the binding, context, and tokenizer.
10
+ * - Decode image bytes or call mtmd directly. Vision requests are validated
11
+ * here, then forwarded to runtimes that expose `describeImage`.
12
+ */
13
+
14
+ import type {
15
+ BackendPlan,
16
+ GenerateArgs,
17
+ GenerateResult,
18
+ LocalGenerateWithUsageResult,
19
+ LocalInferenceBackend,
20
+ } from "./backend";
21
+ import type { FfiStreamingRunner } from "./ffi-streaming-runner";
22
+ import type {
23
+ LlmCtxHandle,
24
+ LlmStreamingBinding,
25
+ } from "./llm-streaming-binding";
26
+ import { resolveGuidedDecodeForParams } from "./structured-output";
27
+
28
+ /**
29
+ * Constructor-injected adapter that resolves the FFI binding, context, and
30
+ * tokenizer for a given load. Two responsibilities:
31
+ *
32
+ * 1. Decide whether the FFI path is viable on the current binding
33
+ * (`supported()`). Mirrors `LlmStreamingBinding.llmStreamSupported()`
34
+ * plus any higher-level constraints (e.g. dylib path exists, build
35
+ * target matches the bundle's required kernels).
36
+ * 2. Lifecycle: `acquire(plan)` returns the FFI runner ready for
37
+ * `generate()` against the requested model, plus a tokenizer that
38
+ * matches that model's vocab. `release()` tears everything down.
39
+ *
40
+ * Production runtime implementation: the fused libelizainference path
41
+ * (`desktop-fused-ffi-backend-runtime.ts`), which wraps `ElizaInferenceFfi`
42
+ * via `wrapElizaInferenceFfi()` from `services/llm-streaming-binding.ts`.
43
+ * libllama has been retired — there is no second runtime behind this slot.
44
+ */
45
+ export interface FfiBackendRuntime {
46
+ supported(): boolean;
47
+ acquire(plan: BackendPlan): Promise<FfiBackendSession>;
48
+ release(): Promise<void>;
49
+ /**
50
+ * Optional parallel-slot pool surface. When the runtime exposes a
51
+ * ctx pool (the desktop libllama path does), `parallelSlots()`
52
+ * reports the live count and `resizeParallel(N)` grows/shrinks it.
53
+ * Runtimes without a pool report 1 and ignore resize requests.
54
+ */
55
+ parallelSlots?(): number;
56
+ resizeParallel?(target: number): Promise<boolean>;
57
+ }
58
+
59
+ /**
60
+ * Result of `FfiBackendRuntime.acquire()` — a live FFI session bound to a
61
+ * specific loaded model.
62
+ */
63
+ export interface FfiBackendSession {
64
+ readonly binding: LlmStreamingBinding;
65
+ readonly ctx: LlmCtxHandle;
66
+ readonly runner: FfiStreamingRunner;
67
+ /**
68
+ * Tokenize a prompt string into model token ids using the loaded model's
69
+ * tokenizer. The vocab MUST match the GGUF — mismatches produce gibberish
70
+ * silently. The runtime is responsible for asserting this at acquire
71
+ * time.
72
+ */
73
+ readonly tokenize: (prompt: string) => Int32Array;
74
+ /**
75
+ * Native MTP speculative-decoding policy from the catalog. `null`
76
+ * disables speculative decoding for this session.
77
+ */
78
+ readonly mtp: {
79
+ specType: "draft-mtp";
80
+ draftMin: number;
81
+ draftMax: number;
82
+ gpuLayers: number | "auto";
83
+ } | null;
84
+ /**
85
+ * Absolute path to a *separate* MTP drafter GGUF resolved during load.
86
+ * `null` means same-file MTP: the NextN head is embedded in the main
87
+ * text GGUF and the native runner activates `--spec-type draft-mtp`
88
+ * with no `-md`. Speculative decoding is governed by `mtp`, not by the
89
+ * presence of this path.
90
+ */
91
+ readonly draftModelPath: string | null;
92
+ /**
93
+ * Multimodal projector (mmproj) GGUF path for vision describe. Resolved
94
+ * from `plan.overrides.mmprojPath` at acquire time. `null` disables
95
+ * vision — `describeImage` then throws an actionable error.
96
+ */
97
+ readonly mmprojPath: string | null;
98
+ /**
99
+ * Per-load runtime config the fused libelizainference path applies at its
100
+ * first `llmStreamOpen` (gpuLayers + KV-cache quant types). The desktop
101
+ * libllama runtime applies these at `loadModel()` instead and leaves this
102
+ * `null` — the backend forwards them into the runner's per-call config only
103
+ * when present, so the fused path mirrors the libllama load decision without
104
+ * the libllama path double-applying them.
105
+ */
106
+ readonly loadConfig?: {
107
+ gpuLayers?: number;
108
+ cacheTypeK?: string | null;
109
+ cacheTypeV?: string | null;
110
+ } | null;
111
+ }
112
+
113
+ /**
114
+ * Adapter that satisfies `LocalInferenceBackend` by delegating to
115
+ * `FfiStreamingRunner`. The `id` is `"llama-cpp"` because this is the
116
+ * in-process variant of the optimized llama.cpp path.
117
+ */
118
+ export class FfiStreamingBackend implements LocalInferenceBackend {
119
+ readonly id = "llama-cpp" as const;
120
+
121
+ private session: FfiBackendSession | null = null;
122
+ private loadedPath: string | null = null;
123
+
124
+ constructor(private readonly runtime: FfiBackendRuntime) {}
125
+
126
+ async available(): Promise<boolean> {
127
+ return this.runtime.supported();
128
+ }
129
+
130
+ hasLoadedModel(): boolean {
131
+ return this.session !== null;
132
+ }
133
+
134
+ currentModelPath(): string | null {
135
+ return this.loadedPath;
136
+ }
137
+
138
+ async load(plan: BackendPlan): Promise<void> {
139
+ if (this.session) await this.unload();
140
+ this.session = await this.runtime.acquire(plan);
141
+ this.loadedPath = plan.modelPath;
142
+ }
143
+
144
+ async unload(): Promise<void> {
145
+ // Await the native release BEFORE nulling our refs. If we null first and
146
+ // release() throws (a raw bun:ffi free can reject), this.session would be
147
+ // null while the runtime still holds a live session — the next load()
148
+ // would skip unload() and call acquire(), which throws on its live-session
149
+ // guard, wedging the backend until process restart. The finally guarantees
150
+ // our refs are cleared regardless so a failed release can't leave a stale
151
+ // "loaded" view either.
152
+ try {
153
+ await this.runtime.release();
154
+ } finally {
155
+ this.session = null;
156
+ this.loadedPath = null;
157
+ }
158
+ }
159
+
160
+ async generate(args: GenerateArgs): Promise<GenerateResult> {
161
+ const result = await this.generateWithUsage(args);
162
+ return result.text;
163
+ }
164
+
165
+ async generateWithUsage(
166
+ args: GenerateArgs & { slotId?: number },
167
+ ): Promise<LocalGenerateWithUsageResult> {
168
+ if (!this.session) {
169
+ throw new Error(
170
+ "[ffi-streaming-backend] generate() called before load() — " +
171
+ "the FFI session has not been acquired.",
172
+ );
173
+ }
174
+ const { runner, tokenize, mtp, draftModelPath, loadConfig } = this.session;
175
+ // Force the structured-reply envelope: compile the GBNF from the
176
+ // caller's `responseSkeleton` / explicit `grammar` (precedence handled
177
+ // by `resolveGuidedDecodeForParams`, mirroring `engine.ts`'s
178
+ // `resolveBindingGrammarSource`). The native session installs it FIRST
179
+ // in the sampler chain so every sampled token is grammar-constrained.
180
+ const gbnfGrammar =
181
+ resolveGuidedDecodeForParams(args).grammar?.source ?? null;
182
+ const result = await runner.generateWithUsage({
183
+ promptTokens: tokenize(args.prompt),
184
+ slotId: args.slotId ?? -1,
185
+ cacheKey: args.cacheKey,
186
+ maxTokens: args.maxTokens ?? 2048,
187
+ temperature: args.temperature ?? 0.7,
188
+ topP: args.topP ?? 0.9,
189
+ topK: 40,
190
+ repeatPenalty: 1.1,
191
+ draftMin: mtp?.draftMin ?? 0,
192
+ draftMax: mtp?.draftMax ?? 0,
193
+ draftModelPath,
194
+ gbnfGrammar,
195
+ gpuLayers: loadConfig?.gpuLayers,
196
+ cacheTypeK: loadConfig?.cacheTypeK,
197
+ cacheTypeV: loadConfig?.cacheTypeV,
198
+ signal: args.signal,
199
+ onTextChunk: args.onTextChunk,
200
+ onVerifierEvent: args.onVerifierEvent,
201
+ });
202
+ return {
203
+ text: result.text,
204
+ slotId: result.slotId,
205
+ firstTokenMs: result.firstTokenMs,
206
+ usage: {
207
+ completion_tokens: result.accepted,
208
+ },
209
+ mtpStats: {
210
+ drafted: result.drafted,
211
+ accepted: result.accepted,
212
+ acceptanceRate:
213
+ result.drafted > 0 ? result.accepted / result.drafted : null,
214
+ },
215
+ };
216
+ }
217
+
218
+ // === Optional `LocalInferenceBackend` methods routed through the runner.
219
+
220
+ /**
221
+ * Persist the active session's KV state to a per-conversation file.
222
+ * v1 uses `llama_state_seq_save_file` against seq_id=0. The on-disk file
223
+ * name uses the conversation-keyed slot layout built by `slotFilename`
224
+ * (`<conversationId>__slot<slotId>.kv`) so a switch between
225
+ * FFI and subprocess can resume each other's slots — once both
226
+ * paths agree on the file format.
227
+ */
228
+ async persistConversationKv(
229
+ conversationId: string,
230
+ slotId: number,
231
+ ): Promise<void> {
232
+ if (!this.session) return; // no active session to persist
233
+ const { binding } = this.session;
234
+ if (!binding.llmStreamSaveSlot) return; // adapter doesn't support save
235
+ const filename = slotFilename(conversationId, slotId);
236
+ // llmStreamSaveSlot is per-stream in the binding API; the desktop
237
+ // adapter currently saves the ctx-wide seq=0 state, so the stream
238
+ // handle is informational. We always pass 0n, the binding-level
239
+ // sentinel, rather than a live stream id.
240
+ binding.llmStreamSaveSlot({ stream: 0n, filename });
241
+ }
242
+
243
+ /** Restore a previously persisted KV state. Mirror of `persistConversationKv`. */
244
+ async restoreConversationKv(
245
+ conversationId: string,
246
+ slotId: number,
247
+ ): Promise<boolean> {
248
+ if (!this.session) return false;
249
+ const { binding } = this.session;
250
+ if (!binding.llmStreamRestoreSlot) return false;
251
+ const filename = slotFilename(conversationId, slotId);
252
+ binding.llmStreamRestoreSlot({ stream: 0n, filename });
253
+ return true;
254
+ }
255
+
256
+ /**
257
+ * Pre-decode `promptPrefix` so the next `generate` against the same
258
+ * `cacheKey` skips re-prefill. Returns `false` when the prefix is
259
+ * empty or no session is loaded. The FFI runner serializes by
260
+ * pinned slot id internally via the `slotInFlight` map.
261
+ */
262
+ async prewarmConversation(
263
+ promptPrefix: string,
264
+ opts: { slotId: number; cacheKey: string },
265
+ ): Promise<boolean> {
266
+ if (!this.session || promptPrefix.length === 0) return false;
267
+ const { runner, tokenize, mtp, draftModelPath, loadConfig } = this.session;
268
+ await runner.generateWithUsage({
269
+ promptTokens: tokenize(promptPrefix),
270
+ slotId: opts.slotId,
271
+ cacheKey: opts.cacheKey,
272
+ maxTokens: 0, // prefill-only: feed prompt, generate nothing
273
+ temperature: 0,
274
+ topP: 1,
275
+ topK: 1,
276
+ repeatPenalty: 1,
277
+ draftMin: mtp?.draftMin ?? 0,
278
+ draftMax: mtp?.draftMax ?? 0,
279
+ draftModelPath,
280
+ gpuLayers: loadConfig?.gpuLayers,
281
+ cacheTypeK: loadConfig?.cacheTypeK,
282
+ cacheTypeV: loadConfig?.cacheTypeV,
283
+ });
284
+ return true;
285
+ }
286
+
287
+ /**
288
+ * True when Eliza-1 native MTP is active for the loaded target model.
289
+ * Covers both shapes: same-file MTP (NextN head embedded in the text
290
+ * GGUF, `draftModelPath` null) and separate-drafter MTP.
291
+ */
292
+ mtpEnabled(): boolean {
293
+ return Boolean(this.session?.mtp);
294
+ }
295
+
296
+ /**
297
+ * Parallel-slot pool size. Routed to the runtime's ctx pool when one
298
+ * exists; defaults to 1 otherwise.
299
+ */
300
+ parallelSlots(): number {
301
+ return this.runtime.parallelSlots?.() ?? 1;
302
+ }
303
+
304
+ /**
305
+ * Grow or shrink the runtime's ctx pool to `target` slots. Returns
306
+ * false when the runtime has no pool surface (in which case parallel
307
+ * resize is ignored — the conversation registry tolerates
308
+ * fixed 1-slot operation).
309
+ */
310
+ async resizeParallel(target: number): Promise<boolean> {
311
+ if (!this.runtime.resizeParallel) return false;
312
+ return this.runtime.resizeParallel(target);
313
+ }
314
+
315
+ /**
316
+ * Vision describe via mmproj. Requires:
317
+ * - The shim built with `-DELIZA_ENABLE_VISION=1` (ELIZA_ENABLE_VISION=1
318
+ * in the build script's environment). When absent the runtime throws an
319
+ * actionable error.
320
+ * - `plan.overrides.mmprojPath` was passed at load time so the
321
+ * adapter knows which mmproj GGUF to feed clip.
322
+ */
323
+ async describeImage(args: {
324
+ bytes: Uint8Array;
325
+ mimeType?: string;
326
+ prompt?: string;
327
+ maxTokens?: number;
328
+ temperature?: number;
329
+ signal?: AbortSignal;
330
+ }): Promise<{ text: string; projectorMs?: number; decodeMs?: number }> {
331
+ if (!this.session) {
332
+ throw new Error(
333
+ "[ffi-streaming-backend] describeImage before load — no session acquired",
334
+ );
335
+ }
336
+ if (!this.session.mmprojPath) {
337
+ throw new Error(
338
+ "[ffi-streaming-backend] describeImage: no mmproj GGUF loaded for this session. " +
339
+ "Pass `overrides.mmprojPath` in the BackendPlan when activating a vision-capable bundle.",
340
+ );
341
+ }
342
+ // The runtime adapter has visionSupported() + describeImage(args).
343
+ // We re-shape `bytes` → `imageBytes` and merge in the resolved
344
+ // mmprojPath; the rest of args pass through unchanged.
345
+ const runtime = this.runtime as unknown as {
346
+ describeImage?: (args: {
347
+ imageBytes: Uint8Array;
348
+ mmprojPath: string;
349
+ prompt?: string;
350
+ maxTokens?: number;
351
+ temperature?: number;
352
+ signal?: AbortSignal;
353
+ }) => Promise<{ text: string; projectorMs?: number; decodeMs?: number }>;
354
+ };
355
+ if (!runtime.describeImage) {
356
+ throw new Error(
357
+ "[ffi-streaming-backend] runtime lacks describeImage support",
358
+ );
359
+ }
360
+ return runtime.describeImage({
361
+ imageBytes: args.bytes,
362
+ mmprojPath: this.session.mmprojPath,
363
+ prompt: args.prompt,
364
+ maxTokens: args.maxTokens,
365
+ temperature: args.temperature,
366
+ signal: args.signal,
367
+ });
368
+ }
369
+
370
+ currentMmprojPath(): string | null {
371
+ return this.session?.mmprojPath ?? null;
372
+ }
373
+ }
374
+
375
+ /**
376
+ * Conversation-keyed slot file layout. Mirrors `cache-bridge.ts`'s
377
+ * `slotSavePath` so an `ELIZA_INFERENCE_BACKEND=http` opt-out can resume
378
+ * an FFI-saved conversation and vice-versa once the file formats align.
379
+ */
380
+ function slotFilename(conversationId: string, slotId: number): string {
381
+ return `${conversationId}__slot${slotId}.kv`;
382
+ }
@@ -0,0 +1,122 @@
1
+ /**
2
+ * In-process streaming-LLM runner.
3
+ *
4
+ * FFI streaming-LLM ABI declared in `ffi-streaming-llm.h`. The
5
+ * token-by-token loop hands `onTextChunk` accepted chunks and surfaces
6
+ * verifier events from native MTP.
7
+ *
8
+ * This file deliberately does not own the FFI context or the binding
9
+ * itself. It takes a narrow `LlmStreamingBinding` (see
10
+ * `services/llm-streaming-binding.ts`) + an opaque `LlmCtxHandle` as
11
+ * constructor arguments — that way it can be driven by libelizainference
12
+ * (via `wrapElizaInferenceFfi`) or any desktop libllama shim adapter without
13
+ * dragging in TTS/ASR surfaces. A single context can host concurrent generation
14
+ * sessions (one per pinned slot); the runner serialises with
15
+ * `slotInFlight`.
16
+ *
17
+ * Single-flight: a lock map keyed by slot id; slot id `-1` is never locked. Two concurrent generates
18
+ * against the same pinned slot would interleave KV cache state, so the
19
+ * runner serializes them at the JS layer.
20
+ */
21
+ import type { LlmCtxHandle, LlmStreamingBinding } from "./llm-streaming-binding";
22
+ import type { LlmStreamHandle, LlmStreamStep } from "./voice/ffi-bindings";
23
+ import type { VerifierStreamEvent } from "./voice/types";
24
+ export interface FfiStreamingGenerateArgs {
25
+ /** Pre-tokenized prompt — the runner does not detokenize. */
26
+ promptTokens: Int32Array;
27
+ /** Pinned slot id; -1 disables pinning (any free slot). */
28
+ slotId: number;
29
+ /** Optional prompt cache key used to derive a slot when `slotId === -1`. */
30
+ cacheKey?: string;
31
+ maxTokens: number;
32
+ temperature: number;
33
+ topP: number;
34
+ topK: number;
35
+ repeatPenalty: number;
36
+ draftMin: number;
37
+ draftMax: number;
38
+ /** Reserved for separate draft-model speculation; null for Eliza-1 MTP. */
39
+ draftModelPath: string | null;
40
+ /**
41
+ * Per-load GPU offload (ABI v8). Forwarded into the native session config
42
+ * on `llmStreamOpen`. The fused libelizainference path loads the text model
43
+ * once per ctx, so the FIRST session's value wins; later sessions reuse the
44
+ * resident model. `undefined` selects the runtime default (all layers).
45
+ * The desktop libllama path already applies gpuLayers at `loadModel()`, so
46
+ * it ignores this field — it is load-time config, threaded here only so the
47
+ * fused runner can mirror the libllama load decision.
48
+ */
49
+ gpuLayers?: number;
50
+ /**
51
+ * KV-cache K/V quant type names (ABI v8), e.g. "qjl1_256" / "q4_polar".
52
+ * Same load-time semantics as `gpuLayers`: forwarded into the fused
53
+ * session config so the first `llmStreamOpen` applies the quantized cache.
54
+ */
55
+ cacheTypeK?: string | null;
56
+ cacheTypeV?: string | null;
57
+ /**
58
+ * GBNF grammar source forcing the structured-reply envelope. Passed to
59
+ * the native session's `llmStreamOpen` config so sampling is
60
+ * grammar-constrained. `null` disables the constraint (free generation).
61
+ */
62
+ gbnfGrammar?: string | null;
63
+ /** Cancellation signal — fires `llmStreamCancel` on the active session. */
64
+ signal?: AbortSignal;
65
+ /** Per-chunk text callback. */
66
+ onTextChunk?: (chunk: string) => void | Promise<void>;
67
+ /** Speculative accept/reject events from MTP verification. */
68
+ onVerifierEvent?: (event: VerifierStreamEvent) => void | Promise<void>;
69
+ }
70
+ export interface FfiStreamingGenerateResult {
71
+ text: string;
72
+ slotId: number;
73
+ firstTokenMs: number | null;
74
+ drafted: number;
75
+ accepted: number;
76
+ }
77
+ /**
78
+ * Backend used by the mobile and desktop FFI routes.
79
+ */
80
+ export declare class FfiStreamingRunner {
81
+ private readonly ffi;
82
+ private readonly ctx;
83
+ private readonly slotInFlight;
84
+ /**
85
+ * Constructor takes the narrow `LlmStreamingBinding` (see
86
+ * `services/llm-streaming-binding.ts`) so both libelizainference (via
87
+ * `wrapElizaInferenceFfi`) and desktop libllama adapters can
88
+ * satisfy it. The runner never touches TTS/ASR/mmap surfaces.
89
+ */
90
+ constructor(ffi: LlmStreamingBinding, ctx: LlmCtxHandle);
91
+ /**
92
+ * Run one generation. Mirrors `MtpLlamaServer.generateWithUsage()`
93
+ * — same single-flight rule, same callback shape, same result block
94
+ * minus the metrics scrape (FFI does not have a `/metrics` endpoint).
95
+ */
96
+ generateWithUsage(args: FfiStreamingGenerateArgs): Promise<FfiStreamingGenerateResult>;
97
+ /**
98
+ * Async-iterable variant. Yields each accepted-token batch as it lands
99
+ * so callers that want token-grained control (e.g. the voice scheduler
100
+ * driving phrase-chunking off accept/reject events) don't have to
101
+ * register a callback. Internally still routes through `generateWithUsage`
102
+ * via a pump so the single-flight rule applies.
103
+ */
104
+ generateStream(args: FfiStreamingGenerateArgs): AsyncIterable<LlmStreamStep>;
105
+ /**
106
+ * Save the streaming slot KV state to disk. Best called between turns
107
+ * — calling mid-stream is racy and the FFI side is allowed to refuse.
108
+ * Surfaced here so the conversation registry can persist between
109
+ * mobile backgrounds the same way `MtpLlamaServer.persistSlot` does.
110
+ */
111
+ saveSlot(stream: LlmStreamHandle, filename: string): void;
112
+ /** Restore a previously-saved slot KV file into a fresh session. */
113
+ restoreSlot(stream: LlmStreamHandle, filename: string): void;
114
+ private runGenerate;
115
+ /**
116
+ * Shared inner loop. Opens the session, runs the prefill + next pump,
117
+ * forwards each step through `onStep` plus the optional caller
118
+ * callbacks, and wires abort + cancel.
119
+ */
120
+ private runGenerateInner;
121
+ }
122
+ //# sourceMappingURL=ffi-streaming-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ffi-streaming-runner.d.ts","sourceRoot":"","sources":["ffi-streaming-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAIH,OAAO,KAAK,EACX,YAAY,EACZ,mBAAmB,EACnB,MAAM,yBAAyB,CAAC;AACjC,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC3E,OAAO,KAAK,EAAa,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAEpE,MAAM,WAAW,wBAAwB;IACxC,6DAA6D;IAC7D,YAAY,EAAE,UAAU,CAAC;IACzB,2DAA2D;IAC3D,MAAM,EAAE,MAAM,CAAC;IACf,4EAA4E;IAC5E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,2EAA2E;IAC3E,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,+BAA+B;IAC/B,WAAW,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,8DAA8D;IAC9D,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,mBAAmB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACvE;AAED,MAAM,WAAW,0BAA0B;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;CACjB;AAMD;;GAEG;AACH,qBAAa,kBAAkB;IAU7B,OAAO,CAAC,QAAQ,CAAC,GAAG;IACpB,OAAO,CAAC,QAAQ,CAAC,GAAG;IAVrB,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAoC;IAEjE;;;;;OAKG;gBAEe,GAAG,EAAE,mBAAmB,EACxB,GAAG,EAAE,YAAY;IAGnC;;;;OAIG;IACG,iBAAiB,CACtB,IAAI,EAAE,wBAAwB,GAC5B,OAAO,CAAC,0BAA0B,CAAC;IAsBtC;;;;;;OAMG;IACI,cAAc,CACpB,IAAI,EAAE,wBAAwB,GAC5B,aAAa,CAAC,aAAa,CAAC;IAmD/B;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;IASzD,oEAAoE;IACpE,WAAW,CAAC,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI;YAW9C,WAAW;IA2BzB;;;;OAIG;YACW,gBAAgB;CAmG9B"}
@@ -0,0 +1,60 @@
1
+ import { describe, expect, it, vi } from "vitest";
2
+ import { FfiStreamingRunner } from "./ffi-streaming-runner";
3
+ import type {
4
+ LlmCtxHandle,
5
+ LlmStreamingBinding,
6
+ } from "./llm-streaming-binding";
7
+ import type { LlmStreamHandle } from "./voice/ffi-bindings";
8
+ // Prewarm contract: a maxTokens: 0 request must prefill the prompt and stop before any token step.
9
+ describe("FfiStreamingRunner prewarm", () => {
10
+ it("treats maxTokens: 0 as prefill-only and never calls next-token generation", async () => {
11
+ const stream = 7n as LlmStreamHandle; // arbitrary handle handed back by the mocked llmStreamOpen
12
+ const binding: LlmStreamingBinding = {
13
+ llmStreamSupported: () => true,
14
+ llmStreamOpen: vi.fn().mockReturnValue(stream),
15
+ llmStreamPrefill: vi.fn(),
16
+ llmStreamNext: vi.fn().mockReturnValue({ // would produce a token if called; asserted below to stay unused
17
+ tokens: [1],
18
+ text: "x",
19
+ done: true,
20
+ drafterDrafted: 0,
21
+ drafterAccepted: 0,
22
+ }),
23
+ llmStreamCancel: vi.fn(),
24
+ llmStreamClose: vi.fn(),
25
+ };
26
+ const onTextChunk = vi.fn();
27
+ const runner = new FfiStreamingRunner(binding, 1n as LlmCtxHandle);
28
+ const promptTokens = new Int32Array([11, 12, 13]);
29
+ // Act: request a generation with a zero token budget.
30
+ const result = await runner.generateWithUsage({
31
+ promptTokens,
32
+ slotId: 0,
33
+ maxTokens: 0,
34
+ temperature: 0,
35
+ topP: 1,
36
+ topK: 0,
37
+ repeatPenalty: 1,
38
+ draftMin: 0,
39
+ draftMax: 0,
40
+ draftModelPath: null,
41
+ onTextChunk,
42
+ });
43
+ // Assert: the session was opened once, prefilled with the prompt, and closed; no token step or text callback ran.
44
+ expect(binding.llmStreamOpen).toHaveBeenCalledTimes(1);
45
+ expect(binding.llmStreamPrefill).toHaveBeenCalledWith({
46
+ stream,
47
+ tokens: promptTokens,
48
+ });
49
+ expect(binding.llmStreamNext).not.toHaveBeenCalled();
50
+ expect(onTextChunk).not.toHaveBeenCalled();
51
+ expect(binding.llmStreamClose).toHaveBeenCalledWith(stream);
52
+ expect(result).toEqual({ // empty result: no text, no first-token timing, zero draft counters
53
+ text: "",
54
+ slotId: 0,
55
+ firstTokenMs: null,
56
+ drafted: 0,
57
+ accepted: 0,
58
+ });
59
+ });
60
+ });