@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,442 @@
1
+ /**
2
+ * TypeScript-side ABI surface for the in-process FFI streaming LLM path.
3
+ *
4
+ * This file mirrors the C header at
5
+ * `packages/inference/llama.cpp/omnivoice/src/ffi-streaming.h` — the
6
+ * function names are the same so that bun:ffi symbol resolution uses the
7
+ * exact C exports without any aliasing.
8
+ *
9
+ * Rationale for a separate ABI module
10
+ * ────────────────────────────────────
11
+ * `ffi-streaming-runner.ts` depends on the `ElizaInferenceFfi` handle from
12
+ * `voice/ffi-bindings.ts`, which in turn is tied to the omnivoice-fused
13
+ * build of `libelizainference`. That handle carries TTS, ASR, embedding,
14
+ * and streaming-LLM symbols together. The ABI declared here is the
15
+ * *streaming-LLM-only* slice that the mobile bootstrap needs to reason
16
+ * about independently — it does not assume the full fused binary is
17
+ * loaded. Callers that already have an `ElizaInferenceFfi` can implement
18
+ * `FfiLlmStreamingAbi` as a thin wrapper; callers that only have the
19
+ * llama.cpp-only `libelizainference.so` (e.g. the Android AOSP bootstrap
20
+ * before omnivoice ships) can implement it directly.
21
+ *
22
+ * MTP phasing
23
+ * ──────────────
24
+ * Phase 1 — target model only. The `FfiLlmStreamingAbi` alone is
25
+ * sufficient: open a single-model streaming session, prefill, generate,
26
+ * cancel, close. No drafter weights required.
27
+ *
28
+ * Phase 2 — speculative decoding. When `MobileInferenceCapabilities.
29
+ * mtpSupported` is `true`, swap to `FfiMtpStreamingAbi` which opens
30
+ * a paired drafter + verifier session and runs the speculative decode loop
31
+ * on-device. The two ABI surfaces share the same `FfiLlmHandle` brand so
32
+ * the dispatcher (`runtime-dispatcher.ts`) sees a uniform handle type.
33
+ *
34
+ * iOS XCFramework gap
35
+ * ───────────────────
36
+ * The ABI is defined here, the C header is frozen, but the iOS
37
+ * XCFramework that re-exports these symbols through the Swift bridge has
38
+ * not shipped yet. `loadIosStreamingLlmBinding()` in
39
+ * `ios-llama-streaming.ts` returns `null` until the XCFramework build
40
+ * lands. See `docs/inference/ffi-streaming.md` §iOS XCFramework gap for
41
+ * the current status.
42
+ */
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // Core handle types
46
+ // ---------------------------------------------------------------------------
47
+
48
+ /**
49
+ * Opaque handle to an open streaming-LLM session. The underlying C value
50
+ * is a pointer to a heap-allocated session struct; we brand it at the TS
51
+ * layer to prevent accidental mixing with other handle types.
52
+ *
53
+ * Concrete implementations will typically cast the raw native pointer to
54
+ * this type. NOTE(review): the original text names `bigint` as the bun:ffi
55
+ * pointer representation — confirm; callers should treat it as opaque.
56
+ */
57
+ export interface FfiLlmHandle {
58
+ readonly _brand: "FfiLlmHandle"; // brand marker that keeps handle types distinct
59
+ }
60
+
61
+ /**
62
+ * Token callback fired from the generation background thread once per
63
+ * decoded token (or once per speculative-accept batch in MTP mode).
64
+ * NOTE(review): `FfiMtpStreamingAbi` says once per accepted token — reconcile.
65
+ * `isDone` is `true` on the *last* invocation for a given generate call.
66
+ * After `isDone` the handle remains open but must not be passed to
67
+ * `generate` again until the caller re-prefills.
68
+ *
69
+ * The callback executes synchronously on the background thread the C
70
+ * library uses for decoding — callers must not call any FFI method
71
+ * back from inside the callback (the lock is not re-entrant).
72
+ */
73
+ export type TokenCallback = (
74
+ tokenId: number, // presumably the vocabulary id of the decoded token — confirm
75
+ tokenText: string, // presumably the detokenized text for `tokenId` — confirm
76
+ isDone: boolean, // true only on the final invocation of a generate call
77
+ ) => void;
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // Single-model streaming ABI
81
+ // ---------------------------------------------------------------------------
82
+
83
+ /**
84
+ * C ABI surface for the in-process streaming LLM path.
85
+ *
86
+ * Function names match the C exports in `ffi-streaming.h` exactly; bun:ffi
87
+ * resolves them by string match against the shared library symbol table.
88
+ *
89
+ * All methods are synchronous from the JS perspective (bun:ffi calls are
90
+ * synchronous unless declared `nonblocking`). `generate` is the one
91
+ * exception: tokens arrive via `tokenCallback` from a background decode
92
+ * loop, and the method may return a Promise that settles on completion.
93
+ */
94
+ export interface FfiLlmStreamingAbi {
95
+ /**
96
+ * Open a streaming-LLM session against the model at `modelPath`.
97
+ *
98
+ * The model is memory-mapped into the process — this call may block
99
+ * briefly on a cold filesystem. Subsequent calls with the same path
100
+ * share the mmap region (the C library uses a ref-counted mmap cache).
101
+ *
102
+ * Returns an opaque handle on success, or `null` when:
103
+ * - the model file does not exist or cannot be read,
104
+ * - the device lacks the RAM required for `contextSizeTokens`,
105
+ * - `gpuLayers > 0` and the Metal / Vulkan device is unavailable.
106
+ *
107
+ * @param modelPath Absolute path to a GGUF model file.
108
+ * @param contextSizeTokens KV cache size in tokens (expected to be
109
+ * power-of-two aligned; the library rounds up otherwise).
110
+ * @param numThreads CPU decode threads. 0 = auto-detect (uses
111
+ * `eliza_inference_default_thread_count()`).
112
+ * @param gpuLayers Number of transformer layers to offload to
113
+ * GPU. 0 = CPU only.
114
+ */
115
+ eliza_inference_llm_stream_open(
116
+ modelPath: string,
117
+ contextSizeTokens: number,
118
+ numThreads: number,
119
+ gpuLayers: number,
120
+ ): FfiLlmHandle | null;
121
+
122
+ /**
123
+ * Prefill the KV cache with the supplied token ids.
124
+ *
125
+ * Blocks until all tokens are evaluated. On a large prompt this can
126
+ * take several hundred milliseconds on CPU — callers should not invoke
127
+ * on the main thread.
128
+ *
129
+ * @param handle Active session from `open`.
130
+ * @param promptTokens Pre-tokenized prompt; flat array of int32 token ids.
131
+ * @param slotId KV slot index (0-based). Use -1 to allocate a
132
+ * fresh slot; use 0..N-1 to pin a conversational
133
+ * turn for KV reuse across multi-turn sessions.
134
+ * @returns Number of tokens prefilled, or -1 on error (invalid handle,
135
+ * OOM, or KV cache exhausted).
136
+ */
137
+ eliza_inference_llm_stream_prefill(
138
+ handle: FfiLlmHandle,
139
+ promptTokens: Int32Array,
140
+ slotId: number,
141
+ ): number;
142
+
143
+ /**
144
+ * Start async token generation.
145
+ *
146
+ * The library spins up an internal worker thread (or reuses a pooled
147
+ * one) and begins decoding. Each decoded token fires `tokenCallback`
148
+ * from that thread. The final callback invocation has `isDone = true`.
149
+ *
150
+ * This call is non-blocking from the C caller's perspective: the C
151
+ * function returns 0 as soon as the worker is scheduled. From the JS
152
+ * perspective, callers should await the returned Promise — it resolves
153
+ * after the final `isDone = true` callback fires so that the JS async
154
+ * iterator can drain cleanly without a separate synchronisation
155
+ * mechanism. Mock implementations fulfil this contract by resolving
156
+ * the Promise after the last synthetic token; native FFI wrappers wrap
157
+ * a completion event or condition variable.
158
+ *
159
+ * Calling `generate` on a handle that is already generating is a hard
160
+ * error (returns -1 / rejects). Callers must wait for the Promise to
161
+ * resolve (or call `cancel` and await the resulting `isDone` callback)
162
+ * before re-using the handle.
163
+ *
164
+ * @param handle Active session from `open`.
165
+ * @param maxNewTokens Budget cap. Generation stops at `maxNewTokens`
166
+ * even if no EOS token was produced.
167
+ * @param temperature Softmax temperature. 0.0 = greedy.
168
+ * @param topP Nucleus sampling threshold (0.0–1.0).
169
+ * @param tokenCallback Callback fired per token from the decode thread.
170
+ * @returns 0 on success or -1 on error, directly or via a Promise.
171
+ */
172
+ eliza_inference_llm_stream_generate(
173
+ handle: FfiLlmHandle,
174
+ maxNewTokens: number,
175
+ temperature: number,
176
+ topP: number,
177
+ tokenCallback: TokenCallback,
178
+ ): number | Promise<number>;
179
+
180
+ /**
181
+ * Signal the active generation to stop at the next safe cancellation
182
+ * point (in MTP mode: after the current speculative batch is retired).
183
+ *
184
+ * This does NOT wait for the background thread to finish — the thread
185
+ * fires a final `tokenCallback` with `isDone = true` shortly after the
186
+ * cancel flag is observed. Callers that need to know the thread has
187
+ * stopped must wait for that final callback.
188
+ *
189
+ * Calling `cancel` on a handle that is not currently generating is a
190
+ * no-op.
191
+ *
192
+ * @param handle Active session from `open`.
193
+ */
194
+ eliza_inference_llm_stream_cancel(handle: FfiLlmHandle): void;
195
+
196
+ /**
197
+ * Release all resources associated with `handle`.
198
+ *
199
+ * Evicts the KV cache slots occupied by this session and releases the
200
+ * mmap reference. The model's mmap region stays mapped until the ref
201
+ * count reaches zero (i.e. all sessions against that path are closed).
202
+ *
203
+ * Calling `close` on a handle that is still generating is a hard error
204
+ * — cancel first and wait for `isDone` before closing.
205
+ *
206
+ * @param handle Active session from `open`.
207
+ */
208
+ eliza_inference_llm_stream_close(handle: FfiLlmHandle): void;
209
+ }
210
+
211
+ // ---------------------------------------------------------------------------
212
+ // MTP (speculative decoding) streaming ABI — Phase 2
213
+ // ---------------------------------------------------------------------------
214
+
215
+ /**
216
+ * C ABI surface for paired drafter + verifier speculative decoding.
217
+ *
218
+ * Phase 2 only — the mobile runtime enables this path when
219
+ * `MobileInferenceCapabilities.mtpSupported` is `true`. Phase 1
220
+ * devices use `FfiLlmStreamingAbi` only (target model, no drafter).
221
+ *
222
+ * The MTP session holds two model contexts internally:
223
+ * 1. The *drafter* — a small, fast model that proposes `speculativeWindowSize`
224
+ * candidate tokens per step.
225
+ * 2. The *verifier* — the full target model that accepts or rejects the
226
+ * drafter's proposals in one parallel evaluation batch.
227
+ *
228
+ * The token callback fires once per *accepted* token (after the verifier's
229
+ * accept decision). Rejected tokens are silently discarded at the C layer;
230
+ * the JS consumer always sees a stream of accepted tokens identical to
231
+ * what a greedy target-only decode would have produced (assuming the
232
+ * drafter and verifier share vocabulary and a compatible chat template).
233
+ *
234
+ * Method signatures mirror `FfiLlmStreamingAbi` apart from the `mtp` name
235
+ * infix; only the `open` argument list differs (adds drafter path +
236
+ * speculative window). This keeps the dispatcher (`runtime-dispatcher.ts`)
237
+ * agnostic to which ABI is in use.
238
+ */
239
+ export interface FfiMtpStreamingAbi {
240
+ /**
241
+ * Open a paired drafter + verifier streaming session.
242
+ *
243
+ * Both models are mmap'd; the KV cache is sized to `contextSizeTokens`
244
+ * for the verifier and proportionally smaller for the drafter (the C
245
+ * library computes the drafter KV budget automatically from the
246
+ * `speculativeWindowSize`).
247
+ *
248
+ * @param drafterModelPath Absolute path to the drafter GGUF.
249
+ * @param verifierModelPath Absolute path to the verifier GGUF.
250
+ * @param contextSizeTokens Verifier KV size in tokens.
251
+ * @param numThreads CPU threads for verifier (drafter shares
252
+ * the same thread pool).
253
+ * @param gpuLayers Verifier GPU layer count. The drafter
254
+ * always runs on CPU in Phase 2 to avoid
255
+ * competing for Metal/Vulkan resources.
256
+ * @param speculativeWindowSize Number of drafter candidate tokens per
257
+ * speculative step (1–16; 4 is a safe
258
+ * starting point for mobile).
259
+ * @returns Opaque session handle, or `null` on failure.
260
+ */
261
+ eliza_inference_mtp_stream_open(
262
+ drafterModelPath: string,
263
+ verifierModelPath: string,
264
+ contextSizeTokens: number,
265
+ numThreads: number,
266
+ gpuLayers: number, // applies to the verifier only, per the doc above
267
+ speculativeWindowSize: number, // documented range is 1–16
268
+ ): FfiLlmHandle | null;
269
+
270
+ /**
271
+ * Prefill both the drafter and verifier KV caches in a single blocking
272
+ * call. The verifier is prefilled first (it owns the ground-truth KV
273
+ * state); the drafter is then fast-forwarded to match.
274
+ *
275
+ * Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_prefill`.
276
+ */
277
+ eliza_inference_mtp_stream_prefill(
278
+ handle: FfiLlmHandle,
279
+ promptTokens: Int32Array,
280
+ slotId: number,
281
+ ): number;
282
+
283
+ /**
284
+ * Start speculative-decoding generation. The token callback fires for
285
+ * each verifier-accepted token — not for each drafter proposal.
286
+ *
287
+ * Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_generate`.
288
+ */
289
+ eliza_inference_mtp_stream_generate(
290
+ handle: FfiLlmHandle,
291
+ maxNewTokens: number,
292
+ temperature: number,
293
+ topP: number,
294
+ tokenCallback: TokenCallback,
295
+ ): number | Promise<number>;
296
+
297
+ /**
298
+ * Cancel an active MTP generation at the next speculation boundary.
299
+ * Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_cancel`.
300
+ */
301
+ eliza_inference_mtp_stream_cancel(handle: FfiLlmHandle): void;
302
+
303
+ /**
304
+ * Release both drafter and verifier sessions.
305
+ * Same contract as `FfiLlmStreamingAbi.eliza_inference_llm_stream_close`.
306
+ */
307
+ eliza_inference_mtp_stream_close(handle: FfiLlmHandle): void;
308
+ }
309
+
310
+ // ---------------------------------------------------------------------------
311
+ // Mobile capability snapshot
312
+ // ---------------------------------------------------------------------------
313
+
314
+ /**
315
+ * Device-side inference capability snapshot used by the mobile bootstrap
316
+ * to decide which ABI path to activate at startup.
317
+ *
318
+ * Produced by `detectMobileCapabilities()`. The runtime re-probes on
319
+ * every foreground resume (thermal / memory state can change while the
320
+ * app is backgrounded).
321
+ */
322
+ export type MobileInferenceCapabilities = {
323
+ /**
324
+ * True when the `eliza_inference_llm_stream_*` symbols are present and
325
+ * `llmStreamSupported()` returns 1 (`detectMobileCapabilities()` assumes
326
+ * `true` if the binding has no such probe). Gate for Phase 1 inference.
327
+ */
328
+ streamingLlm: boolean;
329
+
330
+ /**
331
+ * Gate for Phase 2 speculative decoding: `streamingLlm` is true AND the
332
+ * drafter GGUF is bundled AND thermal state is below `serious`. Always
333
+ * `false` from `detectMobileCapabilities()`; callers set it after probing.
334
+ */
335
+ mtpSupported: boolean;
336
+
337
+ /**
338
+ * True when the `eliza_inference_tts_synthesize_stream` symbol is
339
+ * present and `ttsStreamSupported()` returns 1. Gate for the OmniVoice
340
+ * TTS streaming path.
341
+ */
342
+ omnivoiceStreaming: boolean;
343
+
344
+ /**
345
+ * Maximum KV context in tokens; the runtime clamps user-configured
346
+ * context sizes to this value. Intended to derive from device RAM minus
347
+ * the model weights footprint, but `detectMobileCapabilities()` currently
348
+ * reports a fixed conservative 2048 when `streamingLlm` is true.
349
+ * 0 when `streamingLlm` is false (no context available).
350
+ */
351
+ maxContextTokens: number;
352
+
353
+ /**
354
+ * Number of transformer layers the device can offload to GPU/NPU at the
355
+ * current thermal state without risking throttling; 0 means CPU-only
356
+ * (the only value `detectMobileCapabilities()` produces today). Used as
357
+ * the initial `gpuLayers` argument to `open`; it can be reduced
358
+ * dynamically when the thermal state worsens mid-session.
359
+ */
360
+ recommendedGpuLayers: number;
361
+ };
362
+
363
+ // ---------------------------------------------------------------------------
364
+ // Capability detection
365
+ // ---------------------------------------------------------------------------
366
+
367
+ /**
368
+ * Derive a `MobileInferenceCapabilities` snapshot from an FFI binding.
369
+ *
370
+ * When `ffi` is `null` or `undefined` (e.g. in test environments,
371
+ * cloud-only builds, or when the native library failed to load), all
372
+ * boolean flags are `false` and numeric fields are 0, so the downstream
373
+ * runtime can always read the capability struct without branching on
374
+ * "was an FFI loaded".
375
+ *
376
+ * When `ffi` is non-null, the function:
377
+ * 1. Sets `streamingLlm` from `llmStreamSupported()` if the binding has
378
+ * it (result coerced to boolean); otherwise assumes `true`.
379
+ * 2. Sets `mtpSupported = false` (Phase 1): drafter detection needs a
380
+ * platform-specific bundle probe outside this function; callers
381
+ * that ran the probe set the field on the returned snapshot.
382
+ * 3. Sets `omnivoiceStreaming` from `ttsStreamSupported()` (coerced to
383
+ * boolean), or `false` when the binding does not expose it.
384
+ * 4. Uses fixed conservative defaults: `maxContextTokens` is 2048 when
385
+ * `streamingLlm` is true (else 0) and `recommendedGpuLayers` is 0.
386
+ *
387
+ * @param ffi A loaded FFI binding, or `null` for the all-false snapshot.
388
+ * @returns A newly built capability snapshot; the two optional probes
389
+ * are the only binding methods invoked.
390
+ */
391
+ export function detectMobileCapabilities(
392
+ ffi: FfiLlmStreamingAbi | null,
393
+ ): MobileInferenceCapabilities {
394
+ if (ffi === null || ffi === undefined) {
395
+ return {
396
+ streamingLlm: false,
397
+ mtpSupported: false,
398
+ omnivoiceStreaming: false,
399
+ maxContextTokens: 0,
400
+ recommendedGpuLayers: 0,
401
+ };
402
+ }
403
+
404
+ // Phase 1: probe only the streaming-LLM surface. `FfiLlmStreamingAbi`
405
+ // does not declare llmStreamSupported() / ttsStreamSupported(), so view
406
+ // the binding as a plain record and call the probes only when present
407
+ // (e.g. the fused omnivoice build). Results are coerced with Boolean()
408
+ // because the native probes are documented as returning 1, not `true`.
409
+ const anyFfi = ffi as unknown as Record<string, unknown>;
410
+
411
+ const streamingLlm =
412
+ typeof anyFfi.llmStreamSupported === "function"
413
+ ? Boolean((anyFfi.llmStreamSupported as () => unknown)())
414
+ : // If the binding doesn't expose a supported() query but was handed
415
+ // to us at all, assume yes — the caller already verified the symbols
416
+ // exist via `llmStreamOpen !== undefined` elsewhere.
417
+ true;
418
+
419
+ const omnivoiceStreaming =
420
+ typeof anyFfi.ttsStreamSupported === "function"
421
+ ? Boolean((anyFfi.ttsStreamSupported as () => unknown)())
422
+ : false;
423
+
424
+ // mtpSupported requires a drafter bundle probe that is not part of
425
+ // this function's responsibility. Phase 1 always returns false here;
426
+ // callers that have completed the bundle probe should OR in their result.
427
+ const mtpSupported = false;
428
+
429
+ // Conservative defaults for Phase 1. Devices with more RAM will
430
+ // override these through the platform-specific capability probe once
431
+ // the full `InferenceCapabilities` path is unified.
432
+ const maxContextTokens = streamingLlm ? 2048 : 0;
433
+ const recommendedGpuLayers = 0;
434
+
435
+ return {
436
+ streamingLlm,
437
+ mtpSupported,
438
+ omnivoiceStreaming,
439
+ maxContextTokens,
440
+ recommendedGpuLayers,
441
+ };
442
+ }
@@ -0,0 +1,180 @@
1
+ /**
2
+ * In-process FFI streaming backend adapter.
3
+ *
4
+ * Implements `LocalInferenceBackend` as the optimized in-process
5
+ * llama.cpp path used by Eliza-1 on desktop and mobile.
6
+ *
7
+ * What this class deliberately does NOT do:
8
+ * - Own the FFI context. The runtime provider passed to this class owns
9
+ * native load/unload and hands back the binding, context, and tokenizer.
10
+ * - Decode image bytes or call mtmd directly. Vision requests are validated
11
+ * here, then forwarded to runtimes that expose `describeImage`.
12
+ */
13
+ import type { BackendPlan, GenerateArgs, GenerateResult, LocalGenerateWithUsageResult, LocalInferenceBackend } from "./backend";
14
+ import type { FfiStreamingRunner } from "./ffi-streaming-runner";
15
+ import type { LlmCtxHandle, LlmStreamingBinding } from "./llm-streaming-binding";
16
+ /**
17
+ * Constructor-injected adapter that resolves the FFI binding, context, and
18
+ * tokenizer for a given load. Two responsibilities:
19
+ *
20
+ * 1. Decide whether the FFI path is viable on the current binding
21
+ * (`supported()`). Mirrors `LlmStreamingBinding.llmStreamSupported()`
22
+ * plus any higher-level constraints (e.g. dylib path exists, build
23
+ * target matches the bundle's required kernels).
24
+ * 2. Lifecycle: `acquire(plan)` returns the FFI runner ready for
25
+ * `generate()` against the requested model, plus a tokenizer that
26
+ * matches that model's vocab. `release()` tears everything down.
27
+ *
28
+ * Production runtime implementation: the fused libelizainference path
29
+ * (`desktop-fused-ffi-backend-runtime.ts`), which wraps `ElizaInferenceFfi`
30
+ * via `wrapElizaInferenceFfi()` from `services/llm-streaming-binding.ts`.
31
+ * libllama has been retired — there is no second runtime behind this slot.
32
+ */
33
+ export interface FfiBackendRuntime {
34
+ supported(): boolean; // synchronous viability check for the FFI path
35
+ acquire(plan: BackendPlan): Promise<FfiBackendSession>; // resolves to the live session for `plan`
36
+ release(): Promise<void>; // tears down what `acquire()` set up
37
+ /**
38
+ * Optional parallel-slot pool surface. When the runtime exposes a
39
+ * ctx pool (the desktop libllama path does), `parallelSlots()`
40
+ * reports the live count and `resizeParallel(N)` grows/shrinks it.
41
+ * Runtimes without a pool report 1 and ignore resize requests.
+ *
+ * Both members are declared optional, so a runtime may also omit them
+ * entirely.
+ *
+ * NOTE(review): the interface comment above states that libllama has been
+ * retired, yet this paragraph still cites a desktop libllama ctx pool —
+ * confirm which runtime (if any) currently implements the pool.
42
+ */
43
+ parallelSlots?(): number;
44
+ resizeParallel?(target: number): Promise<boolean>;
45
+ }
46
+ /**
47
+ * Result of `FfiBackendRuntime.acquire()` — a live FFI session bound to a
48
+ * specific loaded model.
+ *
+ * Every member is `readonly`: a session describes one load and is replaced,
+ * not mutated, when a different model is acquired (presumably — confirm
+ * against the runtime implementation).
49
+ */
50
+ export interface FfiBackendSession {
51
+ readonly binding: LlmStreamingBinding; // FFI binding this session runs on
52
+ readonly ctx: LlmCtxHandle; // native context handle for the loaded model
53
+ readonly runner: FfiStreamingRunner; // runner that is ready for `generate()`
54
+ /**
55
+ * Tokenize a prompt string into model token ids using the loaded model's
56
+ * tokenizer. The vocab MUST match the GGUF — mismatches produce gibberish
57
+ * silently. The runtime is responsible for asserting this at acquire
58
+ * time.
59
+ */
60
+ readonly tokenize: (prompt: string) => Int32Array;
61
+ /**
62
+ * Native MTP speculative-decoding policy from the catalog. `null`
63
+ * disables speculative decoding for this session.
64
+ */
65
+ readonly mtp: {
66
+ specType: "draft-mtp";
67
+ draftMin: number;
68
+ draftMax: number;
69
+ gpuLayers: number | "auto";
70
+ } | null;
71
+ /**
72
+ * Absolute path to a *separate* MTP drafter GGUF resolved during load.
73
+ * `null` means same-file MTP: the NextN head is embedded in the main
74
+ * text GGUF and the native runner activates `--spec-type draft-mtp`
75
+ * with no `-md`. Speculative decoding is governed by `mtp`, not by the
76
+ * presence of this path.
77
+ */
78
+ readonly draftModelPath: string | null;
79
+ /**
80
+ * Multimodal projector (mmproj) GGUF path for vision describe. Resolved
81
+ * from `plan.overrides.mmprojPath` at acquire time. `null` disables
82
+ * vision — `describeImage` then throws an actionable error.
83
+ */
84
+ readonly mmprojPath: string | null;
85
+ /**
86
+ * Per-load runtime config the fused libelizainference path applies at its
87
+ * first `llmStreamOpen` (gpuLayers + KV-cache quant types). The desktop
88
+ * libllama runtime applies these at `loadModel()` instead and leaves this
89
+ * `null` — the backend forwards them into the runner's per-call config only
90
+ * when present, so the fused path mirrors the libllama load decision without
91
+ * the libllama path double-applying them.
+ *
+ * The property is optional as well as nullable, so consumers must treat
+ * "omitted" and `null` alike.
+ *
+ * NOTE(review): this paragraph describes a desktop libllama runtime, while
+ * the `FfiBackendRuntime` comment in this file says libllama has been
+ * retired — confirm whether the libllama wording is stale.
92
+ */
93
+ readonly loadConfig?: {
94
+ gpuLayers?: number;
95
+ cacheTypeK?: string | null;
96
+ cacheTypeV?: string | null;
97
+ } | null;
98
+ }
99
+ /**
100
+ * Adapter that satisfies `LocalInferenceBackend` by delegating to
101
+ * `FfiStreamingRunner`. The `id` is `"llama-cpp"` because this is the
102
+ * in-process variant of the optimized llama.cpp path.
+ *
+ * This is a declaration-only view: method bodies live in
+ * `ffi-streaming-backend.ts`, so the notes on undocumented members below
+ * are hedged where the behavior cannot be read off the signature.
103
+ */
104
+ export declare class FfiStreamingBackend implements LocalInferenceBackend {
105
+ private readonly runtime; // presumably the runtime handed to the constructor — confirm
106
+ readonly id: "llama-cpp";
107
+ private session; // presumably the session from `runtime.acquire()` — confirm
108
+ private loadedPath; // presumably backs `currentModelPath()` — confirm
109
+ constructor(runtime: FfiBackendRuntime);
110
+ available(): Promise<boolean>; // NOTE(review): presumably reflects `runtime.supported()` — confirm
111
+ hasLoadedModel(): boolean;
112
+ currentModelPath(): string | null; // `null` presumably means no model is loaded — confirm
113
+ load(plan: BackendPlan): Promise<void>;
114
+ unload(): Promise<void>;
115
+ generate(args: GenerateArgs): Promise<GenerateResult>;
116
+ generateWithUsage(args: GenerateArgs & {
117
+ slotId?: number;
118
+ }): Promise<LocalGenerateWithUsageResult>;
119
+ /**
120
+ * Persist the active session's KV state to a per-conversation file.
121
+ * v1 uses `llama_state_seq_save_file` against seq_id=0. The on-disk file
122
+ * path mirrors `ffi-streaming-backend.ts`'s conversation-keyed slot layout
123
+ * (`<cacheDir>/<conversationId>/<slotId>.kv`) so a switch between
124
+ * FFI and subprocess can resume each other's slots — once both
125
+ * paths agree on the file format.
+ *
+ * NOTE(review): `ffi-streaming-backend.ts` is this module itself, so
+ * "mirrors" is self-referential; the intended reference is presumably the
+ * subprocess backend's slot layout — confirm and rename.
126
+ */
127
+ persistConversationKv(conversationId: string, slotId: number): Promise<void>;
128
+ /** Restore a previously persisted KV state. Mirror of `persistConversationKv`. Resolves to a boolean; presumably `false` means nothing was restored — confirm in the implementation. */
129
+ restoreConversationKv(conversationId: string, slotId: number): Promise<boolean>;
130
+ /**
131
+ * Pre-decode `promptPrefix` so the next `generate` against the same
132
+ * `cacheKey` skips re-prefill. Returns `false` when the prefix is
133
+ * empty or no session is loaded. The FFI runner serializes by
134
+ * `cacheKey` internally via the `slotInFlight` map.
135
+ */
136
+ prewarmConversation(promptPrefix: string, opts: {
137
+ slotId: number;
138
+ cacheKey: string;
139
+ }): Promise<boolean>;
140
+ /**
141
+ * True when Eliza-1 native MTP is active for the loaded target model.
142
+ * Covers both shapes: same-file MTP (NextN head embedded in the text
143
+ * GGUF, `draftModelPath` null) and separate-drafter MTP.
144
+ */
145
+ mtpEnabled(): boolean;
146
+ /**
147
+ * Parallel-slot pool size. Routed to the runtime's ctx pool when one
148
+ * exists; defaults to 1 otherwise.
149
+ */
150
+ parallelSlots(): number;
151
+ /**
152
+ * Grow or shrink the runtime's ctx pool to `target` slots. Returns
153
+ * false when the runtime has no pool surface (in which case parallel
154
+ * resize is ignored — the conversation registry tolerates
155
+ * fixed 1-slot operation).
156
+ */
157
+ resizeParallel(target: number): Promise<boolean>;
158
+ /**
159
+ * Vision describe via mmproj. Requires:
160
+ * - The shim built with `-DELIZA_ENABLE_VISION=1` (set
161
+ * `ELIZA_ENABLE_VISION=1` in the build script's environment). When
162
+ * absent the runtime throws an actionable error.
163
+ * - `plan.overrides.mmprojPath` was passed at load time so the
164
+ * adapter knows which mmproj GGUF to feed clip.
+ *
+ * Resolves to the generated `text`, plus optional `projectorMs` /
+ * `decodeMs` timings (presumably milliseconds, judging by the names —
+ * confirm).
165
+ */
166
+ describeImage(args: {
167
+ bytes: Uint8Array;
168
+ mimeType?: string;
169
+ prompt?: string;
170
+ maxTokens?: number;
171
+ temperature?: number;
172
+ signal?: AbortSignal;
173
+ }): Promise<{
174
+ text: string;
175
+ projectorMs?: number;
176
+ decodeMs?: number;
177
+ }>;
178
+ currentMmprojPath(): string | null; // `null` presumably means no mmproj was supplied at load — confirm
179
+ }
180
+ //# sourceMappingURL=ffi-streaming-backend.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ffi-streaming-backend.d.ts","sourceRoot":"","sources":["ffi-streaming-backend.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EACX,WAAW,EACX,YAAY,EACZ,cAAc,EACd,4BAA4B,EAC5B,qBAAqB,EACrB,MAAM,WAAW,CAAC;AACnB,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,KAAK,EACX,YAAY,EACZ,mBAAmB,EACnB,MAAM,yBAAyB,CAAC;AAGjC;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,WAAW,iBAAiB;IACjC,SAAS,IAAI,OAAO,CAAC;IACrB,OAAO,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IACvD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACzB;;;;;OAKG;IACH,aAAa,CAAC,IAAI,MAAM,CAAC;IACzB,cAAc,CAAC,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;CAClD;AAED;;;GAGG;AACH,MAAM,WAAW,iBAAiB;IACjC,QAAQ,CAAC,OAAO,EAAE,mBAAmB,CAAC;IACtC,QAAQ,CAAC,GAAG,EAAE,YAAY,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,kBAAkB,CAAC;IACpC;;;;;OAKG;IACH,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,UAAU,CAAC;IAClD;;;OAGG;IACH,QAAQ,CAAC,GAAG,EAAE;QACb,QAAQ,EAAE,WAAW,CAAC;QACtB,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,GAAG,MAAM,CAAC;KAC3B,GAAG,IAAI,CAAC;IACT;;;;;;OAMG;IACH,QAAQ,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC;;;;OAIG;IACH,QAAQ,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC;;;;;;;OAOG;IACH,QAAQ,CAAC,UAAU,CAAC,EAAE;QACrB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC3B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KAC3B,GAAG,IAAI,CAAC;CACT;AAED;;;;GAIG;AACH,qBAAa,mBAAoB,YAAW,qBAAqB;IAMpD,OAAO,CAAC,QAAQ,CAAC,OAAO;IALpC,QAAQ,CAAC,EAAE,EAAG,WAAW,CAAU;IAEnC,OAAO,CAAC,OAAO,CAAkC;IACjD,OAAO,CAAC,UAAU,CAAuB;gBAEZ,OAAO,EAAE,iBAAiB;IAEjD,SAAS,IAAI,OAAO,CAAC,OAAO,CAAC;IAInC,cAAc,IAAI,OAAO;IAIzB,gBAAgB,IAAI,MAAM,GAAG,IAAI;IAI3B,IAAI,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;IAMtC,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAgBvB,QAAQ,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,CAAC;IAKrD,iBAAiB,CACtB,IAAI,EAAE,YAAY,GAAG;QAAE,MAAM,CAAC,EAAE,MAAM,CAAA;KAAE,GACtC,OAAO,CAAC,4BAA4B,CAAC;IAqDxC;;;;;;;OAOG;IACG,qBAAqB,CAC1B,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,MAAM,GACZ,OAAO,CAAC,IAAI,CAAC;IAYhB,kFAAkF;IAC5E,qBAAqB,CAC1
B,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,MAAM,GACZ,OAAO,CAAC,OAAO,CAAC;IASnB;;;;;OAKG;IACG,mBAAmB,CACxB,YAAY,EAAE,MAAM,EACpB,IAAI,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GACxC,OAAO,CAAC,OAAO,CAAC;IAsBnB;;;;OAIG;IACH,UAAU,IAAI,OAAO;IAIrB;;;OAGG;IACH,aAAa,IAAI,MAAM;IAIvB;;;;;OAKG;IACG,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKtD;;;;;;;OAOG;IACG,aAAa,CAAC,IAAI,EAAE;QACzB,KAAK,EAAE,UAAU,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,MAAM,CAAC,EAAE,WAAW,CAAC;KACrB,GAAG,OAAO,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAwCtE,iBAAiB,IAAI,MAAM,GAAG,IAAI;CAGlC"}