@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,177 @@
1
+ /**
2
+ * llama-server vision-describe backend (WS2).
3
+ *
4
+ * Wraps the out-of-process llama-server's `/completion` endpoint with
5
+ * the `image_data` array (base64-encoded payloads) and shapes the
6
+ * response to the WS2 `VisionDescribeBackend` contract.
7
+ *
8
+ * llama-server image-data API recap (verified against llama.cpp commit
9
+ * b8198+, May 2026):
10
+ *
11
+ * POST /completion
12
+ * { "prompt": "<...>USER: [img-12] What's in this image?\nASSISTANT:",
13
+ * "image_data": [
14
+ * { "data": "<base64 png/jpeg>", "id": 12 }
15
+ * ],
16
+ * "n_predict": 256,
17
+ * "temperature": 0.2,
18
+ * "stream": false }
19
+ *
20
+ * Response:
21
+ * { "content": "A photo of a cat.", "stop": true,
22
+ * "timings": { "prompt_ms": 180.4, "predicted_ms": 423.1 } }
23
+ *
24
+ * Server-side mmproj is loaded via the `--mmproj <path>` flag on
25
+ * llama-server startup. The FFI runtime wrapper passes this flag
26
+ * already for tiers with vision enabled; this backend assumes the
27
+ * server has been started with the right mmproj for the active model.
28
+ *
29
+ * Backend responsibility:
30
+ * - Encode the image as base64 (when not already).
31
+ * - Build the prompt with the `[img-N]` placeholder convention.
32
+ * - POST to `/completion`, parse the text + timings.
33
+ * - Honour AbortSignal by passing it through to the fetch call.
34
+ *
35
+ * Backend explicitly does NOT:
36
+ * - Start / stop the server. That's the FFI runtime wrapper's job.
37
+ * - Resolve the mmproj path — the server already has it. The arbiter's
38
+ * `--mmproj` was set when the text model loaded.
39
+ * - Implement projector-token reuse. llama-server has no API to
40
+ * accept pre-projected tokens; if the WS1 cache hit happens, this
41
+ * backend ignores the hint and re-runs the projector. The cache
42
+ * is more useful with the in-process node-llama-cpp backend.
43
+ *
44
+ * Metal / CUDA validation:
45
+ * The llama-server build embeds the same mtmd_encode path the
46
+ * in-process binding will eventually expose. On a Metal build the
47
+ * image encode dispatches through the Metal compute encoder; on a
48
+ * CUDA build through cuBLAS. We have no GPU on this host — see the
49
+ * `__tests__/vision-describe.test.ts` notes for the GPU smoke check.
50
+ */
51
+
52
+ import { resolveImageBytes } from "./hash";
53
+ import type {
54
+ VisionDescribeBackend,
55
+ VisionDescribeRequest,
56
+ VisionDescribeResult,
57
+ } from "./types";
58
+
59
+ export interface LlamaServerVisionBackendOptions {
60
+ /**
61
+ * Base URL of the llama-server. The FFI runtime wrapper exposes
62
+ * this via `currentBaseUrl()`; pass the resolved URL here at load
63
+ * time. The backend keeps it as-is across calls.
64
+ */
65
+ baseUrl: string;
66
+ /**
67
+ * Optional fetch override. Tests inject a fake fetch; production
68
+ * uses global fetch. The signature mirrors `fetch` so the test
69
+ * surface is the same as the real one.
70
+ */
71
+ fetch?: typeof fetch;
72
+ /**
73
+ * Default `n_predict` budget when the caller doesn't specify
74
+ * `maxTokens`. 256 matches the description-length budget the
75
+ * Florence-2 / VisionManager path uses today.
76
+ */
77
+ defaultMaxTokens?: number;
78
+ }
79
+
80
/**
 * Create a `VisionDescribeBackend` that describes images by POSTing to an
 * already-running llama-server's `/completion` endpoint.
 *
 * The backend never starts or stops the server; it only remembers the base
 * URL (with a single trailing slash stripped) and issues one HTTP request per
 * `describe()` call.
 *
 * @param opts - Base URL plus optional fetch override and default token budget.
 * @returns A backend whose `id` is `"llama-server"`.
 * @throws Error when `opts.baseUrl` is empty after trailing-slash removal.
 */
export function createLlamaServerVisionBackend(
	opts: LlamaServerVisionBackendOptions,
): VisionDescribeBackend {
	const fetchImpl = opts.fetch ?? globalThis.fetch;
	const defaultMaxTokens = opts.defaultMaxTokens ?? 256;
	let baseUrl = opts.baseUrl.replace(/\/$/, "");
	// Set by `dispose()`. Without this flag a late `describe()` would fetch
	// the relative URL "/completion" (baseUrl is blanked on dispose) and fail
	// with an opaque URL error — or hit an unrelated origin via a custom fetch.
	let disposed = false;

	if (!baseUrl) {
		throw new Error(
			"[vision/llama-server] baseUrl is required; pass FFI runtime's currentBaseUrl()",
		);
	}

	return {
		id: "llama-server",
		/**
		 * Encode the image as base64, POST it with the prompt to
		 * `/completion`, and shape the reply into a title + description.
		 *
		 * @throws Error if the backend was disposed, the server answers with
		 *   a non-2xx status, the JSON payload has no string `content`, or
		 *   the returned text is empty after trimming.
		 */
		async describe(
			request: VisionDescribeRequest,
		): Promise<VisionDescribeResult> {
			if (disposed) {
				throw new Error(
					"[vision/llama-server] backend has been disposed; create a new backend before calling describe()",
				);
			}
			const { bytes, mimeType } = resolveImageBytes(request.image);
			const base64 = Buffer.from(bytes).toString("base64");
			const prompt = buildVisionPrompt(request.prompt);
			const startMs = Date.now();
			const body = JSON.stringify({
				prompt,
				// The id must match the `[img-12]` placeholder emitted by
				// `buildVisionPrompt`.
				image_data: [{ data: base64, id: 12 }],
				n_predict: request.maxTokens ?? defaultMaxTokens,
				temperature: request.temperature ?? 0.2,
				stream: false,
				// `cache_prompt: false` here so each describe call gets a
				// fresh slot; the WS1 vision-embedding cache handles repeat-
				// frame reuse on the JS side, and the server-side prompt
				// cache would only conflict with that (different KV state
				// for the same projector tokens).
				cache_prompt: false,
			});
			const res = await fetchImpl(`${baseUrl}/completion`, {
				method: "POST",
				headers: {
					"content-type": "application/json",
					// Only advertise the MIME type when the resolver knew it.
					...(mimeType ? { "x-image-mime": mimeType } : {}),
				},
				body,
				// Lets the caller cancel the in-flight request.
				signal: request.signal,
			});
			if (!res.ok) {
				// Best-effort body read: a failing read must not mask the
				// HTTP status we are about to report.
				const text = await res.text().catch(() => "<unreadable>");
				throw new Error(
					`[vision/llama-server] /completion returned ${res.status}: ${text.slice(0, 200)}`,
				);
			}
			const payload = (await res.json()) as {
				content?: unknown;
				timings?: { prompt_ms?: number; predicted_ms?: number };
			};
			if (typeof payload.content !== "string") {
				throw new Error(
					"[vision/llama-server] /completion response missing string `content`",
				);
			}
			const elapsed = Date.now() - startMs;
			return shape(payload.content, {
				projectorMs: payload.timings?.prompt_ms,
				// Fall back to wall-clock time when the server omits timings.
				decodeMs: payload.timings?.predicted_ms ?? elapsed,
				// This backend always re-runs the projector server-side.
				cacheHit: false,
			});
		},
		/** Mark the backend unusable. Safe to call more than once. */
		async dispose() {
			// llama-server lifetime is owned by the FFI runtime wrapper.
			// This backend just holds the baseUrl; nothing to free.
			disposed = true;
			baseUrl = "";
		},
	};
}
153
+
154
/**
 * Wrap the caller's question in the chat template sent to `/completion`.
 *
 * A missing, empty, or whitespace-only prompt falls back to a generic
 * "describe the image" request.
 *
 * @param userPrompt - Optional question about the image.
 * @returns The full prompt string, including the image placeholder.
 */
function buildVisionPrompt(userPrompt?: string): string {
	const cleaned = userPrompt?.trim();
	let ask = "Describe what is in this image.";
	if (cleaned) {
		ask = cleaned;
	}
	// `[img-N]` is the placeholder llama-server's mtmd path swaps for the
	// encoded image tokens; `N` has to equal the `image_data[*].id` sent in
	// the request body. 12 is arbitrary — any positive integer works.
	return `<|im_start|>user\n[img-12]\n${ask}<|im_end|>\n<|im_start|>assistant\n`;
}
162
+
163
/**
 * Turn raw completion text into a `VisionDescribeResult`.
 *
 * The description is the trimmed text; the title is whatever precedes the
 * first `.`, `!` or `?` (trimmed), or `"Image"` when that prefix is empty.
 * Telemetry fields are copied onto the result unchanged.
 *
 * @param text - Raw `content` string returned by `/completion`.
 * @param telemetry - Optional timing / cache flags to attach to the result.
 * @returns The shaped result.
 * @throws Error when `text` is empty after trimming.
 */
function shape(
	text: string,
	telemetry: { projectorMs?: number; decodeMs?: number; cacheHit?: boolean },
): VisionDescribeResult {
	const description = text.trim();
	if (description.length === 0) {
		throw new Error("[vision/llama-server] empty text from /completion");
	}
	// Limit of 1 keeps only the segment before the first sentence terminator.
	const [firstSentence] = description.split(/[.!?]/, 1);
	const candidateTitle = firstSentence?.trim();
	const title = candidateTitle ? candidateTitle : "Image";
	return { title, description, ...telemetry };
}
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Vision-language describe-image types (WS2).
3
+ *
4
+ * Two layers live here:
5
+ *
6
+ * 1. The **request/result** contract every WS2 backend implements
7
+ * (`VisionDescribeRequest`, `VisionDescribeResult`). Callers pass
8
+ * raw image bytes + a prompt; backends return a title+description.
9
+ *
10
+ * 2. The **backend** interface (`VisionDescribeBackend`) that the
11
+ * `MemoryArbiter` registers as a capability handler. One backend
12
+ * per binding family (node-llama-cpp / llama-server / AOSP libllama
13
+ * shim). All three implement the same `load → describe → unload`
14
+ * shape so the arbiter can swap between them without caring how
15
+ * the projector is wired underneath.
16
+ *
17
+ * Why a separate file: the arbiter's `CapabilityRegistration<TBackend,
18
+ * TRequest, TResult>` is generic; pinning concrete shapes here keeps
19
+ * the registration sites short and removes a dozen casts at the
20
+ * call-site.
21
+ */
22
+ /**
23
+ * Channel order for the raw pixel buffer. Most platforms hand us RGBA
24
+ * (HTMLCanvasElement, Capacitor `Camera`, the desktop `puppeteer-core`
25
+ * screenshot pipeline). The encoder normalizes internally; this enum
26
+ * stays so the hash step can pick a stable byte layout that doesn't
27
+ * depend on the platform-provided buffer order.
28
+ */
29
+ export type VisionImageChannelOrder = "rgba" | "rgb" | "bgra" | "bgr";
30
+ /**
31
+ * The raw image data the backend will encode. The arbiter does not see
32
+ * this — it gets handed straight to the backend's `run()`. The reason
33
+ * we accept multiple wrappers (URL / base64 / bytes) is that the three
34
+ * upstream entry points (HTTP route, agent runtime model handler,
35
+ * computer-use frame loop) each prefer a different shape. The backend
36
+ * resolves to bytes once.
37
+ */
38
+ export type VisionImageInput = {
39
+ kind: "bytes";
40
+ bytes: Uint8Array;
41
+ mimeType?: string;
42
+ } | {
43
+ kind: "base64";
44
+ base64: string;
45
+ mimeType?: string;
46
+ } | {
47
+ kind: "dataUrl";
48
+ dataUrl: string;
49
+ } | {
50
+ kind: "url";
51
+ url: string;
52
+ mimeType?: string;
53
+ };
54
+ /**
55
+ * Caller request to `describeImage`. The `modelFamily` distinguishes
56
+ * projected-token cache entries from different VL families that share
57
+ * the same hash space — Qwen3-VL tokens are not interchangeable with
58
+ * Florence-2 tokens. Default is `qwen3-vl` (the WS2 deliverable);
59
+ * each additional family registers under its own identifier.
60
+ */
61
+ export interface VisionDescribeRequest {
62
+ image: VisionImageInput;
63
+ prompt?: string;
64
+ /**
65
+ * The model family identifier. Used to namespace the projector cache
66
+ * so swapping the backend's model family invalidates cached tokens.
67
+ * Defaults to `"qwen3-vl"` when omitted.
68
+ */
69
+ modelFamily?: string;
70
+ /** Max output tokens; defaults to 256 (description-length budget). */
71
+ maxTokens?: number;
72
+ /** 0..1, default 0.2 (descriptions should be deterministic-ish). */
73
+ temperature?: number;
74
+ signal?: AbortSignal;
75
+ }
76
+ /** Backend response — same shape that ImageDescriptionResult expects. */
77
+ export interface VisionDescribeResult {
78
+ title: string;
79
+ description: string;
80
+ /** Best-effort: ms spent in the projector (for arbiter telemetry). */
81
+ projectorMs?: number;
82
+ /** Best-effort: ms spent in the decoder. */
83
+ decodeMs?: number;
84
+ /** Whether the projected tokens came from the WS1 vision cache. */
85
+ cacheHit?: boolean;
86
+ }
87
+ /**
88
+ * Per-load arguments for a vision-describe backend. The arbiter's
89
+ * `load(modelKey)` only carries an opaque key; the binding resolves
90
+ * that key to real model+mmproj paths through this struct, which
91
+ * `createVisionCapabilityRegistration` populates from the catalog.
92
+ */
93
+ export interface VisionDescribeLoadArgs {
94
+ /** Absolute path to the text decoder GGUF (the "main" model). */
95
+ modelPath: string;
96
+ /** Absolute path to the matching mmproj projector GGUF. */
97
+ mmprojPath: string;
98
+ /**
99
+ * GPU offload preference. The backend translates this to its native
100
+ * knob: node-llama-cpp `gpuLayers`, llama-server `--n-gpu-layers`,
101
+ * AOSP libllama shim `eliza_llama_model_params_set_n_gpu_layers`.
102
+ * `"auto"` lets the binding decide; numeric is honoured verbatim.
103
+ */
104
+ gpuLayers?: number | "auto" | "max";
105
+ /** Max sampled context window in tokens. Defaults to 4096. */
106
+ contextSize?: number;
107
+ }
108
+ /**
109
+ * The contract every WS2 backend implements. The shape is intentionally
110
+ * narrow: the arbiter only ever calls `describe`. `dispose` is wrapped
111
+ * by the arbiter's `unload` so the backend can free GPU/VRAM and drop
112
+ * file descriptors on eviction.
113
+ */
114
+ export interface VisionDescribeBackend {
115
+ /** Stable identifier — `"capacitor-llama"`, `"llama-server"`, `"aosp"`, or `"fake"` (tests). */
116
+ readonly id: "capacitor-llama" | "llama-server" | "aosp" | "fake";
117
+ /**
118
+ * Run a describe pass. Backends MAY consult an injected projector cache
119
+ * via `args.projectedTokens` (when the caller's hash already produced
120
+ * a cache hit) instead of running the projector again; backends that
121
+ * don't implement projector-token reuse ignore the field.
122
+ */
123
+ describe(request: VisionDescribeRequest, args?: VisionDescribeBackendOptions): Promise<VisionDescribeResult>;
124
+ /** Release the loaded weights. Idempotent. */
125
+ dispose(): Promise<void>;
126
+ }
127
+ /**
128
+ * Per-call options the arbiter wrapper passes into the backend. Lives
129
+ * here (rather than on `VisionDescribeRequest`) so the caller-facing
130
+ * request type stays free of arbiter implementation details.
131
+ */
132
+ export interface VisionDescribeBackendOptions {
133
+ /**
134
+ * Pre-computed projected tokens from the WS1 vision-embedding cache.
135
+ * When present the backend SHOULD skip its own projector step and
136
+ * decode against these tokens directly. Backends that can't do this
137
+ * still produce a correct result by ignoring the field; the arbiter's
138
+ * wrapper will measure `cacheHit: false` in that case.
139
+ */
140
+ projectedTokens?: {
141
+ tokens: Float32Array;
142
+ tokenCount: number;
143
+ hiddenSize: number;
144
+ };
145
+ }
146
+ /**
147
+ * Capability handler load function. The arbiter calls it with a model
148
+ * key (e.g. `"qwen3-vl-2b"`); the implementation resolves to a real
149
+ * `(modelPath, mmprojPath)` pair from the catalog + installed registry
150
+ * and returns a live backend.
151
+ */
152
+ export type VisionDescribeBackendLoader = (modelKey: string) => Promise<VisionDescribeBackend>;
153
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH;;;;;;GAMG;AACH,MAAM,MAAM,uBAAuB,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC;AAEtE;;;;;;;GAOG;AACH,MAAM,MAAM,gBAAgB,GACzB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,UAAU,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACvD;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEnD;;;;;;GAMG;AACH,MAAM,WAAW,qBAAqB;IACrC,KAAK,EAAE,gBAAgB,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sEAAsE;IACtE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oEAAoE;IACpE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,yEAAyE;AACzE,MAAM,WAAW,oBAAoB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,sEAAsE;IACtE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mEAAmE;IACnE,QAAQ,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,WAAW,sBAAsB;IACtC,iEAAiE;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,CAAC;IACpC,8DAA8D;IAC9D,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;;GAKG;AACH,MAAM,WAAW,qBAAqB;IACrC,gGAAgG;IAChG,QAAQ,CAAC,EAAE,EAAE,iBAAiB,GAAG,cAAc,GAAG,MAAM,GAAG,MAAM,CAAC;IAClE;;;;;OAKG;IACH,QAAQ,CACP,OAAO,EAAE,qBAAqB,EAC9B,IAAI,CAAC,EAAE,4BAA4B,GACjC,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACjC,8CAA8C;IAC9C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED;;;;GAIG;AACH,MAAM,WAAW,4BAA4B;IAC5C;;;;;;OAMG;IACH,eAAe,CAAC,EAAE;QACjB,MAAM,EAAE,YAAY,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;KACnB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,MAAM,2BAA2B,GAAG,CACzC,QAAQ,EAAE,MAAM,KACZ,OAAO,CAAC,qBAAqB,CAAC,CAAC"}
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Vision-language describe-image types (WS2).
3
+ *
4
+ * Two layers live here:
5
+ *
6
+ * 1. The **request/result** contract every WS2 backend implements
7
+ * (`VisionDescribeRequest`, `VisionDescribeResult`). Callers pass
8
+ * raw image bytes + a prompt; backends return a title+description.
9
+ *
10
+ * 2. The **backend** interface (`VisionDescribeBackend`) that the
11
+ * `MemoryArbiter` registers as a capability handler. One backend
12
+ * per binding family (node-llama-cpp / llama-server / AOSP libllama
13
+ * shim). All three implement the same `load → describe → unload`
14
+ * shape so the arbiter can swap between them without caring how
15
+ * the projector is wired underneath.
16
+ *
17
+ * Why a separate file: the arbiter's `CapabilityRegistration<TBackend,
18
+ * TRequest, TResult>` is generic; pinning concrete shapes here keeps
19
+ * the registration sites short and removes a dozen casts at the
20
+ * call-site.
21
+ */
22
+
23
+ /**
24
+ * Channel order for the raw pixel buffer. Most platforms hand us RGBA
25
+ * (HTMLCanvasElement, Capacitor `Camera`, the desktop `puppeteer-core`
26
+ * screenshot pipeline). The encoder normalizes internally; this enum
27
+ * stays so the hash step can pick a stable byte layout that doesn't
28
+ * depend on the platform-provided buffer order.
29
+ */
30
+ export type VisionImageChannelOrder = "rgba" | "rgb" | "bgra" | "bgr";
31
+
32
+ /**
33
+ * The raw image data the backend will encode. The arbiter does not see
34
+ * this — it gets handed straight to the backend's `describe()`. The reason
35
+ * we accept multiple wrappers (URL / base64 / bytes) is that the three
36
+ * upstream entry points (HTTP route, agent runtime model handler,
37
+ * computer-use frame loop) each prefer a different shape. The backend
38
+ * resolves to bytes once.
39
+ */
40
+ export type VisionImageInput =
41
+ | { kind: "bytes"; bytes: Uint8Array; mimeType?: string }
42
+ | { kind: "base64"; base64: string; mimeType?: string }
43
+ | { kind: "dataUrl"; dataUrl: string }
44
+ | { kind: "url"; url: string; mimeType?: string };
45
+
46
+ /**
47
+ * Caller request to `describeImage`. The `modelFamily` distinguishes
48
+ * projected-token cache entries from different VL families that share
49
+ * the same hash space — Qwen3-VL tokens are not interchangeable with
50
+ * Florence-2 tokens. Default is `qwen3-vl` (the WS2 deliverable);
51
+ * each additional family registers under its own identifier.
52
+ */
53
+ export interface VisionDescribeRequest {
54
+ image: VisionImageInput;
55
+ prompt?: string;
56
+ /**
57
+ * The model family identifier. Used to namespace the projector cache
58
+ * so swapping the backend's model family invalidates cached tokens.
59
+ * Defaults to `"qwen3-vl"` when omitted.
60
+ */
61
+ modelFamily?: string;
62
+ /** Max output tokens; defaults to 256 (description-length budget). */
63
+ maxTokens?: number;
64
+ /** 0..1, default 0.2 (descriptions should be deterministic-ish). */
65
+ temperature?: number;
66
+ signal?: AbortSignal;
67
+ }
68
+
69
+ /** Backend response — same shape that ImageDescriptionResult expects. */
70
+ export interface VisionDescribeResult {
71
+ title: string;
72
+ description: string;
73
+ /** Best-effort: ms spent in the projector (for arbiter telemetry). */
74
+ projectorMs?: number;
75
+ /** Best-effort: ms spent in the decoder. */
76
+ decodeMs?: number;
77
+ /** Whether the projected tokens came from the WS1 vision cache. */
78
+ cacheHit?: boolean;
79
+ }
80
+
81
+ /**
82
+ * Per-load arguments for a vision-describe backend. The arbiter's
83
+ * `load(modelKey)` only carries an opaque key; the binding resolves
84
+ * that key to real model+mmproj paths through this struct, which
85
+ * `createVisionCapabilityRegistration` populates from the catalog.
86
+ */
87
+ export interface VisionDescribeLoadArgs {
88
+ /** Absolute path to the text decoder GGUF (the "main" model). */
89
+ modelPath: string;
90
+ /** Absolute path to the matching mmproj projector GGUF. */
91
+ mmprojPath: string;
92
+ /**
93
+ * GPU offload preference. The backend translates this to its native
94
+ * knob: node-llama-cpp `gpuLayers`, llama-server `--n-gpu-layers`,
95
+ * AOSP libllama shim `eliza_llama_model_params_set_n_gpu_layers`.
96
+ * `"auto"` lets the binding decide; numeric is honoured verbatim.
97
+ */
98
+ gpuLayers?: number | "auto" | "max";
99
+ /** Max sampled context window in tokens. Defaults to 4096. */
100
+ contextSize?: number;
101
+ }
102
+
103
+ /**
104
+ * The contract every WS2 backend implements. The shape is intentionally
105
+ * narrow: the arbiter only ever calls `describe`. `dispose` is wrapped
106
+ * by the arbiter's `unload` so the backend can free GPU/VRAM and drop
107
+ * file descriptors on eviction.
108
+ */
109
+ export interface VisionDescribeBackend {
110
+ /** Stable identifier — `"capacitor-llama"`, `"llama-server"`, `"aosp"`, or `"fake"` (tests). */
111
+ readonly id: "capacitor-llama" | "llama-server" | "aosp" | "fake";
112
+ /**
113
+ * Run a describe pass. Backends MAY consult an injected projector cache
114
+ * via `args.projectedTokens` (when the caller's hash already produced
115
+ * a cache hit) instead of running the projector again; backends that
116
+ * don't implement projector-token reuse ignore the field.
117
+ */
118
+ describe(
119
+ request: VisionDescribeRequest,
120
+ args?: VisionDescribeBackendOptions,
121
+ ): Promise<VisionDescribeResult>;
122
+ /** Release the loaded weights. Idempotent. */
123
+ dispose(): Promise<void>;
124
+ }
125
+
126
+ /**
127
+ * Per-call options the arbiter wrapper passes into the backend. Lives
128
+ * here (rather than on `VisionDescribeRequest`) so the caller-facing
129
+ * request type stays free of arbiter implementation details.
130
+ */
131
+ export interface VisionDescribeBackendOptions {
132
+ /**
133
+ * Pre-computed projected tokens from the WS1 vision-embedding cache.
134
+ * When present the backend SHOULD skip its own projector step and
135
+ * decode against these tokens directly. Backends that can't do this
136
+ * still produce a correct result by ignoring the field; the arbiter's
137
+ * wrapper will measure `cacheHit: false` in that case.
138
+ */
139
+ projectedTokens?: {
140
+ tokens: Float32Array;
141
+ tokenCount: number;
142
+ hiddenSize: number;
143
+ };
144
+ }
145
+
146
+ /**
147
+ * Capability handler load function. The arbiter calls it with a model
148
+ * key (e.g. `"qwen3-vl-2b"`); the implementation resolves to a real
149
+ * `(modelPath, mmprojPath)` pair from the catalog + installed registry
150
+ * and returns a live backend.
151
+ */
152
+ export type VisionDescribeBackendLoader = (
153
+ modelKey: string,
154
+ ) => Promise<VisionDescribeBackend>;
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Final optional IMAGE_DESCRIPTION fallback layer.
3
+ *
4
+ * This mirrors the cloud wrapper shape but only runs when the previous
5
+ * handler explicitly returned a typed fallback outcome.
6
+ */
7
+ import type { ImageDescriptionParams } from "@elizaos/core";
8
+ import { type LocalVisionOutcome, type VisionFallbackReason, type WrappedImageDescriptionHandler } from "./cloud-fallback";
9
/** Options accepted by `wrapImageDescriptionHandlerWithVastFallback`. */
export interface VisionVastFallbackOptions {
  /** Turns the fallback layer on or off. */
  enabled?: boolean;
  /** API key sent to the VAST endpoint. */
  apiKey?: string;
  /** Base URL of the VAST endpoint. */
  baseUrl?: string;
  /** `fetch` implementation to use for the HTTP call. */
  fetch?: typeof fetch;
  /** Custom fallback handler; receives the original params and the upstream fallback reason. */
  handler?: (
    params: ImageDescriptionParams | string,
    reason: VisionFallbackReason,
  ) => Promise<LocalVisionOutcome>;
  /** Diagnostic sink: a message plus optional structured detail. */
  log?: (message: string, detail?: Record<string, unknown>) => void;
}
17
/**
 * Wrap `previous` with the final, optional IMAGE_DESCRIPTION fallback layer.
 *
 * @param previous - Handler whose outcome is inspected first.
 * @param options - Fallback configuration; may be omitted.
 * @returns A handler with the same call shape as `previous`.
 */
export declare function wrapImageDescriptionHandlerWithVastFallback(
  previous: WrappedImageDescriptionHandler,
  options?: VisionVastFallbackOptions,
): WrappedImageDescriptionHandler;
18
+ //# sourceMappingURL=vast-fallback.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vast-fallback.d.ts","sourceRoot":"","sources":["vast-fallback.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EACX,sBAAsB,EAEtB,MAAM,eAAe,CAAC;AACvB,OAAO,EAEN,KAAK,kBAAkB,EAGvB,KAAK,oBAAoB,EACzB,KAAK,8BAA8B,EACnC,MAAM,kBAAkB,CAAC;AAE1B,MAAM,WAAW,yBAAyB;IACzC,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,OAAO,KAAK,CAAC;IACrB,OAAO,CAAC,EAAE,CACT,MAAM,EAAE,sBAAsB,GAAG,MAAM,EACvC,MAAM,EAAE,oBAAoB,KACxB,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACjC,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,IAAI,CAAC;CAClE;AA+DD,wBAAgB,2CAA2C,CAC1D,QAAQ,EAAE,8BAA8B,EACxC,OAAO,GAAE,yBAA8B,GACrC,8BAA8B,CA8BhC"}
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Final optional IMAGE_DESCRIPTION fallback layer.
3
+ *
4
+ * This mirrors the cloud wrapper shape but only runs when the previous
5
+ * handler explicitly returned a typed fallback outcome.
6
+ */
7
+
8
+ import type {
9
+ ImageDescriptionParams,
10
+ ImageDescriptionResult,
11
+ } from "@elizaos/core";
12
+ import {
13
+ isVisionFallbackOutcome,
14
+ type LocalVisionOutcome,
15
+ type LocalVisionResult,
16
+ normalizeVisionDescription,
17
+ type VisionFallbackReason,
18
+ type WrappedImageDescriptionHandler,
19
+ } from "./cloud-fallback";
20
+
21
/** Configuration for the VAST-backed IMAGE_DESCRIPTION fallback wrapper. */
export interface VisionVastFallbackOptions {
  /** Master switch for the fallback layer; the wrapper treats an omitted value as `true`. */
  enabled?: boolean;
  /** Bearer token for the VAST endpoint; `ELIZA_VAST_API_KEY` is consulted when this is blank or omitted. */
  apiKey?: string;
  /**
   * Base URL of the VAST endpoint. When blank or omitted, `ELIZA_VAST_BASE_URL`
   * is consulted and then `https://api.vast.ai` is used.
   */
  baseUrl?: string;
  /** `fetch` implementation for the HTTP call; the global `fetch` is used when omitted. */
  fetch?: typeof fetch;
  /**
   * Custom fallback handler. When provided it is called in place of the
   * built-in HTTP request and no API key is required. It receives the
   * original params together with the upstream fallback reason.
   */
  handler?: (
    params: ImageDescriptionParams | string,
    reason: VisionFallbackReason,
  ) => Promise<LocalVisionOutcome>;
  /** Diagnostic sink (message plus optional structured detail); a no-op when omitted. */
  log?: (message: string, detail?: Record<string, unknown>) => void;
}
32
+
33
/**
 * Pick the VAST API key to use.
 *
 * The trimmed `options.apiKey` wins; otherwise the trimmed
 * `ELIZA_VAST_API_KEY` environment variable is used.
 *
 * @returns The key, or `null` when neither source yields a non-empty value.
 */
function resolveVastApiKey(options: VisionVastFallbackOptions): string | null {
  const explicitKey = options.apiKey?.trim();
  if (explicitKey) {
    return explicitKey;
  }
  const envKey = process.env.ELIZA_VAST_API_KEY?.trim();
  return envKey ? envKey : null;
}
38
+
39
/**
 * Pick the VAST base URL to use.
 *
 * Precedence: trimmed `options.baseUrl`, then the trimmed
 * `ELIZA_VAST_BASE_URL` environment variable, then `https://api.vast.ai`.
 *
 * @returns The chosen URL with every trailing `/` removed.
 */
function resolveVastBaseUrl(options: VisionVastFallbackOptions): string {
  const configured =
    options.baseUrl?.trim() || process.env.ELIZA_VAST_BASE_URL?.trim();
  const baseUrl = configured || "https://api.vast.ai";
  // Strip trailing slashes so a path can be appended with a single "/".
  return baseUrl.replace(/\/+$/, "");
}
46
+
47
/**
 * Build the JSON payload for the VAST describe request.
 *
 * The image source is `params` itself when it is a string; otherwise it is
 * the first non-blank value among `params.imageUrl` and `params.image`.
 * Sources with a `data:` scheme (matched case-insensitively, since URI
 * schemes are case-insensitive) are sent as `{ kind: "data" }`; every other
 * source is sent as `{ kind: "url" }`. The source string is forwarded
 * unchanged.
 *
 * @param params - Image description params object, or a bare URL / data URI.
 * @returns The request body; `prompt` is present only when `params` is an
 *   object with a non-empty `prompt`.
 * @throws Error when no non-blank image source is available, so that an
 *   empty URL is never sent to the remote endpoint.
 */
function imageRequestBody(params: ImageDescriptionParams | string): {
  image: { kind: "url"; url: string } | { kind: "data"; data: string };
  prompt?: string;
} {
  const candidates: Array<string | undefined> =
    typeof params === "string"
      ? [params]
      : [
          (params as { imageUrl?: string }).imageUrl,
          (params as { image?: string }).image,
        ];
  const source = candidates.find(
    (candidate) => typeof candidate === "string" && candidate.trim() !== "",
  );
  if (source === undefined) {
    throw new Error("VAST image fallback requires an image source");
  }

  const image = /^data:/i.test(source)
    ? { kind: "data" as const, data: source }
    : { kind: "url" as const, url: source };

  if (typeof params !== "string" && params.prompt) {
    return { image, prompt: params.prompt };
  }
  return { image };
}
65
+
66
/**
 * POST the image to the VAST describe endpoint and normalise the reply.
 *
 * Sends a JSON body built by `imageRequestBody` to
 * `<base url>/v1/vision/describe` with a bearer `authorization` header,
 * using `options.fetch` when given and the global `fetch` otherwise.
 *
 * @param params - Image description params object, or a bare image string.
 * @param options - Supplies the API key, base URL and `fetch` override.
 * @returns The parsed JSON response passed through `normalizeVisionDescription`.
 * @throws Error when no API key can be resolved, or when the response status
 *   is not OK (the status code is included in the message).
 */
async function callVastVision(
  params: ImageDescriptionParams | string,
  options: VisionVastFallbackOptions,
): Promise<ImageDescriptionResult> {
  const apiKey = resolveVastApiKey(options);
  if (!apiKey) {
    throw new Error("VAST image fallback is not configured");
  }

  const doFetch = options.fetch ?? fetch;
  const endpoint = `${resolveVastBaseUrl(options)}/v1/vision/describe`;
  const requestInit = {
    method: "POST",
    headers: {
      "content-type": "application/json",
      authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify(imageRequestBody(params)),
  };

  const response = await doFetch(endpoint, requestInit);
  if (!response.ok) {
    throw new Error(`VAST image fallback failed with ${response.status}`);
  }

  // The payload is trusted to have the LocalVisionResult shape; it is not validated here.
  const payload = (await response.json()) as LocalVisionResult;
  return normalizeVisionDescription(payload);
}
93
+
94
/**
 * Wrap `previous` with the final, optional VAST fallback layer.
 *
 * The returned handler calls `previous` first and then:
 * - returns the normalised result when `previous` did not produce a
 *   fallback outcome;
 * - returns the fallback outcome untouched when the layer is disabled, or
 *   when neither `options.handler` nor an API key is available;
 * - otherwise logs the upstream reason and delegates to `options.handler`
 *   (if set) or to the built-in VAST HTTP call.
 *
 * Anything thrown while delegating is caught and reported as a fallback
 * outcome with reason `"vast-error"`; non-`Error` values are wrapped in an
 * `Error` built from their string form.
 *
 * @param previous - Handler whose outcome is inspected first.
 * @param options - Fallback configuration; `enabled` defaults to `true` and
 *   `log` to a no-op.
 * @returns A handler with the same call shape as `previous`.
 */
export function wrapImageDescriptionHandlerWithVastFallback(
  previous: WrappedImageDescriptionHandler,
  options: VisionVastFallbackOptions = {},
): WrappedImageDescriptionHandler {
  const isEnabled = options.enabled ?? true;
  const emitLog = options.log ?? (() => undefined);

  return async (params): Promise<LocalVisionOutcome> => {
    const upstream = await previous(params);
    if (!isVisionFallbackOutcome(upstream)) {
      return normalizeVisionDescription(upstream);
    }
    if (!isEnabled) {
      return upstream;
    }

    // Without a custom handler, an API key is the only way to reach VAST.
    const resolvedKey = resolveVastApiKey(options);
    const canDelegate = Boolean(options.handler) || Boolean(resolvedKey);
    if (!canDelegate) {
      return upstream;
    }

    emitLog("[vision/vast-fallback] upstream IMAGE_DESCRIPTION fallback", {
      reason: upstream.reason,
    });

    try {
      const delegated = await (options.handler
        ? options.handler(params, upstream.reason)
        : callVastVision(params, options));
      return isVisionFallbackOutcome(delegated)
        ? delegated
        : normalizeVisionDescription(delegated);
    } catch (error) {
      const cause = error instanceof Error ? error : new Error(String(error));
      return {
        kind: "fallback",
        reason: "vast-error",
        cause,
      };
    }
  };
}