@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (676)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +81 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  11. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  12. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  13. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  14. package/src/adapters/capacitor-llama/environment.ts +71 -0
  15. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  16. package/src/adapters/capacitor-llama/index.ts +807 -0
  17. package/src/adapters/capacitor-llama/loader.ts +109 -0
  18. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  19. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  20. package/src/adapters/capacitor-llama/types.ts +374 -0
  21. package/src/backends/apple-foundation.ts +127 -0
  22. package/src/index.d.ts +7 -0
  23. package/src/index.d.ts.map +1 -0
  24. package/src/index.ts +54 -0
  25. package/src/local-inference-routes.d.ts +38 -0
  26. package/src/local-inference-routes.d.ts.map +1 -0
  27. package/src/local-inference-routes.test.ts +344 -0
  28. package/src/local-inference-routes.ts +1543 -0
  29. package/src/provider.d.ts +21 -0
  30. package/src/provider.d.ts.map +1 -0
  31. package/src/provider.ts +1171 -0
  32. package/src/routes/compat-helpers.d.ts +18 -0
  33. package/src/routes/compat-helpers.d.ts.map +1 -0
  34. package/src/routes/compat-helpers.ts +274 -0
  35. package/src/routes/family-member-route.d.ts +62 -0
  36. package/src/routes/family-member-route.d.ts.map +1 -0
  37. package/src/routes/family-member-route.ts +353 -0
  38. package/src/routes/index.d.ts +19 -0
  39. package/src/routes/index.d.ts.map +1 -0
  40. package/src/routes/index.ts +60 -0
  41. package/src/routes/live-diarization-route.d.ts +26 -0
  42. package/src/routes/live-diarization-route.d.ts.map +1 -0
  43. package/src/routes/live-diarization-route.test.ts +213 -0
  44. package/src/routes/live-diarization-route.ts +122 -0
  45. package/src/routes/local-inference-asr-route.d.ts +4 -0
  46. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  47. package/src/routes/local-inference-asr-route.test.ts +190 -0
  48. package/src/routes/local-inference-asr-route.ts +213 -0
  49. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  50. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  51. package/src/routes/local-inference-compat-routes.test.ts +423 -0
  52. package/src/routes/local-inference-compat-routes.ts +782 -0
  53. package/src/routes/local-inference-tts-route.d.ts +7 -0
  54. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  55. package/src/routes/local-inference-tts-route.test.ts +179 -0
  56. package/src/routes/local-inference-tts-route.ts +230 -0
  57. package/src/routes/voice-first-run-routes.d.ts +62 -0
  58. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  59. package/src/routes/voice-first-run-routes.ts +524 -0
  60. package/src/routes/voice-models-routes.d.ts +62 -0
  61. package/src/routes/voice-models-routes.d.ts.map +1 -0
  62. package/src/routes/voice-models-routes.ts +554 -0
  63. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  64. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  65. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  66. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  67. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  68. package/src/routes/voice-profiles-management-routes.ts +476 -0
  69. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  70. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  71. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  72. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  73. package/src/runtime/capacitor-llama.d.ts +25 -0
  74. package/src/runtime/embedding-manager-support.d.ts +77 -0
  75. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  76. package/src/runtime/embedding-manager-support.ts +497 -0
  77. package/src/runtime/embedding-presets.d.ts +16 -0
  78. package/src/runtime/embedding-presets.d.ts.map +1 -0
  79. package/src/runtime/embedding-presets.ts +81 -0
  80. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  81. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  82. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  83. package/src/runtime/embedding-warmup-policy.ts +48 -0
  84. package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
  85. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  86. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  87. package/src/runtime/ensure-local-inference-handler.ts +1398 -0
  88. package/src/runtime/index.d.ts +14 -0
  89. package/src/runtime/index.d.ts.map +1 -0
  90. package/src/runtime/index.ts +27 -0
  91. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  92. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  93. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  94. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  95. package/src/runtime/voice-entity-binding.d.ts +103 -0
  96. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  97. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  98. package/src/runtime/voice-entity-binding.ts +328 -0
  99. package/src/services/README.md +71 -0
  100. package/src/services/__tests__/backend-selector.test.ts +101 -0
  101. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  102. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  103. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  104. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  105. package/src/services/__tests__/runtime-target.test.ts +176 -0
  106. package/src/services/active-model-switch-rollback.test.ts +183 -0
  107. package/src/services/active-model.d.ts +282 -0
  108. package/src/services/active-model.d.ts.map +1 -0
  109. package/src/services/active-model.ts +1213 -0
  110. package/src/services/asr/errors.d.ts +21 -0
  111. package/src/services/asr/errors.d.ts.map +1 -0
  112. package/src/services/asr/errors.ts +50 -0
  113. package/src/services/asr/hash.d.ts +28 -0
  114. package/src/services/asr/hash.d.ts.map +1 -0
  115. package/src/services/asr/hash.ts +49 -0
  116. package/src/services/asr/index.d.ts +76 -0
  117. package/src/services/asr/index.d.ts.map +1 -0
  118. package/src/services/asr/index.ts +178 -0
  119. package/src/services/asr/types.d.ts +91 -0
  120. package/src/services/asr/types.d.ts.map +1 -0
  121. package/src/services/asr/types.ts +95 -0
  122. package/src/services/assignments.d.ts +71 -0
  123. package/src/services/assignments.d.ts.map +1 -0
  124. package/src/services/assignments.test.ts +80 -0
  125. package/src/services/assignments.ts +230 -0
  126. package/src/services/backend-selector.ts +95 -0
  127. package/src/services/backend.d.ts +346 -0
  128. package/src/services/backend.d.ts.map +1 -0
  129. package/src/services/backend.ts +612 -0
  130. package/src/services/bundled-models.d.ts +34 -0
  131. package/src/services/bundled-models.d.ts.map +1 -0
  132. package/src/services/bundled-models.ts +129 -0
  133. package/src/services/cache-bridge.d.ts +206 -0
  134. package/src/services/cache-bridge.d.ts.map +1 -0
  135. package/src/services/cache-bridge.test.ts +516 -0
  136. package/src/services/cache-bridge.ts +423 -0
  137. package/src/services/catalog.d.ts +10 -0
  138. package/src/services/catalog.d.ts.map +1 -0
  139. package/src/services/catalog.test.ts +240 -0
  140. package/src/services/catalog.ts +27 -0
  141. package/src/services/checkpoint-client.d.ts +109 -0
  142. package/src/services/checkpoint-client.d.ts.map +1 -0
  143. package/src/services/checkpoint-client.ts +258 -0
  144. package/src/services/checkpoint-manager.ts +474 -0
  145. package/src/services/cloud-fallback.d.ts +102 -0
  146. package/src/services/cloud-fallback.d.ts.map +1 -0
  147. package/src/services/cloud-fallback.ts +230 -0
  148. package/src/services/conversation-registry.d.ts +142 -0
  149. package/src/services/conversation-registry.d.ts.map +1 -0
  150. package/src/services/conversation-registry.test.ts +235 -0
  151. package/src/services/conversation-registry.ts +264 -0
  152. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
  153. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  154. package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
  155. package/src/services/device-bridge.d.ts +188 -0
  156. package/src/services/device-bridge.d.ts.map +1 -0
  157. package/src/services/device-bridge.ts +1237 -0
  158. package/src/services/device-resource-metrics.d.ts +149 -0
  159. package/src/services/device-resource-metrics.d.ts.map +1 -0
  160. package/src/services/device-resource-metrics.test.ts +98 -0
  161. package/src/services/device-resource-metrics.ts +346 -0
  162. package/src/services/device-tier.d.ts +115 -0
  163. package/src/services/device-tier.d.ts.map +1 -0
  164. package/src/services/device-tier.test.ts +371 -0
  165. package/src/services/device-tier.ts +410 -0
  166. package/src/services/downloader.d.ts +82 -0
  167. package/src/services/downloader.d.ts.map +1 -0
  168. package/src/services/downloader.test.ts +724 -0
  169. package/src/services/downloader.ts +899 -0
  170. package/src/services/engine-direct-bundle.test.ts +58 -0
  171. package/src/services/engine-streaming.test.ts +80 -0
  172. package/src/services/engine.d.ts +534 -0
  173. package/src/services/engine.d.ts.map +1 -0
  174. package/src/services/engine.ts +1891 -0
  175. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  176. package/src/services/ensure-local-artifacts.test.ts +368 -0
  177. package/src/services/ensure-local-artifacts.ts +351 -0
  178. package/src/services/external-scanner.d.ts +17 -0
  179. package/src/services/external-scanner.d.ts.map +1 -0
  180. package/src/services/external-scanner.ts +312 -0
  181. package/src/services/ffi-llm-mock.ts +354 -0
  182. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  183. package/src/services/ffi-streaming-backend.d.ts +180 -0
  184. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  185. package/src/services/ffi-streaming-backend.ts +382 -0
  186. package/src/services/ffi-streaming-runner.d.ts +122 -0
  187. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  188. package/src/services/ffi-streaming-runner.test.ts +60 -0
  189. package/src/services/ffi-streaming-runner.ts +354 -0
  190. package/src/services/ffi-unload-ordering.test.ts +162 -0
  191. package/src/services/gpu-autotune.ts +534 -0
  192. package/src/services/gpu-detect.ts +139 -0
  193. package/src/services/handler-registry.d.ts +72 -0
  194. package/src/services/handler-registry.d.ts.map +1 -0
  195. package/src/services/handler-registry.ts +240 -0
  196. package/src/services/hardware.d.ts +63 -0
  197. package/src/services/hardware.d.ts.map +1 -0
  198. package/src/services/hardware.test.ts +183 -0
  199. package/src/services/hardware.ts +404 -0
  200. package/src/services/hf-search.d.ts +26 -0
  201. package/src/services/hf-search.d.ts.map +1 -0
  202. package/src/services/hf-search.test.ts +69 -0
  203. package/src/services/hf-search.ts +420 -0
  204. package/src/services/image-description-runtime.d.ts +14 -0
  205. package/src/services/image-description-runtime.d.ts.map +1 -0
  206. package/src/services/image-description-runtime.test.ts +61 -0
  207. package/src/services/image-description-runtime.ts +118 -0
  208. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  209. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  210. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  211. package/src/services/imagegen/backend-selector.d.ts +118 -0
  212. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  213. package/src/services/imagegen/backend-selector.ts +281 -0
  214. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  215. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  216. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  217. package/src/services/imagegen/errors.d.ts +16 -0
  218. package/src/services/imagegen/errors.d.ts.map +1 -0
  219. package/src/services/imagegen/errors.ts +40 -0
  220. package/src/services/imagegen/index.d.ts +58 -0
  221. package/src/services/imagegen/index.d.ts.map +1 -0
  222. package/src/services/imagegen/index.ts +144 -0
  223. package/src/services/imagegen/mflux.d.ts +74 -0
  224. package/src/services/imagegen/mflux.d.ts.map +1 -0
  225. package/src/services/imagegen/mflux.ts +313 -0
  226. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  227. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  228. package/src/services/imagegen/sd-cpp.ts +718 -0
  229. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  230. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  231. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  232. package/src/services/imagegen/types.d.ts +181 -0
  233. package/src/services/imagegen/types.d.ts.map +1 -0
  234. package/src/services/imagegen/types.ts +193 -0
  235. package/src/services/index.d.ts +30 -0
  236. package/src/services/index.d.ts.map +1 -0
  237. package/src/services/index.ts +225 -0
  238. package/src/services/inference-capabilities.d.ts +132 -0
  239. package/src/services/inference-capabilities.d.ts.map +1 -0
  240. package/src/services/inference-capabilities.test.ts +75 -0
  241. package/src/services/inference-capabilities.ts +204 -0
  242. package/src/services/inference-telemetry.d.ts +59 -0
  243. package/src/services/inference-telemetry.d.ts.map +1 -0
  244. package/src/services/inference-telemetry.ts +143 -0
  245. package/src/services/ios-llama-streaming.ts +248 -0
  246. package/src/services/kv-spill.d.ts +189 -0
  247. package/src/services/kv-spill.d.ts.map +1 -0
  248. package/src/services/kv-spill.test.ts +222 -0
  249. package/src/services/kv-spill.ts +356 -0
  250. package/src/services/latency-trace.d.ts +346 -0
  251. package/src/services/latency-trace.d.ts.map +1 -0
  252. package/src/services/latency-trace.test.ts +266 -0
  253. package/src/services/latency-trace.ts +844 -0
  254. package/src/services/llama-server-metrics.ts +304 -0
  255. package/src/services/llm-streaming-binding.d.ts +96 -0
  256. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  257. package/src/services/llm-streaming-binding.ts +136 -0
  258. package/src/services/load-args.d.ts +82 -0
  259. package/src/services/load-args.d.ts.map +1 -0
  260. package/src/services/load-args.ts +81 -0
  261. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  262. package/src/services/manifest/index.d.ts +4 -0
  263. package/src/services/manifest/index.d.ts.map +1 -0
  264. package/src/services/manifest/index.ts +66 -0
  265. package/src/services/manifest/manifest.test.ts +693 -0
  266. package/src/services/manifest/schema.d.ts +715 -0
  267. package/src/services/manifest/schema.d.ts.map +1 -0
  268. package/src/services/manifest/schema.ts +655 -0
  269. package/src/services/manifest/types.d.ts +30 -0
  270. package/src/services/manifest/types.d.ts.map +1 -0
  271. package/src/services/manifest/types.ts +55 -0
  272. package/src/services/manifest/validator.d.ts +66 -0
  273. package/src/services/manifest/validator.d.ts.map +1 -0
  274. package/src/services/manifest/validator.ts +569 -0
  275. package/src/services/memory-arbiter.d.ts +343 -0
  276. package/src/services/memory-arbiter.d.ts.map +1 -0
  277. package/src/services/memory-arbiter.test.ts +419 -0
  278. package/src/services/memory-arbiter.ts +1000 -0
  279. package/src/services/memory-monitor.d.ts +119 -0
  280. package/src/services/memory-monitor.d.ts.map +1 -0
  281. package/src/services/memory-monitor.test.ts +208 -0
  282. package/src/services/memory-monitor.ts +296 -0
  283. package/src/services/memory-pressure.d.ts +127 -0
  284. package/src/services/memory-pressure.d.ts.map +1 -0
  285. package/src/services/memory-pressure.ts +413 -0
  286. package/src/services/mtp-doctor.d.ts +13 -0
  287. package/src/services/mtp-doctor.d.ts.map +1 -0
  288. package/src/services/mtp-doctor.ts +78 -0
  289. package/src/services/network-policy.d.ts +127 -0
  290. package/src/services/network-policy.d.ts.map +1 -0
  291. package/src/services/network-policy.ts +346 -0
  292. package/src/services/paths.d.ts +6 -0
  293. package/src/services/paths.d.ts.map +1 -0
  294. package/src/services/paths.ts +25 -0
  295. package/src/services/planner-skeleton.d.ts +124 -0
  296. package/src/services/planner-skeleton.d.ts.map +1 -0
  297. package/src/services/planner-skeleton.ts +175 -0
  298. package/src/services/providers.d.ts +38 -0
  299. package/src/services/providers.d.ts.map +1 -0
  300. package/src/services/providers.ts +507 -0
  301. package/src/services/ram-budget-cache.test.ts +163 -0
  302. package/src/services/ram-budget.d.ts +110 -0
  303. package/src/services/ram-budget.d.ts.map +1 -0
  304. package/src/services/ram-budget.ts +0 -0
  305. package/src/services/readiness.d.ts +9 -0
  306. package/src/services/readiness.d.ts.map +1 -0
  307. package/src/services/readiness.test.ts +87 -0
  308. package/src/services/readiness.ts +238 -0
  309. package/src/services/recommendation.d.ts +111 -0
  310. package/src/services/recommendation.d.ts.map +1 -0
  311. package/src/services/recommendation.ts +672 -0
  312. package/src/services/registry.d.ts +35 -0
  313. package/src/services/registry.d.ts.map +1 -0
  314. package/src/services/registry.ts +151 -0
  315. package/src/services/router-handler.d.ts +92 -0
  316. package/src/services/router-handler.d.ts.map +1 -0
  317. package/src/services/router-handler.test.ts +45 -0
  318. package/src/services/router-handler.ts +376 -0
  319. package/src/services/routing-policy.d.ts +55 -0
  320. package/src/services/routing-policy.d.ts.map +1 -0
  321. package/src/services/routing-policy.ts +228 -0
  322. package/src/services/routing-preferences.d.ts +8 -0
  323. package/src/services/routing-preferences.d.ts.map +1 -0
  324. package/src/services/routing-preferences.ts +15 -0
  325. package/src/services/runtime-target.d.ts +98 -0
  326. package/src/services/runtime-target.d.ts.map +1 -0
  327. package/src/services/runtime-target.ts +154 -0
  328. package/src/services/service.d.ts +128 -0
  329. package/src/services/service.d.ts.map +1 -0
  330. package/src/services/service.test.ts +223 -0
  331. package/src/services/service.ts +735 -0
  332. package/src/services/session-pool.d.ts +72 -0
  333. package/src/services/session-pool.d.ts.map +1 -0
  334. package/src/services/session-pool.ts +153 -0
  335. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  336. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  337. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  338. package/src/services/structured-output/deterministic-repair.ts +443 -0
  339. package/src/services/structured-output/index.ts +4 -0
  340. package/src/services/structured-output.d.ts +311 -0
  341. package/src/services/structured-output.d.ts.map +1 -0
  342. package/src/services/structured-output.test.ts +483 -0
  343. package/src/services/structured-output.ts +712 -0
  344. package/src/services/transcription-priority.test.ts +211 -0
  345. package/src/services/tts/errors.ts +46 -0
  346. package/src/services/tts/index.ts +214 -0
  347. package/src/services/tts/tts-audio-cache.ts +235 -0
  348. package/src/services/tts/types.ts +157 -0
  349. package/src/services/types.d.ts +19 -0
  350. package/src/services/types.d.ts.map +1 -0
  351. package/src/services/types.ts +55 -0
  352. package/src/services/verify-on-device.d.ts +34 -0
  353. package/src/services/verify-on-device.d.ts.map +1 -0
  354. package/src/services/verify-on-device.test.ts +87 -0
  355. package/src/services/verify-on-device.ts +127 -0
  356. package/src/services/verify.d.ts +8 -0
  357. package/src/services/verify.d.ts.map +1 -0
  358. package/src/services/verify.ts +13 -0
  359. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  360. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  361. package/src/services/vision/aosp-unavailable.ts +163 -0
  362. package/src/services/vision/capacitor-llama.d.ts +99 -0
  363. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  364. package/src/services/vision/capacitor-llama.ts +255 -0
  365. package/src/services/vision/cloud-fallback.d.ts +47 -0
  366. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  367. package/src/services/vision/cloud-fallback.test.ts +243 -0
  368. package/src/services/vision/cloud-fallback.ts +268 -0
  369. package/src/services/vision/fallback-chain.test.ts +86 -0
  370. package/src/services/vision/hash.d.ts +71 -0
  371. package/src/services/vision/hash.d.ts.map +1 -0
  372. package/src/services/vision/hash.ts +157 -0
  373. package/src/services/vision/index.d.ts +95 -0
  374. package/src/services/vision/index.d.ts.map +1 -0
  375. package/src/services/vision/index.ts +251 -0
  376. package/src/services/vision/llama-server.d.ts +73 -0
  377. package/src/services/vision/llama-server.d.ts.map +1 -0
  378. package/src/services/vision/llama-server.ts +177 -0
  379. package/src/services/vision/types.d.ts +153 -0
  380. package/src/services/vision/types.d.ts.map +1 -0
  381. package/src/services/vision/types.ts +154 -0
  382. package/src/services/vision/vast-fallback.d.ts +18 -0
  383. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  384. package/src/services/vision/vast-fallback.ts +127 -0
  385. package/src/services/vision-embedding-cache.d.ts +98 -0
  386. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  387. package/src/services/vision-embedding-cache.ts +189 -0
  388. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  389. package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
  390. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  391. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  392. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  393. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  394. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  395. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  396. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  397. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  398. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  399. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  400. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  401. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
  402. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  403. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  404. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  405. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  406. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  407. package/src/services/voice/audio-frame-consumer.ts +491 -0
  408. package/src/services/voice/barge-in.d.ts +112 -0
  409. package/src/services/voice/barge-in.d.ts.map +1 -0
  410. package/src/services/voice/barge-in.test.ts +244 -0
  411. package/src/services/voice/barge-in.ts +336 -0
  412. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  413. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  414. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  415. package/src/services/voice/cancellation-coordinator.ts +269 -0
  416. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  417. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  418. package/src/services/voice/checkpoint-manager.ts +401 -0
  419. package/src/services/voice/checkpoint-policy.ts +336 -0
  420. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  421. package/src/services/voice/e2e-harness.test.ts +182 -0
  422. package/src/services/voice/e2e-harness.ts +743 -0
  423. package/src/services/voice/eager-context-builder.d.ts +170 -0
  424. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  425. package/src/services/voice/eager-context-builder.ts +262 -0
  426. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  427. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  428. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  429. package/src/services/voice/embedding-server.ts +200 -0
  430. package/src/services/voice/embedding.d.ts +133 -0
  431. package/src/services/voice/embedding.d.ts.map +1 -0
  432. package/src/services/voice/embedding.test.ts +148 -0
  433. package/src/services/voice/embedding.ts +244 -0
  434. package/src/services/voice/emotion-attribution.d.ts +68 -0
  435. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  436. package/src/services/voice/emotion-attribution.test.ts +129 -0
  437. package/src/services/voice/emotion-attribution.ts +361 -0
  438. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  439. package/src/services/voice/engine-bridge.d.ts +746 -0
  440. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  441. package/src/services/voice/engine-bridge.test.ts +384 -0
  442. package/src/services/voice/engine-bridge.ts +2226 -0
  443. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  444. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  445. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  446. package/src/services/voice/eot-classifier.d.ts +214 -0
  447. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  448. package/src/services/voice/eot-classifier.ts +533 -0
  449. package/src/services/voice/errors.d.ts +20 -0
  450. package/src/services/voice/errors.d.ts.map +1 -0
  451. package/src/services/voice/errors.ts +32 -0
  452. package/src/services/voice/expressive-tags.d.ts +158 -0
  453. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  454. package/src/services/voice/expressive-tags.ts +405 -0
  455. package/src/services/voice/ffi-bindings.d.ts +636 -0
  456. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  457. package/src/services/voice/ffi-bindings.test.ts +671 -0
  458. package/src/services/voice/ffi-bindings.ts +3050 -0
  459. package/src/services/voice/first-line-cache.d.ts +181 -0
  460. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  461. package/src/services/voice/first-line-cache.ts +725 -0
  462. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  463. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  464. package/src/services/voice/fused-eot-scorer.ts +135 -0
  465. package/src/services/voice/index.d.ts +91 -0
  466. package/src/services/voice/index.d.ts.map +1 -0
  467. package/src/services/voice/index.ts +481 -0
  468. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  469. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  470. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  471. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  472. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  473. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  474. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  475. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  476. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  477. package/src/services/voice/kokoro/index.ts +79 -0
  478. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  479. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  480. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  481. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  482. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  483. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  484. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  485. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  486. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  487. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  488. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  489. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  490. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  491. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  492. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  493. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  494. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  495. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  496. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  497. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  498. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  499. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  501. package/src/services/voice/kokoro/types.d.ts +82 -0
  502. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  503. package/src/services/voice/kokoro/types.ts +95 -0
  504. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  505. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  506. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  507. package/src/services/voice/kokoro/voices.d.ts +30 -0
  508. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  509. package/src/services/voice/kokoro/voices.ts +64 -0
  510. package/src/services/voice/lifecycle.d.ts +135 -0
  511. package/src/services/voice/lifecycle.d.ts.map +1 -0
  512. package/src/services/voice/lifecycle.test.ts +315 -0
  513. package/src/services/voice/lifecycle.ts +301 -0
  514. package/src/services/voice/live-diarization-session.d.ts +96 -0
  515. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  516. package/src/services/voice/live-diarization-session.ts +289 -0
  517. package/src/services/voice/mic-source.d.ts +136 -0
  518. package/src/services/voice/mic-source.d.ts.map +1 -0
  519. package/src/services/voice/mic-source.test.ts +210 -0
  520. package/src/services/voice/mic-source.ts +503 -0
  521. package/src/services/voice/optimistic-policy.d.ts +109 -0
  522. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  523. package/src/services/voice/optimistic-policy.test.ts +101 -0
  524. package/src/services/voice/optimistic-policy.ts +192 -0
  525. package/src/services/voice/optimistic-rollback.ts +343 -0
  526. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  527. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  528. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  529. package/src/services/voice/partial-stabilizer.ts +140 -0
  530. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  531. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  532. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  533. package/src/services/voice/phrase-cache.d.ts +76 -0
  534. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  535. package/src/services/voice/phrase-cache.test.ts +242 -0
  536. package/src/services/voice/phrase-cache.ts +186 -0
  537. package/src/services/voice/phrase-chunker.d.ts +62 -0
  538. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  539. package/src/services/voice/phrase-chunker.test.ts +239 -0
  540. package/src/services/voice/phrase-chunker.ts +281 -0
  541. package/src/services/voice/pipeline-impls.d.ts +151 -0
  542. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  543. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  544. package/src/services/voice/pipeline-impls.test.ts +292 -0
  545. package/src/services/voice/pipeline-impls.ts +315 -0
  546. package/src/services/voice/pipeline.d.ts +216 -0
  547. package/src/services/voice/pipeline.d.ts.map +1 -0
  548. package/src/services/voice/pipeline.ts +505 -0
  549. package/src/services/voice/prefill-client.d.ts +123 -0
  550. package/src/services/voice/prefill-client.d.ts.map +1 -0
  551. package/src/services/voice/prefill-client.ts +316 -0
  552. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  553. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  554. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  555. package/src/services/voice/profile-store.d.ts +248 -0
  556. package/src/services/voice/profile-store.d.ts.map +1 -0
  557. package/src/services/voice/profile-store.ts +887 -0
  558. package/src/services/voice/ring-buffer.d.ts +40 -0
  559. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  560. package/src/services/voice/ring-buffer.ts +105 -0
  561. package/src/services/voice/rollback-queue.d.ts +24 -0
  562. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  563. package/src/services/voice/rollback-queue.ts +74 -0
  564. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  565. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  566. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  567. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  568. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  569. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  570. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  571. package/src/services/voice/scheduler.d.ts +146 -0
  572. package/src/services/voice/scheduler.d.ts.map +1 -0
  573. package/src/services/voice/scheduler.t2.test.ts +141 -0
  574. package/src/services/voice/scheduler.ts +927 -0
  575. package/src/services/voice/shared-resources.d.ts +190 -0
  576. package/src/services/voice/shared-resources.d.ts.map +1 -0
  577. package/src/services/voice/shared-resources.ts +320 -0
  578. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  579. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  580. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  581. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  582. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  583. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  584. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  585. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  586. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  587. package/src/services/voice/speaker/diarizer.ts +218 -0
  588. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  589. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  590. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  591. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  592. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  593. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  594. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  595. package/src/services/voice/speaker/encoder.d.ts +37 -0
  596. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  597. package/src/services/voice/speaker/encoder.ts +105 -0
  598. package/src/services/voice/speaker-imprint.d.ts +83 -0
  599. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  600. package/src/services/voice/speaker-imprint.test.ts +185 -0
  601. package/src/services/voice/speaker-imprint.ts +312 -0
  602. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  603. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  604. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  605. package/src/services/voice/speaker-preset-cache.ts +195 -0
  606. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  607. package/src/services/voice/system-audio-sink.d.ts +73 -0
  608. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  609. package/src/services/voice/system-audio-sink.test.ts +29 -0
  610. package/src/services/voice/system-audio-sink.ts +366 -0
  611. package/src/services/voice/transcriber.d.ts +244 -0
  612. package/src/services/voice/transcriber.d.ts.map +1 -0
  613. package/src/services/voice/transcriber.test.ts +392 -0
  614. package/src/services/voice/transcriber.ts +704 -0
  615. package/src/services/voice/turn-controller.d.ts +183 -0
  616. package/src/services/voice/turn-controller.d.ts.map +1 -0
  617. package/src/services/voice/turn-controller.test.ts +575 -0
  618. package/src/services/voice/turn-controller.ts +596 -0
  619. package/src/services/voice/types.d.ts +643 -0
  620. package/src/services/voice/types.d.ts.map +1 -0
  621. package/src/services/voice/types.ts +699 -0
  622. package/src/services/voice/vad.d.ts +282 -0
  623. package/src/services/voice/vad.d.ts.map +1 -0
  624. package/src/services/voice/vad.test.ts +480 -0
  625. package/src/services/voice/vad.ts +827 -0
  626. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  627. package/src/services/voice/voice-budget.d.ts +241 -0
  628. package/src/services/voice/voice-budget.d.ts.map +1 -0
  629. package/src/services/voice/voice-budget.test.ts +420 -0
  630. package/src/services/voice/voice-budget.ts +656 -0
  631. package/src/services/voice/voice-duet.test.ts +375 -0
  632. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  633. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  634. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  635. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  636. package/src/services/voice/voice-preset-format.d.ts +158 -0
  637. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  638. package/src/services/voice/voice-preset-format.ts +700 -0
  639. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  640. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  641. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  642. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  643. package/src/services/voice/voice-profile-artifact.ts +518 -0
  644. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  645. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  646. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  647. package/src/services/voice/voice-profile-routes.ts +425 -0
  648. package/src/services/voice/voice-scenario.ts +154 -0
  649. package/src/services/voice/voice-settings.d.ts +82 -0
  650. package/src/services/voice/voice-settings.d.ts.map +1 -0
  651. package/src/services/voice/voice-settings.ts +172 -0
  652. package/src/services/voice/voice-state-machine.d.ts +364 -0
  653. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  654. package/src/services/voice/voice-state-machine.ts +727 -0
  655. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  656. package/src/services/voice/voice-workbench-report.ts +326 -0
  657. package/src/services/voice/voice-workbench.test.ts +158 -0
  658. package/src/services/voice/voice.test.ts +1070 -0
  659. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  660. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  661. package/src/services/voice/wake-word-ggml.ts +320 -0
  662. package/src/services/voice/wake-word.d.ts +255 -0
  663. package/src/services/voice/wake-word.d.ts.map +1 -0
  664. package/src/services/voice/wake-word.test.ts +298 -0
  665. package/src/services/voice/wake-word.ts +554 -0
  666. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  667. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  668. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  669. package/src/services/voice-model-updater.d.ts +240 -0
  670. package/src/services/voice-model-updater.d.ts.map +1 -0
  671. package/src/services/voice-model-updater.ts +724 -0
  672. package/src/services/voice-prewarm.d.ts +3 -0
  673. package/src/services/voice-prewarm.d.ts.map +1 -0
  674. package/src/services/voice-prewarm.ts +51 -0
  675. package/dist/index.d.ts +0 -37
  676. package/dist/index.js +0 -1098
@@ -0,0 +1,157 @@
1
+ /**
2
+ * Vision-embedding cache key derivation (WS2).
3
+ *
4
+ * The arbiter's vision-embedding cache (WS1, `vision-embedding-cache.ts`)
5
+ * is keyed by SHA-256 of a *normalized* representation of the input
6
+ * image. The default path (`hashVisionInput`) hashes the exact byte
7
+ * stream, so two different JPEG encodings of the same screenshot get
8
+ * different keys; cross-platform hits (e.g. an RGBA vs BGRA capture of
9
+ * the same frame) require decoding to pixels and using `hashRawPixels`.
10
+ *
11
+ * Normalization is deliberately minimal:
12
+ *
13
+ * 1. Resolve the input to raw bytes (decoding base64/data-url wrappers).
14
+ * 2. Hash with the model-family prefix so the cache can hold tokens
15
+ * for multiple VL families without collision.
16
+ *
17
+ * What we DO NOT do here:
18
+ *
19
+ * - Resize the image. The backend's projector enforces its own input
20
+ * resolution; the bytes the projector sees are what gets projected.
21
+ * Re-encoding here would add work without changing the hit rate
22
+ * (the platform-provided buffer is already at the camera's native
23
+ * resolution).
24
+ * - Strip JPEG/PNG headers. They contribute to the hash; two
25
+ * reencodings of the same pixel array land in different cache
26
+ * slots intentionally. Reuse only the exact same byte stream.
27
+ *
28
+ * If a downstream caller wants finer-grained cache hits (e.g. dedupe
29
+ * across re-encodings of the same screen frame), it should decode to
30
+ * RGBA pixels itself and call `hashRawPixels`. The default
31
+ * `hashVisionInput` path is the conservative, byte-stream-only path.
32
+ */
33
+
34
+ import { createHash } from "node:crypto";
35
+ import type { VisionImageInput } from "./types";
36
+
37
+ const DEFAULT_FAMILY = "qwen3-vl";
38
+
39
/**
 * Resolve a `VisionImageInput` to its raw bytes, plus the MIME type when
 * the input carries one (so the caller can forward it to the backend).
 *
 * - `bytes`: returned as-is (the same array, no copy).
 * - `base64`: decoded into a fresh `Uint8Array`.
 * - `dataUrl`: split at the first comma into header and payload. Only the
 *   header decides whether the payload is base64; a non-base64 payload is
 *   percent-decoded. The media type may be omitted (`data:;base64,...`),
 *   in which case `mimeType` is `undefined`.
 * - `url`: always throws — the caller must fetch first; the hash step
 *   does not own HTTP.
 *
 * @throws Error on a malformed data URL or on a `url` input.
 */
export function resolveImageBytes(input: VisionImageInput): {
	bytes: Uint8Array;
	mimeType?: string;
} {
	switch (input.kind) {
		case "bytes":
			return { bytes: input.bytes, mimeType: input.mimeType };
		case "base64": {
			const bytes = Uint8Array.from(Buffer.from(input.base64, "base64"));
			return { bytes, mimeType: input.mimeType };
		}
		case "dataUrl": {
			// Groups: 1 = media type (may be empty), 2 = `;param` list, 3 = payload.
			const match = /^data:([^;,]*)((?:;[^,]*)?),(.*)$/s.exec(input.dataUrl);
			if (!match) {
				throw new Error(
					"[vision/hash] malformed data URL — expected data:<mime>;base64,<payload>",
				);
			}
			const mime = match[1] ?? "";
			const params = match[2] ?? "";
			const payload = match[3] ?? "";
			// Look for the base64 marker in the header only: a plain payload
			// that happens to contain ";base64" must not be base64-decoded.
			const isBase64 = /;base64/i.test(params);
			const bytes = isBase64
				? Uint8Array.from(Buffer.from(payload, "base64"))
				: percentDecodeToBytes(payload);
			return { bytes, mimeType: mime === "" ? undefined : mime };
		}
		case "url":
			throw new Error(
				"[vision/hash] url inputs must be fetched by the caller before hashing — the hash step does not own HTTP",
			);
	}
}

/**
 * Percent-decode a non-base64 data-URL payload into bytes. `%XX` escapes
 * become the byte they name; every other character (including a `%` that
 * is not followed by two hex digits) is kept as its UTF-8 encoding.
 */
function percentDecodeToBytes(text: string): Uint8Array {
	const src = Buffer.from(text, "utf8");
	const out = new Uint8Array(src.length);
	let written = 0;
	for (let i = 0; i < src.length; i++) {
		const byte = src[i] ?? 0;
		if (byte === 0x25 && i + 2 < src.length) {
			const hi = hexDigitValue(src[i + 1] ?? 0);
			const lo = hexDigitValue(src[i + 2] ?? 0);
			if (hi >= 0 && lo >= 0) {
				out[written++] = hi * 16 + lo;
				i += 2;
				continue;
			}
		}
		out[written++] = byte;
	}
	return out.slice(0, written);
}

/** Value of an ASCII hex digit (0-15), or -1 when `code` is not one. */
function hexDigitValue(code: number): number {
	if (code >= 0x30 && code <= 0x39) return code - 0x30;
	const lower = code | 0x20;
	if (lower >= 0x61 && lower <= 0x66) return lower - 0x61 + 10;
	return -1;
}
77
+
78
/**
 * SHA-256 over an opaque byte stream, scoped by model family. The digest
 * input is, in order: the family string, the payload length as a 4-byte
 * big-endian integer, then the payload itself. Returns lowercase hex.
 */
export function hashImageBytes(
	bytes: Uint8Array,
	modelFamily: string = DEFAULT_FAMILY,
): string {
	// The payload length sits between the family string and the payload
	// as a cheap separator between the two variable-length parts.
	const payloadLength = Buffer.alloc(4);
	payloadLength.writeUInt32BE(bytes.byteLength, 0);
	return createHash("sha256")
		.update(modelFamily)
		.update(payloadLength)
		.update(bytes)
		.digest("hex");
}
99
+
100
/**
 * Hash a raw pixel buffer (RGBA / RGB / BGRA / BGR).
 *
 * BGRA is rewritten to RGBA and BGR to RGB before hashing, and the
 * prefix records the *canonical* order (`rgba` or `rgb`). The same image
 * captured as RGBA on one platform and BGRA on another therefore yields
 * the same key. Width and height are part of the prefix so two scaled
 * versions of the same source do not collide, and 3-channel and
 * 4-channel buffers never share a key.
 *
 * The input buffer is never mutated.
 *
 * @throws RangeError (from `writeUInt32BE`) when `width` or `height` is
 *   not an unsigned 32-bit integer.
 */
export function hashRawPixels(args: {
	bytes: Uint8Array;
	width: number;
	height: number;
	channelOrder: "rgba" | "rgb" | "bgra" | "bgr";
	modelFamily?: string;
}): string {
	const hasAlpha =
		args.channelOrder === "rgba" || args.channelOrder === "bgra";
	// Hash the order the pixels are in AFTER normalization. Folding in the
	// caller's original order would give BGRA and RGBA inputs different
	// keys and defeat the normalization below.
	const canonicalOrder = hasAlpha ? "rgba" : "rgb";

	const h = createHash("sha256");
	h.update(args.modelFamily ?? DEFAULT_FAMILY);
	h.update("|raw|");
	const prefix = Buffer.alloc(12);
	prefix.writeUInt32BE(args.width, 0);
	prefix.writeUInt32BE(args.height, 4);
	prefix.write(canonicalOrder.padEnd(4, " "), 8, "ascii");
	h.update(prefix);
	h.update(normalizeChannels(args.bytes, args.channelOrder));
	return h.digest("hex");
}

/**
 * Return the pixels in RGB(A) order. RGB(A) input is returned unchanged
 * (same array); BGR(A) input is copied with the first and third channel
 * of every pixel swapped. Bytes after the last whole pixel are copied
 * through untouched, matching what the RGB(A) path hashes.
 */
function normalizeChannels(
	bytes: Uint8Array,
	order: "rgba" | "rgb" | "bgra" | "bgr",
): Uint8Array {
	if (order === "rgba" || order === "rgb") return bytes;
	const stride = order === "bgra" ? 4 : 3;
	// `new Uint8Array(typedArray)` always copies, even when `bytes` is a Buffer.
	const out = new Uint8Array(bytes);
	for (let i = 0; i + stride <= bytes.byteLength; i += stride) {
		out[i] = bytes[i + 2];
		out[i + 2] = bytes[i];
	}
	return out;
}
145
+
146
/**
 * Cache-key helper for the provider: resolves a `VisionImageInput` to
 * bytes and hashes them under the given model family. Propagates the
 * error `resolveImageBytes` throws for `url` inputs — fetch first.
 */
export function hashVisionInput(
	input: VisionImageInput,
	modelFamily: string = DEFAULT_FAMILY,
): string {
	return hashImageBytes(resolveImageBytes(input).bytes, modelFamily);
}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Vision-describe capability (WS2) — public entry point.
3
+ *
4
+ * This module is what plugin-vision (WS4), the IMAGE_DESCRIPTION
5
+ * handler in `provider.ts`, and computer-use (WS9) import to register
6
+ * vision capability with the WS1 MemoryArbiter.
7
+ *
8
+ * Wiring:
9
+ *
10
+ * const arbiter = service.getMemoryArbiter();
11
+ * const registration = createVisionCapabilityRegistration({
12
+ * loader: createDefaultVisionLoader({ ... }),
13
+ * arbiterCache: arbiter,
14
+ * });
15
+ * arbiter.registerCapability(registration);
16
+ *
17
+ * `createVisionCapabilityRegistration` wraps the underlying backend so
18
+ * the arbiter's `run(request)` path:
19
+ *
20
+ * 1. Hashes the request's image bytes (model-family-scoped).
21
+ * 2. Checks the arbiter's vision-embedding cache.
22
+ * 3. On miss: calls `backend.describe(request)`, lets the backend
23
+ * run its own projector + decoder. Backends that cannot expose projected
24
+ * tokens return decoder text only, so the cache stays empty for this hash.
25
+ * The decoder text is what the caller wanted anyway.
26
+ * 4. On hit: calls `backend.describe(request, { projectedTokens })`.
27
+ * Backends that support pre-projected token reuse skip the
28
+ * projector entirely. Backends that don't ignore the hint; the
29
+ * result is still correct but the projector cost is paid again.
30
+ */
31
+ export { type AospLlamaMtmdBinding, type AospMtmdHandle, type LoadAospVisionBackendOptions, loadAospVisionBackend, } from "./aosp-unavailable";
32
+ export { type CapacitorLlamaMtmdBinding, type CapacitorLlamaMtmdHandle, type CapacitorLlamaVisionBackendOptions, loadCapacitorLlamaVisionBackend, VisionBackendUnavailableError, type VisionManagerLike, } from "./capacitor-llama";
33
+ export { classifyLocalVisionError, type LocalImageDescriptionHandler, type LocalVisionOutcome, type VisionCloudFallbackOptions, type VisionFallbackReason, type WrappedImageDescriptionHandler, wrapImageDescriptionHandlerWithCloudFallback, } from "./cloud-fallback";
34
+ export { hashImageBytes, hashRawPixels, hashVisionInput, resolveImageBytes, } from "./hash";
35
+ export { createLlamaServerVisionBackend, type LlamaServerVisionBackendOptions, } from "./llama-server";
36
+ export type { VisionDescribeBackend, VisionDescribeBackendLoader, VisionDescribeBackendOptions, VisionDescribeLoadArgs, VisionDescribeRequest, VisionDescribeResult, VisionImageChannelOrder, VisionImageInput, } from "./types";
37
+ export { type VisionVastFallbackOptions, wrapImageDescriptionHandlerWithVastFallback, } from "./vast-fallback";
38
+ import type { CapabilityRegistration } from "../memory-arbiter";
39
+ import type { VisionDescribeBackend, VisionDescribeBackendLoader, VisionDescribeRequest, VisionDescribeResult } from "./types";
40
+ /**
41
+ * Minimal arbiter shape we need from the cache. Lets tests inject a
42
+ * fake cache without pulling in the whole MemoryArbiter.
+ *
+ * `getCachedVisionEmbedding` returns the entry for a hash, or `null`;
+ * `setCachedVisionEmbedding` takes a hash, an entry and an optional `ttlMs`.
+ * NOTE(review): the exact meaning of `live` and `ttlMs` is defined by the
+ * arbiter implementation, which is not visible here — confirm before
+ * relying on them.
43
+ */
44
+ export interface VisionEmbeddingCacheLike {
45
+ getCachedVisionEmbedding(hash: string): {
46
+ tokens: Float32Array;
47
+ tokenCount: number;
48
+ hiddenSize: number;
49
+ live?: boolean;
50
+ } | null;
51
+ setCachedVisionEmbedding(hash: string, entry: {
52
+ tokens: Float32Array;
53
+ tokenCount: number;
54
+ hiddenSize: number;
55
+ }, ttlMs?: number): void;
56
+ }
57
+ export interface CreateVisionCapabilityRegistrationOptions {
58
+ /**
59
+ * The arbiter (or any object with the cache passthroughs). When
60
+ * provided the wrapper performs hash → cache lookup before calling
61
+ * the backend's `describe`.
+ * When omitted, the lookup is skipped and `describe` is called
+ * without a projected-token hint.
62
+ */
63
+ arbiterCache?: VisionEmbeddingCacheLike;
64
+ loader: VisionDescribeBackendLoader;
65
+ /** Default model family for the cache key; a request's own `modelFamily` takes precedence. Defaults to `qwen3-vl`. */
66
+ modelFamily?: string;
67
+ estimatedMb?: number;
68
+ }
69
+ /**
70
+ * Build a `CapabilityRegistration` ready to feed to
71
+ * `arbiter.registerCapability()`. The wrapper plumbs the cache hint
72
+ * into the backend's describe call so backends that support
73
+ * pre-projected tokens skip the projector.
+ *
+ * @param opts Backend loader plus the optional cache, default model
+ *   family and memory estimate.
+ * @returns The registration object for the vision-describe capability.
74
+ */
75
+ export declare function createVisionCapabilityRegistration(opts: CreateVisionCapabilityRegistrationOptions): CapabilityRegistration<VisionDescribeBackend, VisionDescribeRequest, VisionDescribeResult>;
76
+ import type { IAgentRuntime, ImageDescriptionParams, ImageDescriptionResult } from "@elizaos/core";
77
+ import { type LocalImageDescriptionHandler, type VisionCloudFallbackOptions } from "./cloud-fallback";
78
+ import { type VisionVastFallbackOptions } from "./vast-fallback";
79
+ /**
80
+ * Compose the full local → cloud → vast IMAGE_DESCRIPTION chain and
81
+ * terminate it as a runtime-shaped `ImageDescriptionHandler`. When all
82
+ * three paths return `{ kind: "fallback" }`, the terminator throws the
83
+ * underlying cause (or a structured upstream-fail message) so the runtime
84
+ * surfaces the failure cleanly rather than serving a sentinel result.
+ * More precisely, the implementation throws a new `Error` whose message
+ * starts with `[VisionFallback]`; the fallback's `cause`, when present,
+ * is attached to that error as its `cause` property.
85
+ *
86
+ * This is the single entry point `ensure-local-inference-handler.ts`
87
+ * uses at the IMAGE_DESCRIPTION model registration site. Tests
88
+ * exercise the composition via the individual `wrap*` helpers; this
89
+ * function is the production wiring.
+ *
+ * @param local The local IMAGE_DESCRIPTION handler at the head of the chain.
+ * @param options Optional settings forwarded to the cloud and vast wrappers.
+ * @returns A handler taking `(runtime, params)`; the implementation does
+ *   not use the `runtime` argument.
90
+ */
91
+ export declare function withVisionFallbackChain(local: LocalImageDescriptionHandler, options?: {
92
+ cloud?: VisionCloudFallbackOptions;
93
+ vast?: VisionVastFallbackOptions;
94
+ }): (runtime: IAgentRuntime, params: ImageDescriptionParams | string) => Promise<ImageDescriptionResult>;
95
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,EACN,KAAK,oBAAoB,EACzB,KAAK,cAAc,EACnB,KAAK,4BAA4B,EACjC,qBAAqB,GACrB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACN,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAC7B,KAAK,kCAAkC,EACvC,+BAA+B,EAC/B,6BAA6B,EAC7B,KAAK,iBAAiB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACN,wBAAwB,EACxB,KAAK,4BAA4B,EACjC,KAAK,kBAAkB,EACvB,KAAK,0BAA0B,EAC/B,KAAK,oBAAoB,EACzB,KAAK,8BAA8B,EACnC,4CAA4C,GAC5C,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACN,cAAc,EACd,aAAa,EACb,eAAe,EACf,iBAAiB,GACjB,MAAM,QAAQ,CAAC;AAChB,OAAO,EACN,8BAA8B,EAC9B,KAAK,+BAA+B,GACpC,MAAM,gBAAgB,CAAC;AACxB,YAAY,EACX,qBAAqB,EACrB,2BAA2B,EAC3B,4BAA4B,EAC5B,sBAAsB,EACtB,qBAAqB,EACrB,oBAAoB,EACpB,uBAAuB,EACvB,gBAAgB,GAChB,MAAM,SAAS,CAAC;AACjB,OAAO,EACN,KAAK,yBAAyB,EAC9B,2CAA2C,GAC3C,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAEX,sBAAsB,EACtB,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EACX,qBAAqB,EACrB,2BAA2B,EAC3B,qBAAqB,EACrB,oBAAoB,EACpB,MAAM,SAAS,CAAC;AAEjB;;;GAGG;AACH,MAAM,WAAW,wBAAwB;IACxC,wBAAwB,CAAC,IAAI,EAAE,MAAM,GAAG;QACvC,MAAM,EAAE,YAAY,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,CAAC,EAAE,OAAO,CAAC;KACf,GAAG,IAAI,CAAC;IACT,wBAAwB,CACvB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;QACN,MAAM,EAAE,YAAY,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;KACnB,EACD,KAAK,CAAC,EAAE,MAAM,GACZ,IAAI,CAAC;CACR;AAED,MAAM,WAAW,yCAAyC;IACzD;;;;OAIG;IACH,YAAY,CAAC,EAAE,wBAAwB,CAAC;IACxC,MAAM,EAAE,2BAA2B,CAAC;IACpC,sEAAsE;IACtE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;;GAKG;AACH,wBAAgB,kCAAkC,CACjD,IAAI,EAAE,yCAAyC,GAC7C,sBAAsB,CACxB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,CACpB,CAkDA;AAED,OAAO,KAAK,EACX,aAAa,EACb,sBAAsB,EACtB,sBAAsB,EACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACN,KAAK,4BAA4B,EACjC,KAAK,0BAA0B,EAE/B,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACN,KAAK,yBAAyB,EAE9B,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;GAWG;AACH,wBAAgB,uBAAuB,CACtC,KAAK,EAAE,4BAA4B,EACnC,OAAO,GAAE;IACR,KAAK,CAAC,EAAE,0BAA0B,CAAC;IACnC,IAAI,CAAC,EAAE,yBAAyB,CAAC;C
AC5B,GACJ,CACF,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,sBAAsB,GAAG,MAAM,KACnC,OAAO,CAAC,sBAAsB,CAAC,CAwBnC"}
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Vision-describe capability (WS2) — public entry point.
3
+ *
4
+ * This module is what plugin-vision (WS4), the IMAGE_DESCRIPTION
5
+ * handler in `provider.ts`, and computer-use (WS9) import to register
6
+ * vision capability with the WS1 MemoryArbiter.
7
+ *
8
+ * Wiring:
9
+ *
10
+ * const arbiter = service.getMemoryArbiter();
11
+ * const registration = createVisionCapabilityRegistration({
12
+ * loader: createDefaultVisionLoader({ ... }),
13
+ * arbiterCache: arbiter,
14
+ * });
15
+ * arbiter.registerCapability(registration);
16
+ *
17
+ * `createVisionCapabilityRegistration` wraps the underlying backend so
18
+ * the arbiter's `run(request)` path:
19
+ *
20
+ * 1. Hashes the request's image bytes (model-family-scoped).
21
+ * 2. Checks the arbiter's vision-embedding cache.
22
+ * 3. On miss: calls `backend.describe(request)`, lets the backend
23
+ * run its own projector + decoder. Backends that cannot expose projected
24
+ * tokens return decoder text only, so the cache stays empty for this hash.
25
+ * The decoder text is what the caller wanted anyway.
26
+ * 4. On hit: calls `backend.describe(request, { projectedTokens })`.
27
+ * Backends that support pre-projected token reuse skip the
28
+ * projector entirely. Backends that don't ignore the hint; the
29
+ * result is still correct but the projector cost is paid again.
30
+ */
31
+
32
+ export {
33
+ type AospLlamaMtmdBinding,
34
+ type AospMtmdHandle,
35
+ type LoadAospVisionBackendOptions,
36
+ loadAospVisionBackend,
37
+ } from "./aosp-unavailable";
38
+ export {
39
+ type CapacitorLlamaMtmdBinding,
40
+ type CapacitorLlamaMtmdHandle,
41
+ type CapacitorLlamaVisionBackendOptions,
42
+ loadCapacitorLlamaVisionBackend,
43
+ VisionBackendUnavailableError,
44
+ type VisionManagerLike,
45
+ } from "./capacitor-llama";
46
+ export {
47
+ classifyLocalVisionError,
48
+ type LocalImageDescriptionHandler,
49
+ type LocalVisionOutcome,
50
+ type VisionCloudFallbackOptions,
51
+ type VisionFallbackReason,
52
+ type WrappedImageDescriptionHandler,
53
+ wrapImageDescriptionHandlerWithCloudFallback,
54
+ } from "./cloud-fallback";
55
+ export {
56
+ hashImageBytes,
57
+ hashRawPixels,
58
+ hashVisionInput,
59
+ resolveImageBytes,
60
+ } from "./hash";
61
+ export {
62
+ createLlamaServerVisionBackend,
63
+ type LlamaServerVisionBackendOptions,
64
+ } from "./llama-server";
65
+ export type {
66
+ VisionDescribeBackend,
67
+ VisionDescribeBackendLoader,
68
+ VisionDescribeBackendOptions,
69
+ VisionDescribeLoadArgs,
70
+ VisionDescribeRequest,
71
+ VisionDescribeResult,
72
+ VisionImageChannelOrder,
73
+ VisionImageInput,
74
+ } from "./types";
75
+ export {
76
+ type VisionVastFallbackOptions,
77
+ wrapImageDescriptionHandlerWithVastFallback,
78
+ } from "./vast-fallback";
79
+
80
+ import type {
81
+ ArbiterCapability,
82
+ CapabilityRegistration,
83
+ } from "../memory-arbiter";
84
+ import { hashVisionInput } from "./hash";
85
+ import type {
86
+ VisionDescribeBackend,
87
+ VisionDescribeBackendLoader,
88
+ VisionDescribeRequest,
89
+ VisionDescribeResult,
90
+ } from "./types";
91
+
92
+ /**
93
+ * Minimal arbiter shape we need from the cache. Lets tests inject a
94
+ * fake cache without pulling in the whole MemoryArbiter.
+ *
+ * `getCachedVisionEmbedding` returns the entry for a hash, or `null`.
+ * The registration wrapper in this file treats an entry whose `live`
+ * flag is exactly `false` as a miss; an absent `live` counts as usable.
+ * `setCachedVisionEmbedding` takes a hash, an entry and an optional
+ * `ttlMs` (presumably a time-to-live in milliseconds — confirm against
+ * the arbiter implementation).
95
+ */
96
+ export interface VisionEmbeddingCacheLike {
97
+ getCachedVisionEmbedding(hash: string): {
98
+ tokens: Float32Array;
99
+ tokenCount: number;
100
+ hiddenSize: number;
101
+ live?: boolean;
102
+ } | null;
103
+ setCachedVisionEmbedding(
104
+ hash: string,
105
+ entry: {
106
+ tokens: Float32Array;
107
+ tokenCount: number;
108
+ hiddenSize: number;
109
+ },
110
+ ttlMs?: number,
111
+ ): void;
112
+ }
113
+
114
+ export interface CreateVisionCapabilityRegistrationOptions {
115
+ /**
116
+ * The arbiter (or any object with the cache passthroughs). When
117
+ * provided the wrapper performs hash → cache lookup before calling
118
+ * the backend's `describe`.
+ * When omitted, the lookup is skipped and `describe` is called
+ * without a projected-token hint.
119
+ */
120
+ arbiterCache?: VisionEmbeddingCacheLike;
121
+ loader: VisionDescribeBackendLoader;
122
+ /** Default model family for the cache key; a request's own `modelFamily` takes precedence. Defaults to `qwen3-vl`. */
123
+ modelFamily?: string;
124
+ estimatedMb?: number;
125
+ }
126
+
127
/**
 * Create the `CapabilityRegistration` for the `vision-describe`
 * capability, ready for `arbiter.registerCapability()`.
 *
 * - `load` delegates to `opts.loader`.
 * - `unload` disposes the backend.
 * - `run` forwards the request to `backend.describe`. When a cache was
 *   supplied and holds a usable entry for the image hash, the entry's
 *   tokens are passed along as `projectedTokens` so capable backends can
 *   skip the projector. The backend result is returned with a `cacheHit`
 *   flag added.
 *
 * `estimatedMb` defaults to 600 and the model family to `qwen3-vl`.
 */
export function createVisionCapabilityRegistration(
	opts: CreateVisionCapabilityRegistrationOptions,
): CapabilityRegistration<
	VisionDescribeBackend,
	VisionDescribeRequest,
	VisionDescribeResult
> {
	const { arbiterCache, loader } = opts;
	const defaultFamily = opts.modelFamily ?? "qwen3-vl";
	const capability: ArbiterCapability = "vision-describe";

	/** Usable cache entry for the request's image, or `null` on any miss. */
	const findLiveEntry = (request: VisionDescribeRequest, family: string) => {
		if (!arbiterCache) return null;
		// URL inputs can't be hashed without fetching them first; skip the
		// lookup instead of paying for the fetch twice.
		if (request.image.kind === "url") return null;
		try {
			const key = hashVisionInput(request.image, family);
			const entry = arbiterCache.getCachedVisionEmbedding(key);
			return entry && entry.live !== false ? entry : null;
		} catch {
			// Hashing failed (malformed data URL etc.): run uncached rather
			// than failing the whole request.
			return null;
		}
	};

	return {
		capability,
		residentRole: "vision",
		estimatedMb: opts.estimatedMb ?? 600,
		load: async (modelKey) => await loader(modelKey),
		unload: async (backend) => {
			await backend.dispose();
		},
		run: async (backend, request) => {
			const family = request.modelFamily ?? defaultFamily;
			const entry = findLiveEntry(request, family);
			const projectedTokens = entry
				? {
						tokens: entry.tokens,
						tokenCount: entry.tokenCount,
						hiddenSize: entry.hiddenSize,
					}
				: undefined;
			const described = await backend.describe(request, { projectedTokens });
			return { ...described, cacheHit: Boolean(projectedTokens) };
		},
	};
}
190
+
191
+ import type {
192
+ IAgentRuntime,
193
+ ImageDescriptionParams,
194
+ ImageDescriptionResult,
195
+ } from "@elizaos/core";
196
+ import {
197
+ type LocalImageDescriptionHandler,
198
+ type VisionCloudFallbackOptions,
199
+ wrapImageDescriptionHandlerWithCloudFallback,
200
+ } from "./cloud-fallback";
201
+ import {
202
+ type VisionVastFallbackOptions,
203
+ wrapImageDescriptionHandlerWithVastFallback,
204
+ } from "./vast-fallback";
205
+
206
/**
 * Production wiring for IMAGE_DESCRIPTION: wraps the local handler with
 * the cloud fallback, wraps that with the vast fallback, and adapts the
 * result to the `(runtime, params)` handler shape the runtime expects.
 *
 * If the composed chain still reports `{ kind: "fallback" }`, the
 * returned handler throws a `[VisionFallback]` error naming the reason,
 * with the fallback's `cause` (when present) attached as `cause`, instead
 * of handing a sentinel result to the runtime. Any other outcome is
 * returned as the `ImageDescriptionResult`.
 *
 * `ensure-local-inference-handler.ts` uses this at the IMAGE_DESCRIPTION
 * model registration site; tests cover the individual `wrap*` helpers.
 */
export function withVisionFallbackChain(
	local: LocalImageDescriptionHandler,
	options: {
		cloud?: VisionCloudFallbackOptions;
		vast?: VisionVastFallbackOptions;
	} = {},
): (
	runtime: IAgentRuntime,
	params: ImageDescriptionParams | string,
) => Promise<ImageDescriptionResult> {
	const withCloud = wrapImageDescriptionHandlerWithCloudFallback(
		local,
		options.cloud,
	);
	const chain = wrapImageDescriptionHandlerWithVastFallback(
		withCloud,
		options.vast,
	);

	return async (_runtime, params) => {
		const outcome = await chain(params);

		// Anything that is not a `{ kind: "fallback" }` object is a real result.
		if (
			!outcome ||
			typeof outcome !== "object" ||
			!("kind" in outcome) ||
			outcome.kind !== "fallback"
		) {
			return outcome as ImageDescriptionResult;
		}

		const detail = outcome.cause?.message ?? outcome.reason;
		const failure = new Error(
			`[VisionFallback] all IMAGE_DESCRIPTION providers exhausted (reason=${outcome.reason}): ${detail}`,
		);
		if (outcome.cause) {
			Object.assign(failure, { cause: outcome.cause });
		}
		throw failure;
	};
}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * llama-server vision-describe backend (WS2).
3
+ *
4
+ * Wraps the out-of-process llama-server's `/completion` endpoint with
5
+ * the `image_data` array (base64-encoded payloads) and shapes the
6
+ * response to the WS2 `VisionDescribeBackend` contract.
7
+ *
8
+ * llama-server image-data API recap (verified against llama.cpp commit
9
+ * b8198+, May 2026):
10
+ *
11
+ * POST /completion
12
+ * { "prompt": "<...>USER: [img-12] What's in this image?\nASSISTANT:",
13
+ * "image_data": [
14
+ * { "data": "<base64 png/jpeg>", "id": 12 }
15
+ * ],
16
+ * "n_predict": 256,
17
+ * "temperature": 0.2,
18
+ * "stream": false }
19
+ *
20
+ * Response:
21
+ * { "content": "A photo of a cat.", "stop": true,
22
+ * "timings": { "prompt_ms": 180.4, "predicted_ms": 423.1 } }
23
+ *
24
+ * Server-side mmproj is loaded via the `--mmproj <path>` flag on
25
+ * llama-server startup. The FFI runtime wrapper passes this flag
26
+ * already for tiers with vision enabled; this backend assumes the
27
+ * server has been started with the right mmproj for the active model.
28
+ *
29
+ * Backend responsibility:
30
+ * - Encode the image as base64 (when not already).
31
+ * - Build the prompt with the `[img-N]` placeholder convention.
32
+ * - POST to `/completion`, parse the text + timings.
33
+ * - Honour AbortSignal by passing it through to the fetch call.
34
+ *
35
+ * Backend explicitly does NOT:
36
+ * - Start / stop the server. That's the FFI runtime wrapper's job.
37
+ * - Resolve the mmproj path — the server already has it. The arbiter's
38
+ * `--mmproj` was set when the text model loaded.
39
+ * - Implement projector-token reuse. llama-server has no API to
40
+ * accept pre-projected tokens; if the WS1 cache hit happens, this
41
+ * backend ignores the hint and re-runs the projector. The cache
42
+ * is more useful with the in-process node-llama-cpp backend.
43
+ *
44
+ * Metal / CUDA validation:
45
+ * The llama-server build embeds the same mtmd_encode path the
46
+ * in-process binding will eventually expose. On a Metal build the
47
+ * image encode dispatches through the Metal compute encoder; on a
48
+ * CUDA build through cuBLAS. We have no GPU on this host — see the
49
+ * `__tests__/vision-describe.test.ts` notes for the GPU smoke check.
50
+ */
51
+ import type { VisionDescribeBackend } from "./types";
52
/**
 * Options accepted when constructing the llama-server vision backend.
 */
export interface LlamaServerVisionBackendOptions {
	/**
	 * Base URL of the llama-server instance. The FFI runtime wrapper
	 * exposes it through `currentBaseUrl()`; resolve it once at load time
	 * and pass it in. The backend keeps the value unchanged across calls.
	 */
	baseUrl: string;
	/**
	 * Replacement for the global `fetch`. Intended for tests, which inject
	 * a fake; production relies on the global. It has the same signature
	 * as `fetch`, so the test surface matches the real one.
	 */
	fetch?: typeof fetch;
	/**
	 * `n_predict` budget applied when the caller does not provide
	 * `maxTokens`. 256 matches the description-length budget the
	 * Florence-2 / VisionManager path uses today.
	 */
	defaultMaxTokens?: number;
}
72
/**
 * Factory for the llama-server vision-describe backend.
 *
 * This is a type declaration only; the accompanying source map points at
 * `llama-server.ts` as the implementation. Per this file's header comment,
 * the backend posts to the server's `/completion` endpoint and does not
 * start or stop the server itself.
 *
 * @param opts - Backend options; see `LlamaServerVisionBackendOptions`.
 * @returns A backend conforming to the `VisionDescribeBackend` contract.
 */
export declare function createLlamaServerVisionBackend(
	opts: LlamaServerVisionBackendOptions,
): VisionDescribeBackend;
73
+ //# sourceMappingURL=llama-server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llama-server.d.ts","sourceRoot":"","sources":["llama-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiDG;AAGH,OAAO,KAAK,EACX,qBAAqB,EAGrB,MAAM,SAAS,CAAC;AAEjB,MAAM,WAAW,+BAA+B;IAC/C;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,KAAK,CAAC;IACrB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,8BAA8B,CAC7C,IAAI,EAAE,+BAA+B,GACnC,qBAAqB,CAsEvB"}