@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (701)
  1. package/LICENSE +21 -0
  2. package/README.md +83 -0
  3. package/package.json +82 -15
  4. package/src/actions/generate-media.d.ts +59 -0
  5. package/src/actions/generate-media.d.ts.map +1 -0
  6. package/src/actions/generate-media.ts +647 -0
  7. package/src/actions/identify-speaker.d.ts +23 -0
  8. package/src/actions/identify-speaker.d.ts.map +1 -0
  9. package/src/actions/identify-speaker.ts +171 -0
  10. package/src/actions/transcription-control.d.ts +29 -0
  11. package/src/actions/transcription-control.d.ts.map +1 -0
  12. package/src/actions/transcription-control.test.ts +100 -0
  13. package/src/actions/transcription-control.ts +127 -0
  14. package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
  15. package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
  16. package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
  17. package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
  18. package/src/adapters/capacitor-llama/environment.ts +71 -0
  19. package/src/adapters/capacitor-llama/index.browser.ts +83 -0
  20. package/src/adapters/capacitor-llama/index.ts +807 -0
  21. package/src/adapters/capacitor-llama/loader.ts +109 -0
  22. package/src/adapters/capacitor-llama/structured-output.ts +165 -0
  23. package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
  24. package/src/adapters/capacitor-llama/types.ts +374 -0
  25. package/src/backends/apple-foundation.ts +127 -0
  26. package/src/index.d.ts +8 -0
  27. package/src/index.d.ts.map +1 -0
  28. package/src/index.ts +62 -0
  29. package/src/local-inference-routes.d.ts +38 -0
  30. package/src/local-inference-routes.d.ts.map +1 -0
  31. package/src/local-inference-routes.test.ts +344 -0
  32. package/src/local-inference-routes.ts +1543 -0
  33. package/src/provider.d.ts +21 -0
  34. package/src/provider.d.ts.map +1 -0
  35. package/src/provider.ts +1082 -0
  36. package/src/routes/compat-helpers.d.ts +18 -0
  37. package/src/routes/compat-helpers.d.ts.map +1 -0
  38. package/src/routes/compat-helpers.ts +274 -0
  39. package/src/routes/family-member-route.d.ts +62 -0
  40. package/src/routes/family-member-route.d.ts.map +1 -0
  41. package/src/routes/family-member-route.ts +353 -0
  42. package/src/routes/index.d.ts +19 -0
  43. package/src/routes/index.d.ts.map +1 -0
  44. package/src/routes/index.ts +60 -0
  45. package/src/routes/live-diarization-route.d.ts +26 -0
  46. package/src/routes/live-diarization-route.d.ts.map +1 -0
  47. package/src/routes/live-diarization-route.test.ts +213 -0
  48. package/src/routes/live-diarization-route.ts +122 -0
  49. package/src/routes/local-inference-asr-route.d.ts +4 -0
  50. package/src/routes/local-inference-asr-route.d.ts.map +1 -0
  51. package/src/routes/local-inference-asr-route.test.ts +205 -0
  52. package/src/routes/local-inference-asr-route.ts +163 -0
  53. package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
  54. package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
  55. package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
  56. package/src/routes/local-inference-asr-transcribe.ts +97 -0
  57. package/src/routes/local-inference-compat-routes.d.ts +16 -0
  58. package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
  59. package/src/routes/local-inference-compat-routes.test.ts +485 -0
  60. package/src/routes/local-inference-compat-routes.ts +808 -0
  61. package/src/routes/local-inference-tts-route.d.ts +7 -0
  62. package/src/routes/local-inference-tts-route.d.ts.map +1 -0
  63. package/src/routes/local-inference-tts-route.test.ts +179 -0
  64. package/src/routes/local-inference-tts-route.ts +230 -0
  65. package/src/routes/transcript-audio-store.d.ts +15 -0
  66. package/src/routes/transcript-audio-store.d.ts.map +1 -0
  67. package/src/routes/transcript-audio-store.ts +27 -0
  68. package/src/routes/transcripts-routes.d.ts +36 -0
  69. package/src/routes/transcripts-routes.d.ts.map +1 -0
  70. package/src/routes/transcripts-routes.test.ts +144 -0
  71. package/src/routes/transcripts-routes.ts +159 -0
  72. package/src/routes/voice-first-run-routes.d.ts +62 -0
  73. package/src/routes/voice-first-run-routes.d.ts.map +1 -0
  74. package/src/routes/voice-first-run-routes.ts +524 -0
  75. package/src/routes/voice-models-routes.d.ts +62 -0
  76. package/src/routes/voice-models-routes.d.ts.map +1 -0
  77. package/src/routes/voice-models-routes.ts +554 -0
  78. package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
  79. package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
  80. package/src/routes/voice-profile-plugin-routes.ts +138 -0
  81. package/src/routes/voice-profiles-management-routes.d.ts +52 -0
  82. package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
  83. package/src/routes/voice-profiles-management-routes.ts +476 -0
  84. package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
  85. package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
  86. package/src/routes/voice-speaker-profile-routes.ts +199 -0
  87. package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
  88. package/src/runtime/capacitor-llama.d.ts +25 -0
  89. package/src/runtime/embedding-manager-support.d.ts +77 -0
  90. package/src/runtime/embedding-manager-support.d.ts.map +1 -0
  91. package/src/runtime/embedding-manager-support.ts +497 -0
  92. package/src/runtime/embedding-presets.d.ts +16 -0
  93. package/src/runtime/embedding-presets.d.ts.map +1 -0
  94. package/src/runtime/embedding-presets.ts +81 -0
  95. package/src/runtime/embedding-warmup-policy.d.ts +14 -0
  96. package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
  97. package/src/runtime/embedding-warmup-policy.test.ts +53 -0
  98. package/src/runtime/embedding-warmup-policy.ts +48 -0
  99. package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
  100. package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
  101. package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
  102. package/src/runtime/ensure-local-inference-handler.ts +1448 -0
  103. package/src/runtime/index.d.ts +15 -0
  104. package/src/runtime/index.d.ts.map +1 -0
  105. package/src/runtime/index.ts +33 -0
  106. package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
  107. package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
  108. package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
  109. package/src/runtime/mobile-local-inference-gate.ts +44 -0
  110. package/src/runtime/voice-entity-binding.d.ts +103 -0
  111. package/src/runtime/voice-entity-binding.d.ts.map +1 -0
  112. package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
  113. package/src/runtime/voice-entity-binding.ts +328 -0
  114. package/src/services/README.md +71 -0
  115. package/src/services/__tests__/backend-selector.test.ts +101 -0
  116. package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
  117. package/src/services/__tests__/gpu-autotune.test.ts +400 -0
  118. package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
  119. package/src/services/__tests__/planner-grammar.test.ts +372 -0
  120. package/src/services/__tests__/runtime-target.test.ts +176 -0
  121. package/src/services/active-model-switch-rollback.test.ts +183 -0
  122. package/src/services/active-model.d.ts +282 -0
  123. package/src/services/active-model.d.ts.map +1 -0
  124. package/src/services/active-model.ts +1213 -0
  125. package/src/services/assignments.d.ts +71 -0
  126. package/src/services/assignments.d.ts.map +1 -0
  127. package/src/services/assignments.test.ts +80 -0
  128. package/src/services/assignments.ts +230 -0
  129. package/src/services/backend-selector.ts +95 -0
  130. package/src/services/backend.d.ts +346 -0
  131. package/src/services/backend.d.ts.map +1 -0
  132. package/src/services/backend.ts +612 -0
  133. package/src/services/bionic-host-loader.d.ts +46 -0
  134. package/src/services/bionic-host-loader.d.ts.map +1 -0
  135. package/src/services/bionic-host-loader.test.ts +133 -0
  136. package/src/services/bionic-host-loader.ts +180 -0
  137. package/src/services/bundled-models.d.ts +34 -0
  138. package/src/services/bundled-models.d.ts.map +1 -0
  139. package/src/services/bundled-models.ts +129 -0
  140. package/src/services/cache-bridge.d.ts +206 -0
  141. package/src/services/cache-bridge.d.ts.map +1 -0
  142. package/src/services/cache-bridge.test.ts +516 -0
  143. package/src/services/cache-bridge.ts +423 -0
  144. package/src/services/catalog.d.ts +10 -0
  145. package/src/services/catalog.d.ts.map +1 -0
  146. package/src/services/catalog.test.ts +238 -0
  147. package/src/services/catalog.ts +27 -0
  148. package/src/services/checkpoint-client.d.ts +109 -0
  149. package/src/services/checkpoint-client.d.ts.map +1 -0
  150. package/src/services/checkpoint-client.ts +258 -0
  151. package/src/services/checkpoint-manager.ts +474 -0
  152. package/src/services/cloud-fallback.d.ts +102 -0
  153. package/src/services/cloud-fallback.d.ts.map +1 -0
  154. package/src/services/cloud-fallback.ts +230 -0
  155. package/src/services/conversation-registry.d.ts +142 -0
  156. package/src/services/conversation-registry.d.ts.map +1 -0
  157. package/src/services/conversation-registry.test.ts +235 -0
  158. package/src/services/conversation-registry.ts +264 -0
  159. package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
  160. package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
  161. package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
  162. package/src/services/device-bridge.d.ts +188 -0
  163. package/src/services/device-bridge.d.ts.map +1 -0
  164. package/src/services/device-bridge.ts +1237 -0
  165. package/src/services/device-resource-metrics.d.ts +149 -0
  166. package/src/services/device-resource-metrics.d.ts.map +1 -0
  167. package/src/services/device-resource-metrics.test.ts +98 -0
  168. package/src/services/device-resource-metrics.ts +346 -0
  169. package/src/services/device-tier.d.ts +115 -0
  170. package/src/services/device-tier.d.ts.map +1 -0
  171. package/src/services/device-tier.test.ts +371 -0
  172. package/src/services/device-tier.ts +410 -0
  173. package/src/services/downloader.d.ts +82 -0
  174. package/src/services/downloader.d.ts.map +1 -0
  175. package/src/services/downloader.test.ts +747 -0
  176. package/src/services/downloader.ts +925 -0
  177. package/src/services/engine-direct-bundle.test.ts +58 -0
  178. package/src/services/engine-streaming.test.ts +80 -0
  179. package/src/services/engine.d.ts +540 -0
  180. package/src/services/engine.d.ts.map +1 -0
  181. package/src/services/engine.ts +1909 -0
  182. package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
  183. package/src/services/ensure-local-artifacts.test.ts +368 -0
  184. package/src/services/ensure-local-artifacts.ts +351 -0
  185. package/src/services/external-scanner.d.ts +17 -0
  186. package/src/services/external-scanner.d.ts.map +1 -0
  187. package/src/services/external-scanner.ts +312 -0
  188. package/src/services/ffi-llm-mock.ts +354 -0
  189. package/src/services/ffi-llm-streaming-abi.ts +442 -0
  190. package/src/services/ffi-streaming-backend.d.ts +180 -0
  191. package/src/services/ffi-streaming-backend.d.ts.map +1 -0
  192. package/src/services/ffi-streaming-backend.ts +382 -0
  193. package/src/services/ffi-streaming-runner.d.ts +122 -0
  194. package/src/services/ffi-streaming-runner.d.ts.map +1 -0
  195. package/src/services/ffi-streaming-runner.test.ts +60 -0
  196. package/src/services/ffi-streaming-runner.ts +354 -0
  197. package/src/services/ffi-unload-ordering.test.ts +162 -0
  198. package/src/services/gpu-autotune.ts +534 -0
  199. package/src/services/gpu-detect.d.ts +56 -0
  200. package/src/services/gpu-detect.d.ts.map +1 -0
  201. package/src/services/gpu-detect.ts +139 -0
  202. package/src/services/handler-registry.d.ts +72 -0
  203. package/src/services/handler-registry.d.ts.map +1 -0
  204. package/src/services/handler-registry.ts +240 -0
  205. package/src/services/hardware.d.ts +63 -0
  206. package/src/services/hardware.d.ts.map +1 -0
  207. package/src/services/hardware.test.ts +231 -0
  208. package/src/services/hardware.ts +410 -0
  209. package/src/services/hf-search.d.ts +26 -0
  210. package/src/services/hf-search.d.ts.map +1 -0
  211. package/src/services/hf-search.test.ts +69 -0
  212. package/src/services/hf-search.ts +420 -0
  213. package/src/services/image-description-runtime.d.ts +14 -0
  214. package/src/services/image-description-runtime.d.ts.map +1 -0
  215. package/src/services/image-description-runtime.test.ts +61 -0
  216. package/src/services/image-description-runtime.ts +118 -0
  217. package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
  218. package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
  219. package/src/services/imagegen/aosp-unavailable.ts +229 -0
  220. package/src/services/imagegen/backend-selector.d.ts +118 -0
  221. package/src/services/imagegen/backend-selector.d.ts.map +1 -0
  222. package/src/services/imagegen/backend-selector.ts +277 -0
  223. package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
  224. package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
  225. package/src/services/imagegen/coreml-unavailable.ts +237 -0
  226. package/src/services/imagegen/errors.d.ts +16 -0
  227. package/src/services/imagegen/errors.d.ts.map +1 -0
  228. package/src/services/imagegen/errors.ts +40 -0
  229. package/src/services/imagegen/index.d.ts +58 -0
  230. package/src/services/imagegen/index.d.ts.map +1 -0
  231. package/src/services/imagegen/index.ts +144 -0
  232. package/src/services/imagegen/mflux.d.ts +74 -0
  233. package/src/services/imagegen/mflux.d.ts.map +1 -0
  234. package/src/services/imagegen/mflux.ts +313 -0
  235. package/src/services/imagegen/sd-cpp.d.ts +180 -0
  236. package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
  237. package/src/services/imagegen/sd-cpp.ts +718 -0
  238. package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
  239. package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
  240. package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
  241. package/src/services/imagegen/types.d.ts +181 -0
  242. package/src/services/imagegen/types.d.ts.map +1 -0
  243. package/src/services/imagegen/types.ts +193 -0
  244. package/src/services/index.d.ts +29 -0
  245. package/src/services/index.d.ts.map +1 -0
  246. package/src/services/index.ts +211 -0
  247. package/src/services/inference-capabilities.d.ts +132 -0
  248. package/src/services/inference-capabilities.d.ts.map +1 -0
  249. package/src/services/inference-capabilities.test.ts +75 -0
  250. package/src/services/inference-capabilities.ts +204 -0
  251. package/src/services/inference-telemetry.d.ts +59 -0
  252. package/src/services/inference-telemetry.d.ts.map +1 -0
  253. package/src/services/inference-telemetry.ts +143 -0
  254. package/src/services/ios-llama-streaming.ts +248 -0
  255. package/src/services/kv-spill.d.ts +189 -0
  256. package/src/services/kv-spill.d.ts.map +1 -0
  257. package/src/services/kv-spill.test.ts +222 -0
  258. package/src/services/kv-spill.ts +356 -0
  259. package/src/services/latency-trace.d.ts +346 -0
  260. package/src/services/latency-trace.d.ts.map +1 -0
  261. package/src/services/latency-trace.test.ts +266 -0
  262. package/src/services/latency-trace.ts +844 -0
  263. package/src/services/llama-server-metrics.ts +304 -0
  264. package/src/services/llm-streaming-binding.d.ts +96 -0
  265. package/src/services/llm-streaming-binding.d.ts.map +1 -0
  266. package/src/services/llm-streaming-binding.ts +136 -0
  267. package/src/services/load-args.d.ts +82 -0
  268. package/src/services/load-args.d.ts.map +1 -0
  269. package/src/services/load-args.ts +81 -0
  270. package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
  271. package/src/services/manifest/index.d.ts +4 -0
  272. package/src/services/manifest/index.d.ts.map +1 -0
  273. package/src/services/manifest/index.ts +66 -0
  274. package/src/services/manifest/manifest.test.ts +689 -0
  275. package/src/services/manifest/schema.d.ts +713 -0
  276. package/src/services/manifest/schema.d.ts.map +1 -0
  277. package/src/services/manifest/schema.ts +653 -0
  278. package/src/services/manifest/types.d.ts +30 -0
  279. package/src/services/manifest/types.d.ts.map +1 -0
  280. package/src/services/manifest/types.ts +55 -0
  281. package/src/services/manifest/validator.d.ts +66 -0
  282. package/src/services/manifest/validator.d.ts.map +1 -0
  283. package/src/services/manifest/validator.ts +567 -0
  284. package/src/services/memory-arbiter.d.ts +318 -0
  285. package/src/services/memory-arbiter.d.ts.map +1 -0
  286. package/src/services/memory-arbiter.test.ts +419 -0
  287. package/src/services/memory-arbiter.ts +925 -0
  288. package/src/services/memory-monitor.d.ts +122 -0
  289. package/src/services/memory-monitor.d.ts.map +1 -0
  290. package/src/services/memory-monitor.test.ts +208 -0
  291. package/src/services/memory-monitor.ts +297 -0
  292. package/src/services/memory-pressure.d.ts +130 -0
  293. package/src/services/memory-pressure.d.ts.map +1 -0
  294. package/src/services/memory-pressure.ts +414 -0
  295. package/src/services/mtp-doctor.d.ts +13 -0
  296. package/src/services/mtp-doctor.d.ts.map +1 -0
  297. package/src/services/mtp-doctor.ts +78 -0
  298. package/src/services/network-policy.d.ts +127 -0
  299. package/src/services/network-policy.d.ts.map +1 -0
  300. package/src/services/network-policy.ts +346 -0
  301. package/src/services/paths.d.ts +6 -0
  302. package/src/services/paths.d.ts.map +1 -0
  303. package/src/services/paths.ts +25 -0
  304. package/src/services/planner-skeleton.d.ts +124 -0
  305. package/src/services/planner-skeleton.d.ts.map +1 -0
  306. package/src/services/planner-skeleton.ts +175 -0
  307. package/src/services/providers.d.ts +38 -0
  308. package/src/services/providers.d.ts.map +1 -0
  309. package/src/services/providers.ts +507 -0
  310. package/src/services/ram-budget-cache.test.ts +163 -0
  311. package/src/services/ram-budget.d.ts +110 -0
  312. package/src/services/ram-budget.d.ts.map +1 -0
  313. package/src/services/ram-budget.ts +0 -0
  314. package/src/services/readiness.d.ts +9 -0
  315. package/src/services/readiness.d.ts.map +1 -0
  316. package/src/services/readiness.test.ts +87 -0
  317. package/src/services/readiness.ts +238 -0
  318. package/src/services/recommendation.d.ts +111 -0
  319. package/src/services/recommendation.d.ts.map +1 -0
  320. package/src/services/recommendation.ts +671 -0
  321. package/src/services/registry.d.ts +35 -0
  322. package/src/services/registry.d.ts.map +1 -0
  323. package/src/services/registry.ts +151 -0
  324. package/src/services/router-handler.d.ts +92 -0
  325. package/src/services/router-handler.d.ts.map +1 -0
  326. package/src/services/router-handler.test.ts +45 -0
  327. package/src/services/router-handler.ts +407 -0
  328. package/src/services/routing-policy.d.ts +69 -0
  329. package/src/services/routing-policy.d.ts.map +1 -0
  330. package/src/services/routing-policy.test.ts +164 -0
  331. package/src/services/routing-policy.ts +297 -0
  332. package/src/services/routing-preferences.d.ts +8 -0
  333. package/src/services/routing-preferences.d.ts.map +1 -0
  334. package/src/services/routing-preferences.ts +17 -0
  335. package/src/services/runtime-target.d.ts +98 -0
  336. package/src/services/runtime-target.d.ts.map +1 -0
  337. package/src/services/runtime-target.ts +154 -0
  338. package/src/services/service.d.ts +128 -0
  339. package/src/services/service.d.ts.map +1 -0
  340. package/src/services/service.test.ts +223 -0
  341. package/src/services/service.ts +735 -0
  342. package/src/services/session-pool.d.ts +72 -0
  343. package/src/services/session-pool.d.ts.map +1 -0
  344. package/src/services/session-pool.ts +153 -0
  345. package/src/services/structured-output/deterministic-repair.d.ts +23 -0
  346. package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
  347. package/src/services/structured-output/deterministic-repair.test.ts +169 -0
  348. package/src/services/structured-output/deterministic-repair.ts +443 -0
  349. package/src/services/structured-output/index.ts +4 -0
  350. package/src/services/structured-output.d.ts +311 -0
  351. package/src/services/structured-output.d.ts.map +1 -0
  352. package/src/services/structured-output.test.ts +483 -0
  353. package/src/services/structured-output.ts +712 -0
  354. package/src/services/system-memory.d.ts +33 -0
  355. package/src/services/system-memory.d.ts.map +1 -0
  356. package/src/services/system-memory.test.ts +47 -0
  357. package/src/services/system-memory.ts +67 -0
  358. package/src/services/transcription-priority.test.ts +211 -0
  359. package/src/services/types.d.ts +19 -0
  360. package/src/services/types.d.ts.map +1 -0
  361. package/src/services/types.ts +55 -0
  362. package/src/services/verify-on-device.d.ts +34 -0
  363. package/src/services/verify-on-device.d.ts.map +1 -0
  364. package/src/services/verify-on-device.test.ts +87 -0
  365. package/src/services/verify-on-device.ts +127 -0
  366. package/src/services/verify.d.ts +8 -0
  367. package/src/services/verify.d.ts.map +1 -0
  368. package/src/services/verify.ts +13 -0
  369. package/src/services/vision/aosp-unavailable.d.ts +115 -0
  370. package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
  371. package/src/services/vision/aosp-unavailable.ts +163 -0
  372. package/src/services/vision/capacitor-llama.d.ts +99 -0
  373. package/src/services/vision/capacitor-llama.d.ts.map +1 -0
  374. package/src/services/vision/capacitor-llama.ts +255 -0
  375. package/src/services/vision/cloud-fallback.d.ts +47 -0
  376. package/src/services/vision/cloud-fallback.d.ts.map +1 -0
  377. package/src/services/vision/cloud-fallback.test.ts +243 -0
  378. package/src/services/vision/cloud-fallback.ts +268 -0
  379. package/src/services/vision/fallback-chain.test.ts +86 -0
  380. package/src/services/vision/hash.d.ts +71 -0
  381. package/src/services/vision/hash.d.ts.map +1 -0
  382. package/src/services/vision/hash.ts +157 -0
  383. package/src/services/vision/index.d.ts +95 -0
  384. package/src/services/vision/index.d.ts.map +1 -0
  385. package/src/services/vision/index.ts +251 -0
  386. package/src/services/vision/llama-server.d.ts +73 -0
  387. package/src/services/vision/llama-server.d.ts.map +1 -0
  388. package/src/services/vision/llama-server.ts +177 -0
  389. package/src/services/vision/types.d.ts +153 -0
  390. package/src/services/vision/types.d.ts.map +1 -0
  391. package/src/services/vision/types.ts +154 -0
  392. package/src/services/vision/vast-fallback.d.ts +18 -0
  393. package/src/services/vision/vast-fallback.d.ts.map +1 -0
  394. package/src/services/vision/vast-fallback.ts +127 -0
  395. package/src/services/vision-embedding-cache.d.ts +98 -0
  396. package/src/services/vision-embedding-cache.d.ts.map +1 -0
  397. package/src/services/vision-embedding-cache.ts +189 -0
  398. package/src/services/voice/VOICE_WORKBENCH.md +88 -0
  399. package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
  400. package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
  401. package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
  402. package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
  403. package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
  404. package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
  405. package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
  406. package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
  407. package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
  408. package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
  409. package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
  410. package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
  411. package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
  412. package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
  413. package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
  414. package/src/services/voice/asr-timed.real.test.ts +141 -0
  415. package/src/services/voice/audio-frame-consumer.d.ts +212 -0
  416. package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
  417. package/src/services/voice/audio-frame-consumer.test.ts +343 -0
  418. package/src/services/voice/audio-frame-consumer.ts +491 -0
  419. package/src/services/voice/barge-in.d.ts +112 -0
  420. package/src/services/voice/barge-in.d.ts.map +1 -0
  421. package/src/services/voice/barge-in.test.ts +244 -0
  422. package/src/services/voice/barge-in.ts +336 -0
  423. package/src/services/voice/cancellation-coordinator.d.ts +127 -0
  424. package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
  425. package/src/services/voice/cancellation-coordinator.test.ts +196 -0
  426. package/src/services/voice/cancellation-coordinator.ts +269 -0
  427. package/src/services/voice/checkpoint-manager.d.ts +199 -0
  428. package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
  429. package/src/services/voice/checkpoint-manager.ts +401 -0
  430. package/src/services/voice/checkpoint-policy.ts +336 -0
  431. package/src/services/voice/composite-eot-classifier.test.ts +59 -0
  432. package/src/services/voice/e2e-harness.test.ts +182 -0
  433. package/src/services/voice/e2e-harness.ts +743 -0
  434. package/src/services/voice/eager-context-builder.d.ts +170 -0
  435. package/src/services/voice/eager-context-builder.d.ts.map +1 -0
  436. package/src/services/voice/eager-context-builder.ts +262 -0
  437. package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
  438. package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
  439. package/src/services/voice/eliza1-eot-scorer.ts +242 -0
  440. package/src/services/voice/embedding-server.ts +200 -0
  441. package/src/services/voice/embedding.d.ts +133 -0
  442. package/src/services/voice/embedding.d.ts.map +1 -0
  443. package/src/services/voice/embedding.test.ts +131 -0
  444. package/src/services/voice/embedding.ts +243 -0
  445. package/src/services/voice/emotion-attribution.d.ts +68 -0
  446. package/src/services/voice/emotion-attribution.d.ts.map +1 -0
  447. package/src/services/voice/emotion-attribution.test.ts +129 -0
  448. package/src/services/voice/emotion-attribution.ts +361 -0
  449. package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
  450. package/src/services/voice/engine-bridge.d.ts +759 -0
  451. package/src/services/voice/engine-bridge.d.ts.map +1 -0
  452. package/src/services/voice/engine-bridge.test.ts +384 -0
  453. package/src/services/voice/engine-bridge.ts +2302 -0
  454. package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
  455. package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
  456. package/src/services/voice/eot-classifier-ggml.ts +566 -0
  457. package/src/services/voice/eot-classifier.d.ts +214 -0
  458. package/src/services/voice/eot-classifier.d.ts.map +1 -0
  459. package/src/services/voice/eot-classifier.ts +533 -0
  460. package/src/services/voice/errors.d.ts +20 -0
  461. package/src/services/voice/errors.d.ts.map +1 -0
  462. package/src/services/voice/errors.ts +32 -0
  463. package/src/services/voice/expressive-tags.d.ts +158 -0
  464. package/src/services/voice/expressive-tags.d.ts.map +1 -0
  465. package/src/services/voice/expressive-tags.ts +405 -0
  466. package/src/services/voice/ffi-bindings.d.ts +674 -0
  467. package/src/services/voice/ffi-bindings.d.ts.map +1 -0
  468. package/src/services/voice/ffi-bindings.test.ts +728 -0
  469. package/src/services/voice/ffi-bindings.ts +3225 -0
  470. package/src/services/voice/first-line-cache.d.ts +181 -0
  471. package/src/services/voice/first-line-cache.d.ts.map +1 -0
  472. package/src/services/voice/first-line-cache.ts +725 -0
  473. package/src/services/voice/fused-eot-scorer.d.ts +51 -0
  474. package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
  475. package/src/services/voice/fused-eot-scorer.ts +135 -0
  476. package/src/services/voice/index.d.ts +91 -0
  477. package/src/services/voice/index.d.ts.map +1 -0
  478. package/src/services/voice/index.ts +481 -0
  479. package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
  480. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
  481. package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
  482. package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
  483. package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
  484. package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
  485. package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
  486. package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
  487. package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
  488. package/src/services/voice/kokoro/index.ts +79 -0
  489. package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
  490. package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
  491. package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
  492. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
  493. package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
  494. package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
  495. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
  496. package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
  497. package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
  498. package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
  499. package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
  500. package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
  501. package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
  502. package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
  503. package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
  504. package/src/services/voice/kokoro/phonemizer.ts +344 -0
  505. package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
  506. package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
  507. package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
  508. package/src/services/voice/kokoro/pick-runtime.ts +130 -0
  509. package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
  510. package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
  511. package/src/services/voice/kokoro/runtime-selection.ts +237 -0
  512. package/src/services/voice/kokoro/types.d.ts +82 -0
  513. package/src/services/voice/kokoro/types.d.ts.map +1 -0
  514. package/src/services/voice/kokoro/types.ts +95 -0
  515. package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
  516. package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
  517. package/src/services/voice/kokoro/voice-presets.ts +129 -0
  518. package/src/services/voice/kokoro/voices.d.ts +30 -0
  519. package/src/services/voice/kokoro/voices.d.ts.map +1 -0
  520. package/src/services/voice/kokoro/voices.ts +64 -0
  521. package/src/services/voice/lifecycle.d.ts +135 -0
  522. package/src/services/voice/lifecycle.d.ts.map +1 -0
  523. package/src/services/voice/lifecycle.test.ts +315 -0
  524. package/src/services/voice/lifecycle.ts +301 -0
  525. package/src/services/voice/live-diarization-session.d.ts +96 -0
  526. package/src/services/voice/live-diarization-session.d.ts.map +1 -0
  527. package/src/services/voice/live-diarization-session.ts +289 -0
  528. package/src/services/voice/mic-source.d.ts +136 -0
  529. package/src/services/voice/mic-source.d.ts.map +1 -0
  530. package/src/services/voice/mic-source.test.ts +210 -0
  531. package/src/services/voice/mic-source.ts +503 -0
  532. package/src/services/voice/optimistic-policy.d.ts +109 -0
  533. package/src/services/voice/optimistic-policy.d.ts.map +1 -0
  534. package/src/services/voice/optimistic-policy.test.ts +101 -0
  535. package/src/services/voice/optimistic-policy.ts +192 -0
  536. package/src/services/voice/optimistic-rollback.ts +343 -0
  537. package/src/services/voice/partial-stabilizer.d.ts +73 -0
  538. package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
  539. package/src/services/voice/partial-stabilizer.test.ts +68 -0
  540. package/src/services/voice/partial-stabilizer.ts +140 -0
  541. package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
  542. package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
  543. package/src/services/voice/phoneme-tokenizer.ts +158 -0
  544. package/src/services/voice/phrase-cache.d.ts +76 -0
  545. package/src/services/voice/phrase-cache.d.ts.map +1 -0
  546. package/src/services/voice/phrase-cache.test.ts +242 -0
  547. package/src/services/voice/phrase-cache.ts +186 -0
  548. package/src/services/voice/phrase-chunker.d.ts +62 -0
  549. package/src/services/voice/phrase-chunker.d.ts.map +1 -0
  550. package/src/services/voice/phrase-chunker.test.ts +239 -0
  551. package/src/services/voice/phrase-chunker.ts +281 -0
  552. package/src/services/voice/pipeline-impls.d.ts +151 -0
  553. package/src/services/voice/pipeline-impls.d.ts.map +1 -0
  554. package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
  555. package/src/services/voice/pipeline-impls.test.ts +292 -0
  556. package/src/services/voice/pipeline-impls.ts +315 -0
  557. package/src/services/voice/pipeline.d.ts +216 -0
  558. package/src/services/voice/pipeline.d.ts.map +1 -0
  559. package/src/services/voice/pipeline.ts +505 -0
  560. package/src/services/voice/prefill-client.d.ts +123 -0
  561. package/src/services/voice/prefill-client.d.ts.map +1 -0
  562. package/src/services/voice/prefill-client.ts +316 -0
  563. package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
  564. package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
  565. package/src/services/voice/prefix-preserving-queue.ts +162 -0
  566. package/src/services/voice/profile-store.d.ts +248 -0
  567. package/src/services/voice/profile-store.d.ts.map +1 -0
  568. package/src/services/voice/profile-store.ts +887 -0
  569. package/src/services/voice/real-audio-decode.test.ts +148 -0
  570. package/src/services/voice/ring-buffer.d.ts +40 -0
  571. package/src/services/voice/ring-buffer.d.ts.map +1 -0
  572. package/src/services/voice/ring-buffer.test.ts +129 -0
  573. package/src/services/voice/ring-buffer.ts +123 -0
  574. package/src/services/voice/rollback-queue.d.ts +24 -0
  575. package/src/services/voice/rollback-queue.d.ts.map +1 -0
  576. package/src/services/voice/rollback-queue.ts +74 -0
  577. package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
  578. package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
  579. package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
  580. package/src/services/voice/samantha-preset-placeholder.ts +148 -0
  581. package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
  582. package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
  583. package/src/services/voice/samantha-preset-regenerator.ts +393 -0
  584. package/src/services/voice/scheduler.d.ts +146 -0
  585. package/src/services/voice/scheduler.d.ts.map +1 -0
  586. package/src/services/voice/scheduler.t2.test.ts +141 -0
  587. package/src/services/voice/scheduler.ts +927 -0
  588. package/src/services/voice/shared-resources.d.ts +190 -0
  589. package/src/services/voice/shared-resources.d.ts.map +1 -0
  590. package/src/services/voice/shared-resources.ts +320 -0
  591. package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
  592. package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
  593. package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
  594. package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
  595. package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
  596. package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
  597. package/src/services/voice/speaker/diarizer-fused.ts +154 -0
  598. package/src/services/voice/speaker/diarizer.d.ts +75 -0
  599. package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
  600. package/src/services/voice/speaker/diarizer.ts +218 -0
  601. package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
  602. package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
  603. package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
  604. package/src/services/voice/speaker/encoder-fused.ts +138 -0
  605. package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
  606. package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
  607. package/src/services/voice/speaker/encoder-ggml.ts +79 -0
  608. package/src/services/voice/speaker/encoder.d.ts +37 -0
  609. package/src/services/voice/speaker/encoder.d.ts.map +1 -0
  610. package/src/services/voice/speaker/encoder.ts +105 -0
  611. package/src/services/voice/speaker-imprint.d.ts +83 -0
  612. package/src/services/voice/speaker-imprint.d.ts.map +1 -0
  613. package/src/services/voice/speaker-imprint.test.ts +185 -0
  614. package/src/services/voice/speaker-imprint.ts +312 -0
  615. package/src/services/voice/speaker-preset-cache.d.ts +77 -0
  616. package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
  617. package/src/services/voice/speaker-preset-cache.test.ts +154 -0
  618. package/src/services/voice/speaker-preset-cache.ts +195 -0
  619. package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
  620. package/src/services/voice/system-audio-sink.d.ts +73 -0
  621. package/src/services/voice/system-audio-sink.d.ts.map +1 -0
  622. package/src/services/voice/system-audio-sink.test.ts +29 -0
  623. package/src/services/voice/system-audio-sink.ts +366 -0
  624. package/src/services/voice/transcriber.d.ts +244 -0
  625. package/src/services/voice/transcriber.d.ts.map +1 -0
  626. package/src/services/voice/transcriber.test.ts +392 -0
  627. package/src/services/voice/transcriber.ts +704 -0
  628. package/src/services/voice/transcript-knowledge.d.ts +37 -0
  629. package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
  630. package/src/services/voice/transcript-knowledge.test.ts +68 -0
  631. package/src/services/voice/transcript-knowledge.ts +75 -0
  632. package/src/services/voice/transcript-service.d.ts +41 -0
  633. package/src/services/voice/transcript-service.d.ts.map +1 -0
  634. package/src/services/voice/transcript-service.test.ts +137 -0
  635. package/src/services/voice/transcript-service.ts +141 -0
  636. package/src/services/voice/transcript-store.d.ts +53 -0
  637. package/src/services/voice/transcript-store.d.ts.map +1 -0
  638. package/src/services/voice/transcript-store.test.ts +153 -0
  639. package/src/services/voice/transcript-store.ts +132 -0
  640. package/src/services/voice/turn-controller.d.ts +183 -0
  641. package/src/services/voice/turn-controller.d.ts.map +1 -0
  642. package/src/services/voice/turn-controller.test.ts +575 -0
  643. package/src/services/voice/turn-controller.ts +596 -0
  644. package/src/services/voice/types.d.ts +643 -0
  645. package/src/services/voice/types.d.ts.map +1 -0
  646. package/src/services/voice/types.ts +699 -0
  647. package/src/services/voice/vad.d.ts +282 -0
  648. package/src/services/voice/vad.d.ts.map +1 -0
  649. package/src/services/voice/vad.test.ts +480 -0
  650. package/src/services/voice/vad.ts +827 -0
  651. package/src/services/voice/vad.v1-v4.test.ts +222 -0
  652. package/src/services/voice/voice-budget.d.ts +241 -0
  653. package/src/services/voice/voice-budget.d.ts.map +1 -0
  654. package/src/services/voice/voice-budget.test.ts +418 -0
  655. package/src/services/voice/voice-budget.ts +635 -0
  656. package/src/services/voice/voice-duet.test.ts +375 -0
  657. package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
  658. package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
  659. package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
  660. package/src/services/voice/voice-emotion-classifier.ts +273 -0
  661. package/src/services/voice/voice-preset-format.d.ts +158 -0
  662. package/src/services/voice/voice-preset-format.d.ts.map +1 -0
  663. package/src/services/voice/voice-preset-format.ts +700 -0
  664. package/src/services/voice/voice-preset-generator.test.ts +89 -0
  665. package/src/services/voice/voice-profile-artifact.d.ts +116 -0
  666. package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
  667. package/src/services/voice/voice-profile-artifact.test.ts +138 -0
  668. package/src/services/voice/voice-profile-artifact.ts +518 -0
  669. package/src/services/voice/voice-profile-routes.d.ts +83 -0
  670. package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
  671. package/src/services/voice/voice-profile-routes.test.ts +429 -0
  672. package/src/services/voice/voice-profile-routes.ts +425 -0
  673. package/src/services/voice/voice-scenario.ts +154 -0
  674. package/src/services/voice/voice-settings.d.ts +82 -0
  675. package/src/services/voice/voice-settings.d.ts.map +1 -0
  676. package/src/services/voice/voice-settings.ts +172 -0
  677. package/src/services/voice/voice-state-machine.d.ts +364 -0
  678. package/src/services/voice/voice-state-machine.d.ts.map +1 -0
  679. package/src/services/voice/voice-state-machine.ts +727 -0
  680. package/src/services/voice/voice-workbench-report.test.ts +168 -0
  681. package/src/services/voice/voice-workbench-report.ts +326 -0
  682. package/src/services/voice/voice-workbench.test.ts +158 -0
  683. package/src/services/voice/voice.test.ts +1070 -0
  684. package/src/services/voice/wake-word-ggml.d.ts +101 -0
  685. package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
  686. package/src/services/voice/wake-word-ggml.ts +320 -0
  687. package/src/services/voice/wake-word.d.ts +255 -0
  688. package/src/services/voice/wake-word.d.ts.map +1 -0
  689. package/src/services/voice/wake-word.test.ts +298 -0
  690. package/src/services/voice/wake-word.ts +554 -0
  691. package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
  692. package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
  693. package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
  694. package/src/services/voice-model-updater.d.ts +240 -0
  695. package/src/services/voice-model-updater.d.ts.map +1 -0
  696. package/src/services/voice-model-updater.ts +724 -0
  697. package/src/services/voice-prewarm.d.ts +3 -0
  698. package/src/services/voice-prewarm.d.ts.map +1 -0
  699. package/src/services/voice-prewarm.ts +51 -0
  700. package/dist/index.d.ts +0 -37
  701. package/dist/index.js +0 -1098
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Vision-describe capability (WS2) — public entry point.
3
+ *
4
+ * This module is what plugin-vision (WS4), the IMAGE_DESCRIPTION
5
+ * handler in `provider.ts`, and computer-use (WS9) import to register
6
+ * vision capability with the WS1 MemoryArbiter.
7
+ *
8
+ * Wiring:
9
+ *
10
+ * const arbiter = service.getMemoryArbiter();
11
+ * const registration = createVisionCapabilityRegistration({
12
+ * loader: createDefaultVisionLoader({ ... }),
13
+ * arbiterCache: arbiter,
14
+ * });
15
+ * arbiter.registerCapability(registration);
16
+ *
17
+ * `createVisionCapabilityRegistration` wraps the underlying backend so
18
+ * the arbiter's `run(request)` path:
19
+ *
20
+ * 1. Hashes the request's image bytes (model-family-scoped).
21
+ * 2. Checks the arbiter's vision-embedding cache.
22
+ * 3. On miss: calls `backend.describe(request)`, lets the backend
23
+ * run its own projector + decoder. Backends that cannot expose projected
24
+ * tokens return decoder text only, so the cache stays empty for this hash.
25
+ * The decoder text is what the caller wanted anyway.
26
+ * 4. On hit: calls `backend.describe(request, { projectedTokens })`.
27
+ * Backends that support pre-projected token reuse skip the
28
+ * projector entirely. Backends that don't ignore the hint; the
29
+ * result is still correct but the projector cost is paid again.
30
+ */
31
+
32
+ export {
33
+ type AospLlamaMtmdBinding,
34
+ type AospMtmdHandle,
35
+ type LoadAospVisionBackendOptions,
36
+ loadAospVisionBackend,
37
+ } from "./aosp-unavailable";
38
+ export {
39
+ type CapacitorLlamaMtmdBinding,
40
+ type CapacitorLlamaMtmdHandle,
41
+ type CapacitorLlamaVisionBackendOptions,
42
+ loadCapacitorLlamaVisionBackend,
43
+ VisionBackendUnavailableError,
44
+ type VisionManagerLike,
45
+ } from "./capacitor-llama";
46
+ export {
47
+ classifyLocalVisionError,
48
+ type LocalImageDescriptionHandler,
49
+ type LocalVisionOutcome,
50
+ type VisionCloudFallbackOptions,
51
+ type VisionFallbackReason,
52
+ type WrappedImageDescriptionHandler,
53
+ wrapImageDescriptionHandlerWithCloudFallback,
54
+ } from "./cloud-fallback";
55
+ export {
56
+ hashImageBytes,
57
+ hashRawPixels,
58
+ hashVisionInput,
59
+ resolveImageBytes,
60
+ } from "./hash";
61
+ export {
62
+ createLlamaServerVisionBackend,
63
+ type LlamaServerVisionBackendOptions,
64
+ } from "./llama-server";
65
+ export type {
66
+ VisionDescribeBackend,
67
+ VisionDescribeBackendLoader,
68
+ VisionDescribeBackendOptions,
69
+ VisionDescribeLoadArgs,
70
+ VisionDescribeRequest,
71
+ VisionDescribeResult,
72
+ VisionImageChannelOrder,
73
+ VisionImageInput,
74
+ } from "./types";
75
+ export {
76
+ type VisionVastFallbackOptions,
77
+ wrapImageDescriptionHandlerWithVastFallback,
78
+ } from "./vast-fallback";
79
+
80
+ import type {
81
+ ArbiterCapability,
82
+ CapabilityRegistration,
83
+ } from "../memory-arbiter";
84
+ import { hashVisionInput } from "./hash";
85
+ import type {
86
+ VisionDescribeBackend,
87
+ VisionDescribeBackendLoader,
88
+ VisionDescribeRequest,
89
+ VisionDescribeResult,
90
+ } from "./types";
91
+
/** Projected-token payload stored in (and read back from) the cache. */
interface VisionEmbeddingEntry {
	tokens: Float32Array;
	tokenCount: number;
	hiddenSize: number;
}

/**
 * The slice of the arbiter's cache surface this module depends on.
 * Typing against this narrow shape instead of the whole MemoryArbiter
 * lets tests hand in a small fake cache.
 */
export interface VisionEmbeddingCacheLike {
	/**
	 * Look up the entry stored under `hash`; `null` when nothing is
	 * cached. An entry may carry an optional `live` flag.
	 */
	getCachedVisionEmbedding(
		hash: string,
	): (VisionEmbeddingEntry & { live?: boolean }) | null;
	/** Store `entry` under `hash`, optionally with a time-to-live in ms. */
	setCachedVisionEmbedding(
		hash: string,
		entry: VisionEmbeddingEntry,
		ttlMs?: number,
	): void;
}
113
+
/** Options accepted by `createVisionCapabilityRegistration`. */
export interface CreateVisionCapabilityRegistrationOptions {
	/** Produces the backend; invoked by the registration's `load(modelKey)`. */
	loader: VisionDescribeBackendLoader;
	/**
	 * Cache passthrough — usually the arbiter itself. When supplied, the
	 * registration hashes the request image and consults this cache
	 * before it calls the backend's `describe`.
	 */
	arbiterCache?: VisionEmbeddingCacheLike;
	/** Model family mixed into the cache key when the request names none. Defaults to `qwen3-vl`. */
	modelFamily?: string;
	/** Value reported as the registration's `estimatedMb`. Defaults to 600. */
	estimatedMb?: number;
}
126
+
/**
 * Produce the `vision-describe` `CapabilityRegistration` that is handed
 * to `arbiter.registerCapability()`.
 *
 * The registration's `run` consults the optional embedding cache and,
 * on a live hit, forwards the cached projected tokens to
 * `backend.describe` as a hint. The returned result always carries a
 * `cacheHit` flag reflecting whether such a hint was forwarded.
 *
 * @param opts - Loader, optional cache, and default family / size estimate.
 * @returns The registration object (`load`, `unload`, `run`).
 */
export function createVisionCapabilityRegistration(
	opts: CreateVisionCapabilityRegistrationOptions,
): CapabilityRegistration<
	VisionDescribeBackend,
	VisionDescribeRequest,
	VisionDescribeResult
> {
	const capability: ArbiterCapability = "vision-describe";
	const defaultFamily = opts.modelFamily ?? "qwen3-vl";
	const { arbiterCache, loader } = opts;

	/** Return the live cache entry for this request's image, if any. */
	const findLiveEntry = (request: VisionDescribeRequest) => {
		const familyForKey = request.modelFamily ?? defaultFamily;
		if (!arbiterCache) return null;
		// A URL cannot be hashed without fetching it first, and fetching
		// here would mean paying for the download twice.
		if (request.image.kind === "url") return null;
		try {
			const key = hashVisionInput(request.image, familyForKey);
			const entry = arbiterCache.getCachedVisionEmbedding(key);
			if (entry && entry.live !== false) return entry;
		} catch {
			// Hashing can fail (e.g. a malformed data URL). Treat that as
			// a miss so the describe request itself still goes through.
		}
		return null;
	};

	return {
		capability,
		residentRole: "vision",
		estimatedMb: opts.estimatedMb ?? 600,
		async load(modelKey) {
			const backend = await loader(modelKey);
			return backend;
		},
		async unload(backend) {
			await backend.dispose();
		},
		async run(backend, request) {
			const entry = findLiveEntry(request);
			let projectedTokens:
				| { tokens: Float32Array; tokenCount: number; hiddenSize: number }
				| undefined;
			if (entry) {
				projectedTokens = {
					tokens: entry.tokens,
					tokenCount: entry.tokenCount,
					hiddenSize: entry.hiddenSize,
				};
			}
			const described = await backend.describe(request, { projectedTokens });
			return {
				...described,
				cacheHit: projectedTokens !== undefined,
			};
		},
	};
}
190
+
191
+ import type {
192
+ IAgentRuntime,
193
+ ImageDescriptionParams,
194
+ ImageDescriptionResult,
195
+ } from "@elizaos/core";
196
+ import {
197
+ type LocalImageDescriptionHandler,
198
+ type VisionCloudFallbackOptions,
199
+ wrapImageDescriptionHandlerWithCloudFallback,
200
+ } from "./cloud-fallback";
201
+ import {
202
+ type VisionVastFallbackOptions,
203
+ wrapImageDescriptionHandlerWithVastFallback,
204
+ } from "./vast-fallback";
205
+
/**
 * Wire the local handler into the local → cloud → vast chain and adapt
 * the result to the runtime's `(runtime, params)` handler signature.
 *
 * If the chain still reports `{ kind: "fallback" }` after every
 * provider has been tried, the returned handler throws an `Error`
 * naming the fallback reason (with the underlying cause attached when
 * there is one) instead of returning that sentinel to the runtime.
 *
 * `ensure-local-inference-handler.ts` uses this as the single entry
 * point at the IMAGE_DESCRIPTION model registration site. Tests cover
 * the individual `wrap*` helpers; this function is the production
 * wiring.
 *
 * @param local - The local IMAGE_DESCRIPTION handler to try first.
 * @param options - Optional settings forwarded to the cloud and vast wrappers.
 * @returns A runtime-shaped handler; the `runtime` argument is not used.
 * @throws Error when the final outcome is a fallback.
 */
export function withVisionFallbackChain(
	local: LocalImageDescriptionHandler,
	options: {
		cloud?: VisionCloudFallbackOptions;
		vast?: VisionVastFallbackOptions;
	} = {},
): (
	runtime: IAgentRuntime,
	params: ImageDescriptionParams | string,
) => Promise<ImageDescriptionResult> {
	const withCloud = wrapImageDescriptionHandlerWithCloudFallback(
		local,
		options.cloud,
	);
	const chain = wrapImageDescriptionHandlerWithVastFallback(
		withCloud,
		options.vast,
	);

	return async (_runtime, params) => {
		const outcome = await chain(params);
		if (
			!(
				outcome &&
				typeof outcome === "object" &&
				"kind" in outcome &&
				outcome.kind === "fallback"
			)
		) {
			return outcome as ImageDescriptionResult;
		}

		// Every provider declined: surface the most specific message available.
		const detail = outcome.cause?.message ?? outcome.reason;
		const failure: Error & { cause?: unknown } = new Error(
			`[VisionFallback] all IMAGE_DESCRIPTION providers exhausted (reason=${outcome.reason}): ${detail}`,
		);
		if (outcome.cause) {
			failure.cause = outcome.cause;
		}
		throw failure;
	};
}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * llama-server vision-describe backend (WS2).
3
+ *
4
+ * Wraps the out-of-process llama-server's `/completion` endpoint with
5
+ * the `image_data` array (base64-encoded payloads) and shapes the
6
+ * response to the WS2 `VisionDescribeBackend` contract.
7
+ *
8
+ * llama-server image-data API recap (verified against llama.cpp commit
9
+ * b8198+, May 2026):
10
+ *
11
+ * POST /completion
12
+ * { "prompt": "<...>USER: [img-12] What's in this image?\nASSISTANT:",
13
+ * "image_data": [
14
+ * { "data": "<base64 png/jpeg>", "id": 12 }
15
+ * ],
16
+ * "n_predict": 256,
17
+ * "temperature": 0.2,
18
+ * "stream": false }
19
+ *
20
+ * Response:
21
+ * { "content": "A photo of a cat.", "stop": true,
22
+ * "timings": { "prompt_ms": 180.4, "predicted_ms": 423.1 } }
23
+ *
24
+ * Server-side mmproj is loaded via the `--mmproj <path>` flag on
25
+ * llama-server startup. The FFI runtime wrapper passes this flag
26
+ * already for tiers with vision enabled; this backend assumes the
27
+ * server has been started with the right mmproj for the active model.
28
+ *
29
+ * Backend responsibility:
30
+ * - Encode the image as base64 (when not already).
31
+ * - Build the prompt with the `[img-N]` placeholder convention.
32
+ * - POST to `/completion`, parse the text + timings.
33
+ * - Honour AbortSignal by passing it through to the fetch call.
34
+ *
35
+ * Backend explicitly does NOT:
36
+ * - Start / stop the server. That's the FFI runtime wrapper's job.
37
+ * - Resolve the mmproj path — the server already has it. The arbiter's
38
+ * `--mmproj` was set when the text model loaded.
39
+ * - Implement projector-token reuse. llama-server has no API to
40
+ * accept pre-projected tokens; if the WS1 cache hit happens, this
41
+ * backend ignores the hint and re-runs the projector. The cache
42
+ * is more useful with the in-process node-llama-cpp backend.
43
+ *
44
+ * Metal / CUDA validation:
45
+ * The llama-server build embeds the same mtmd_encode path the
46
+ * in-process binding will eventually expose. On a Metal build the
47
+ * image encode dispatches through the Metal compute encoder; on a
48
+ * CUDA build through cuBLAS. We have no GPU on this host — see the
49
+ * `__tests__/vision-describe.test.ts` notes for the GPU smoke check.
50
+ */
51
+ import type { VisionDescribeBackend } from "./types";
/** Construction options for the llama-server vision-describe backend. */
export interface LlamaServerVisionBackendOptions {
	/**
	 * Where the llama-server listens. Resolve it from the FFI runtime
	 * wrapper's `currentBaseUrl()` at load time; the backend holds on to
	 * the value unchanged between calls.
	 */
	baseUrl: string;
	/**
	 * Replacement for the global `fetch`, with the same signature. Tests
	 * supply a fake; production leaves this unset.
	 */
	fetch?: typeof fetch;
	/**
	 * `n_predict` budget applied when the caller gives no `maxTokens`.
	 * The built-in 256 matches the description-length budget of the
	 * Florence-2 / VisionManager path.
	 */
	defaultMaxTokens?: number;
}
/**
 * Build the vision-describe backend that talks to a running
 * llama-server over HTTP.
 */
export declare function createLlamaServerVisionBackend(
	opts: LlamaServerVisionBackendOptions,
): VisionDescribeBackend;
73
+ //# sourceMappingURL=llama-server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llama-server.d.ts","sourceRoot":"","sources":["llama-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiDG;AAGH,OAAO,KAAK,EACX,qBAAqB,EAGrB,MAAM,SAAS,CAAC;AAEjB,MAAM,WAAW,+BAA+B;IAC/C;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,KAAK,CAAC;IACrB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,8BAA8B,CAC7C,IAAI,EAAE,+BAA+B,GACnC,qBAAqB,CAsEvB"}
@@ -0,0 +1,177 @@
1
+ /**
2
+ * llama-server vision-describe backend (WS2).
3
+ *
4
+ * Wraps the out-of-process llama-server's `/completion` endpoint with
5
+ * the `image_data` array (base64-encoded payloads) and shapes the
6
+ * response to the WS2 `VisionDescribeBackend` contract.
7
+ *
8
+ * llama-server image-data API recap (verified against llama.cpp commit
9
+ * b8198+, May 2026):
10
+ *
11
+ * POST /completion
12
+ * { "prompt": "<...>USER: [img-12] What's in this image?\nASSISTANT:",
13
+ * "image_data": [
14
+ * { "data": "<base64 png/jpeg>", "id": 12 }
15
+ * ],
16
+ * "n_predict": 256,
17
+ * "temperature": 0.2,
18
+ * "stream": false }
19
+ *
20
+ * Response:
21
+ * { "content": "A photo of a cat.", "stop": true,
22
+ * "timings": { "prompt_ms": 180.4, "predicted_ms": 423.1 } }
23
+ *
24
+ * Server-side mmproj is loaded via the `--mmproj <path>` flag on
25
+ * llama-server startup. The FFI runtime wrapper passes this flag
26
+ * already for tiers with vision enabled; this backend assumes the
27
+ * server has been started with the right mmproj for the active model.
28
+ *
29
+ * Backend responsibility:
30
+ * - Encode the image as base64 (when not already).
31
+ * - Build the prompt with the `[img-N]` placeholder convention.
32
+ * - POST to `/completion`, parse the text + timings.
33
+ * - Honour AbortSignal by passing it through to the fetch call.
34
+ *
35
+ * Backend explicitly does NOT:
36
+ * - Start / stop the server. That's the FFI runtime wrapper's job.
37
+ * - Resolve the mmproj path — the server already has it. The arbiter's
38
+ * `--mmproj` was set when the text model loaded.
39
+ * - Implement projector-token reuse. llama-server has no API to
40
+ * accept pre-projected tokens; if the WS1 cache hit happens, this
41
+ * backend ignores the hint and re-runs the projector. The cache
42
+ * is more useful with the in-process node-llama-cpp backend.
43
+ *
44
+ * Metal / CUDA validation:
45
+ * The llama-server build embeds the same mtmd_encode path the
46
+ * in-process binding will eventually expose. On a Metal build the
47
+ * image encode dispatches through the Metal compute encoder; on a
48
+ * CUDA build through cuBLAS. We have no GPU on this host — see the
49
+ * `__tests__/vision-describe.test.ts` notes for the GPU smoke check.
50
+ */
51
+
52
+ import { resolveImageBytes } from "./hash";
53
+ import type {
54
+ VisionDescribeBackend,
55
+ VisionDescribeRequest,
56
+ VisionDescribeResult,
57
+ } from "./types";
58
+
/** Settings consumed by `createLlamaServerVisionBackend`. */
export interface LlamaServerVisionBackendOptions {
	/**
	 * Root URL of the llama-server. The FFI runtime wrapper publishes it
	 * through `currentBaseUrl()`; resolve it at load time and pass it in.
	 * The backend does not re-resolve it between calls.
	 */
	baseUrl: string;
	/**
	 * Custom `fetch` implementation. Production falls back to the global
	 * `fetch`; tests inject a fake with the identical signature.
	 */
	fetch?: typeof fetch;
	/**
	 * `n_predict` value used when a request carries no `maxTokens`.
	 * Falls back to 256, the same description-length budget as the
	 * Florence-2 / VisionManager path.
	 */
	defaultMaxTokens?: number;
}
79
+
/**
 * Create a `VisionDescribeBackend` that describes images by POSTing to a
 * running llama-server's `/completion` endpoint.
 *
 * The backend does not own the server process; it only remembers the
 * base URL. After `dispose()` the backend refuses further `describe()`
 * calls rather than issuing a request against an empty base URL.
 *
 * @param opts - Server base URL plus optional fetch override and default
 *   token budget.
 * @returns A backend whose `id` is `"llama-server"`.
 * @throws Error when `opts.baseUrl` is empty once trailing slashes are
 *   removed.
 */
export function createLlamaServerVisionBackend(
	opts: LlamaServerVisionBackendOptions,
): VisionDescribeBackend {
	const fetchImpl = opts.fetch ?? globalThis.fetch;
	const defaultMaxTokens = opts.defaultMaxTokens ?? 256;
	// Drop every trailing slash so `${baseUrl}/completion` never contains `//`.
	const baseUrl = opts.baseUrl.replace(/\/+$/, "");
	let disposed = false;

	if (!baseUrl) {
		throw new Error(
			"[vision/llama-server] baseUrl is required; pass FFI runtime's currentBaseUrl()",
		);
	}

	const isRecord = (value: unknown): value is Record<string, unknown> =>
		typeof value === "object" && value !== null;
	// Timings come from untrusted JSON; keep them only when they are real numbers.
	const finiteOrUndefined = (value: unknown): number | undefined =>
		typeof value === "number" && Number.isFinite(value) ? value : undefined;

	return {
		id: "llama-server",
		/**
		 * Send one image + prompt to `/completion` and shape the reply.
		 *
		 * @throws Error when the backend was disposed, the server answers
		 *   with a non-2xx status, or the JSON body has no string `content`.
		 */
		async describe(
			request: VisionDescribeRequest,
		): Promise<VisionDescribeResult> {
			if (disposed) {
				// Without this guard the request would go to the relative
				// URL `/completion`, i.e. whatever origin hosts the caller.
				throw new Error(
					"[vision/llama-server] backend has been disposed; load a new backend before calling describe()",
				);
			}
			const { bytes, mimeType } = resolveImageBytes(request.image);
			const base64 = Buffer.from(bytes).toString("base64");
			const prompt = buildVisionPrompt(request.prompt);
			const startMs = Date.now();
			const body = JSON.stringify({
				prompt,
				image_data: [{ data: base64, id: 12 }],
				n_predict: request.maxTokens ?? defaultMaxTokens,
				temperature: request.temperature ?? 0.2,
				stream: false,
				// `cache_prompt: false` here so each describe call gets a
				// fresh slot; the WS1 vision-embedding cache handles repeat-
				// frame reuse on the JS side, and the server-side prompt
				// cache would only conflict with that (different KV state
				// for the same projector tokens).
				cache_prompt: false,
			});
			const res = await fetchImpl(`${baseUrl}/completion`, {
				method: "POST",
				headers: {
					"content-type": "application/json",
					// Forward the resolved MIME type when one is known.
					...(mimeType ? { "x-image-mime": mimeType } : {}),
				},
				body,
				signal: request.signal,
			});
			if (!res.ok) {
				const text = await res.text().catch(() => "<unreadable>");
				throw new Error(
					`[vision/llama-server] /completion returned ${res.status}: ${text.slice(0, 200)}`,
				);
			}
			// Narrow the body instead of asserting its shape: a `null` or
			// primitive JSON body must hit the error below, not a TypeError.
			const payload: unknown = await res.json();
			const content = isRecord(payload) ? payload.content : undefined;
			if (typeof content !== "string") {
				throw new Error(
					"[vision/llama-server] /completion response missing string `content`",
				);
			}
			const timings =
				isRecord(payload) && isRecord(payload.timings) ? payload.timings : {};
			const elapsed = Date.now() - startMs;
			return shape(content, {
				projectorMs: finiteOrUndefined(timings.prompt_ms),
				// Fall back to wall-clock time when the server reports no decode timing.
				decodeMs: finiteOrUndefined(timings.predicted_ms) ?? elapsed,
				cacheHit: false,
			});
		},
		async dispose() {
			// llama-server lifetime is owned by the FFI runtime wrapper.
			// Nothing to free here; just refuse any later describe() call.
			disposed = true;
		},
	};
}
153
+
/**
 * Assemble the chat-formatted prompt sent to `/completion`.
 *
 * The `[img-12]` marker is the placeholder llama-server's mtmd path
 * swaps for the encoded image tokens; its number has to equal the
 * `image_data[*].id` sent in the request body. 12 is arbitrary — any
 * positive integer works.
 *
 * @param userPrompt - Caller's question; blank or missing falls back to
 *   a generic "describe" request.
 * @returns The full prompt, ending with the open assistant turn.
 */
function buildVisionPrompt(userPrompt?: string): string {
	const cleaned = userPrompt?.trim();
	const question = cleaned ? cleaned : "Describe what is in this image.";
	const lines = [
		"<|im_start|>user",
		"[img-12]",
		`${question}<|im_end|>`,
		"<|im_start|>assistant",
		"", // yields the trailing newline after the assistant tag
	];
	return lines.join("\n");
}
162
+
/**
 * Turn the raw completion text into a `VisionDescribeResult`.
 *
 * The description is the trimmed text; the title is whatever precedes
 * the first `.`, `!` or `?`, or `"Image"` when that piece is blank.
 * Telemetry fields are copied onto the result as-is.
 *
 * @throws Error when the text is empty after trimming.
 */
function shape(
	text: string,
	telemetry: { projectorMs?: number; decodeMs?: number; cacheHit?: boolean },
): VisionDescribeResult {
	const description = text.trim();
	if (description.length === 0) {
		throw new Error("[vision/llama-server] empty text from /completion");
	}
	const [leadingSentence] = description.split(/[.!?]/, 1);
	const candidate = leadingSentence?.trim();
	return {
		title: candidate ? candidate : "Image",
		description,
		...telemetry,
	};
}
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Vision-language describe-image types (WS2).
3
+ *
4
+ * Two layers live here:
5
+ *
6
+ * 1. The **request/result** contract every WS2 backend implements
7
+ * (`VisionDescribeRequest`, `VisionDescribeResult`). Callers pass
8
+ * raw image bytes + a prompt; backends return a title+description.
9
+ *
10
+ * 2. The **backend** interface (`VisionDescribeBackend`) that the
11
+ * `MemoryArbiter` registers as a capability handler. One backend
12
+ * per binding family (node-llama-cpp / llama-server / AOSP libllama
13
+ * shim). All three implement the same `load → describe → unload`
14
+ * shape so the arbiter can swap between them without caring how
15
+ * the projector is wired underneath.
16
+ *
17
+ * Why a separate file: the arbiter's `CapabilityRegistration<TBackend,
18
+ * TRequest, TResult>` is generic; pinning concrete shapes here keeps
19
+ * the registration sites short and removes a dozen casts at the
20
+ * call-site.
21
+ */
22
+ /**
23
+ * Channel order for the raw pixel buffer. Most platforms hand us RGBA
24
+ * (HTMLCanvasElement, Capacitor `Camera`, the desktop `puppeteer-core`
25
+ * screenshot pipeline). The encoder normalizes internally; this
26
+ * string-literal union stays so the hash step can pick a stable byte
27
+ * layout that doesn't depend on the platform-provided buffer order.
28
+ */
29
+ export type VisionImageChannelOrder = "rgba" | "rgb" | "bgra" | "bgr";
30
+ /**
31
+ * The raw image data the backend will encode. The arbiter does not see
32
+ * this — it gets handed straight to the backend's `run()`. The reason
33
+ * we accept multiple wrappers (URL / data URL / base64 / bytes) is that the
34
+ * upstream entry points (HTTP route, agent runtime model handler,
35
+ * computer-use frame loop) each prefer a different shape. The backend
36
+ * resolves to bytes once.
+ *
+ * The union is discriminated on `kind`: `"bytes"` carries a `Uint8Array`,
+ * while `"base64"`, `"dataUrl"` and `"url"` each carry a string. Every
+ * variant except `"dataUrl"` accepts an optional `mimeType`.
+ *
+ * NOTE(review): `VisionDescribeBackend` in this file exposes `describe()`,
+ * not `run()` — confirm which call site the `run()` mention refers to.
37
+ */
38
+ export type VisionImageInput = {
39
+ kind: "bytes";
40
+ bytes: Uint8Array;
41
+ mimeType?: string;
42
+ } | {
43
+ kind: "base64";
44
+ base64: string;
45
+ mimeType?: string;
46
+ } | {
47
+ kind: "dataUrl";
48
+ dataUrl: string;
49
+ } | {
50
+ kind: "url";
51
+ url: string;
52
+ mimeType?: string;
53
+ };
54
+ /**
55
+ * Caller request to `describeImage`. The `modelFamily` distinguishes
56
+ * projected-token cache entries from different VL families that share
57
+ * the same hash space — Qwen3-VL tokens are not interchangeable with
58
+ * Florence-2 tokens. Default is `qwen3-vl` (the WS2 deliverable);
59
+ * each additional family registers under its own identifier.
+ *
+ * `image` is the only required field. `prompt` is an optional free-text
+ * instruction, and `signal` is an optional `AbortSignal` (presumably used
+ * to cancel an in-flight describe — confirm per backend).
60
+ */
61
+ export interface VisionDescribeRequest {
62
+ image: VisionImageInput;
63
+ prompt?: string;
64
+ /**
65
+ * The model family identifier. Used to namespace the projector cache
66
+ * so swapping the backend's model family invalidates cached tokens.
67
+ * Defaults to `"qwen3-vl"` when omitted.
68
+ */
69
+ modelFamily?: string;
70
+ /** Max output tokens; defaults to 256 (description-length budget). */
71
+ maxTokens?: number;
72
+ /** 0..1, default 0.2 (descriptions should be deterministic-ish). */
73
+ temperature?: number;
74
+ signal?: AbortSignal;
75
+ }
76
+ /**
+ * Backend response — same shape that ImageDescriptionResult expects.
+ *
+ * `title` and `description` are always present. The three telemetry
+ * fields (`projectorMs`, `decodeMs`, `cacheHit`) are optional and may be
+ * left out by a backend.
+ */
77
+ export interface VisionDescribeResult {
78
+ title: string;
79
+ description: string;
80
+ /** Best-effort: ms spent in the projector (for arbiter telemetry). */
81
+ projectorMs?: number;
82
+ /** Best-effort: ms spent in the decoder. */
83
+ decodeMs?: number;
84
+ /** Whether the projected tokens came from the WS1 vision cache. */
85
+ cacheHit?: boolean;
86
+ }
87
+ /**
88
+ * Per-load arguments for a vision-describe backend. The arbiter's
89
+ * `load(modelKey)` only carries an opaque key; the binding resolves
90
+ * that key to real model+mmproj paths through this struct, which
91
+ * `createVisionCapabilityRegistration` populates from the catalog.
92
+ */
93
+ export interface VisionDescribeLoadArgs {
94
+ /** Absolute path to the text decoder GGUF (the "main" model). */
95
+ modelPath: string;
96
+ /** Absolute path to the matching mmproj projector GGUF. */
97
+ mmprojPath: string;
98
+ /**
99
+ * GPU offload preference. The backend translates this to its native
100
+ * knob: node-llama-cpp `gpuLayers`, llama-server `--n-gpu-layers`,
101
+ * AOSP libllama shim `eliza_llama_model_params_set_n_gpu_layers`.
102
+ * `"auto"` lets the binding decide; numeric is honoured verbatim.
+ * NOTE(review): the type also accepts `"max"`, which is not described
+ * here — presumably "offload every layer"; confirm against each binding.
103
+ */
104
+ gpuLayers?: number | "auto" | "max";
105
+ /** Max sampled context window in tokens. Defaults to 4096. */
106
+ contextSize?: number;
107
+ }
108
+ /**
109
+ * The contract every WS2 backend implements. The shape is intentionally
110
+ * narrow: the arbiter only ever calls `describe`. `dispose` is wrapped
111
+ * by the arbiter's `unload` so the backend can free GPU/VRAM and drop
112
+ * file descriptors on eviction.
+ *
+ * NOTE(review): the header comment of this file lists node-llama-cpp as a
+ * binding family, but the `id` union below has `"capacitor-llama"` and no
+ * node-llama-cpp entry — confirm which naming is current.
113
+ */
114
+ export interface VisionDescribeBackend {
115
+ /** Stable identifier — `"capacitor-llama"`, `"llama-server"`, `"aosp"`, or `"fake"` (tests). */
116
+ readonly id: "capacitor-llama" | "llama-server" | "aosp" | "fake";
117
+ /**
118
+ * Run a describe pass. Backends MAY consult an injected projector cache
119
+ * via `args.projectedTokens` (when the caller's hash already produced
120
+ * a cache hit) instead of running the projector again; backends that
121
+ * don't implement projector-token reuse ignore the field.
+ * `args` itself is optional and may be omitted entirely.
122
+ */
123
+ describe(request: VisionDescribeRequest, args?: VisionDescribeBackendOptions): Promise<VisionDescribeResult>;
124
+ /** Release the loaded weights. Idempotent. */
125
+ dispose(): Promise<void>;
126
+ }
127
+ /**
128
+ * Per-call options the arbiter wrapper passes into the backend. Lives
129
+ * here (rather than on `VisionDescribeRequest`) so the caller-facing
130
+ * request type stays free of arbiter implementation details.
131
+ */
132
+ export interface VisionDescribeBackendOptions {
133
+ /**
134
+ * Pre-computed projected tokens from the WS1 vision-embedding cache.
135
+ * When present the backend SHOULD skip its own projector step and
136
+ * decode against these tokens directly. Backends that can't do this
137
+ * still produce a correct result by ignoring the field; the arbiter's
138
+ * wrapper will measure `cacheHit: false` in that case.
+ *
+ * `tokens` is a `Float32Array`; `tokenCount` and `hiddenSize` are plain
+ * numbers. Presumably `tokens` holds `tokenCount * hiddenSize` floats —
+ * TODO confirm the layout with the cache that produces it.
139
+ */
140
+ projectedTokens?: {
141
+ tokens: Float32Array;
142
+ tokenCount: number;
143
+ hiddenSize: number;
144
+ };
145
+ }
146
+ /**
147
+ * Capability handler load function. The arbiter calls it with a model
148
+ * key (e.g. `"qwen3-vl-2b"`); the implementation resolves to a real
149
+ * `(modelPath, mmprojPath)` pair from the catalog + installed registry
150
+ * and returns a live backend.
+ *
+ * The backend is delivered through a `Promise`. How an unknown or
+ * uninstalled `modelKey` is reported is not specified by this type —
+ * presumably a rejected promise; confirm with the implementations.
151
+ */
152
+ export type VisionDescribeBackendLoader = (modelKey: string) => Promise<VisionDescribeBackend>;
153
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH;;;;;;GAMG;AACH,MAAM,MAAM,uBAAuB,GAAG,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,CAAC;AAEtE;;;;;;;GAOG;AACH,MAAM,MAAM,gBAAgB,GACzB;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,KAAK,EAAE,UAAU,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACvD;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,GACpC;IAAE,IAAI,EAAE,KAAK,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEnD;;;;;;GAMG;AACH,MAAM,WAAW,qBAAqB;IACrC,KAAK,EAAE,gBAAgB,CAAC;IACxB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,sEAAsE;IACtE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,oEAAoE;IACpE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED,yEAAyE;AACzE,MAAM,WAAW,oBAAoB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,sEAAsE;IACtE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4CAA4C;IAC5C,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,mEAAmE;IACnE,QAAQ,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;;;;GAKG;AACH,MAAM,WAAW,sBAAsB;IACtC,iEAAiE;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;IACnB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,KAAK,CAAC;IACpC,8DAA8D;IAC9D,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;;GAKG;AACH,MAAM,WAAW,qBAAqB;IACrC,gGAAgG;IAChG,QAAQ,CAAC,EAAE,EAAE,iBAAiB,GAAG,cAAc,GAAG,MAAM,GAAG,MAAM,CAAC;IAClE;;;;;OAKG;IACH,QAAQ,CACP,OAAO,EAAE,qBAAqB,EAC9B,IAAI,CAAC,EAAE,4BAA4B,GACjC,OAAO,CAAC,oBAAoB,CAAC,CAAC;IACjC,8CAA8C;IAC9C,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED;;;;GAIG;AACH,MAAM,WAAW,4BAA4B;IAC5C;;;;;;OAMG;IACH,eAAe,CAAC,EAAE;QACjB,MAAM,EAAE,YAAY,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;KACnB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,MAAM,2BAA2B,GAAG,CACzC,QAAQ,EAAE,MAAM,KACZ,OAAO,CAAC,qBAAqB,CAAC,CAAC"}