@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision-embedding cache key derivation (WS2).
|
|
3
|
+
*
|
|
4
|
+
* The arbiter's vision-embedding cache (WS1, `vision-embedding-cache.ts`)
|
|
5
|
+
* is keyed by SHA-256 of a *normalized* representation of the input
|
|
6
|
+
* image. The normalization step is what makes the cache useful across
|
|
7
|
+
* platforms: two JPEG encodings of the same screenshot, or an RGBA vs
|
|
8
|
+
* RGB frame captured by different platforms, must hash to the same key
|
|
9
|
+
* or the cache hit rate collapses.
|
|
10
|
+
*
|
|
11
|
+
* Normalization is deliberately minimal:
|
|
12
|
+
*
|
|
13
|
+
* 1. Resolve the input to raw bytes (decoding base64/data-url wrappers).
|
|
14
|
+
* 2. Hash with the model-family prefix so the cache can hold tokens
|
|
15
|
+
* for multiple VL families without collision.
|
|
16
|
+
*
|
|
17
|
+
* What we DO NOT do here:
|
|
18
|
+
*
|
|
19
|
+
* - Resize the image. The backend's projector enforces its own input
|
|
20
|
+
* resolution; the bytes the projector sees are what gets projected.
|
|
21
|
+
* Re-encoding here would add work without changing the hit rate
|
|
22
|
+
* (the platform-provided buffer is already at the camera's native
|
|
23
|
+
* resolution).
|
|
24
|
+
* - Strip JPEG/PNG headers. They contribute to the hash; two
|
|
25
|
+
* re-encodings of the same pixel array land in different cache
|
|
26
|
+
* slots intentionally. Reuse only the exact same byte stream.
|
|
27
|
+
*
|
|
28
|
+
* If a downstream caller wants finer-grained cache hits (e.g. dedupe
|
|
29
|
+
* across re-encodings of the same screen frame), it should decode to
|
|
30
|
+
* RGBA pixels itself and call `hashRawPixels`. The default
|
|
31
|
+
* `hashVisionInput` path is the conservative, byte-stream-only path.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
import { createHash } from "node:crypto";
|
|
35
|
+
import type { VisionImageInput } from "./types";
|
|
36
|
+
|
|
37
|
+
/** Model family folded into the cache key when the caller does not pass one. */
const DEFAULT_FAMILY = "qwen3-vl";
|
|
38
|
+
|
|
39
|
+
/**
 * Resolve a `VisionImageInput` to its raw bytes. Returns the decoded
 * payload plus an optional MIME type the caller can forward to the
 * backend.
 *
 * - `bytes`: the caller's array is returned as-is (no copy).
 * - `base64`: decoded into a fresh `Uint8Array`.
 * - `dataUrl`: the header (`data:<mime>[;params]`) is split from the
 *   payload at the first comma. The payload is base64-decoded only when
 *   the *header* carries the `;base64` flag; otherwise it is treated as
 *   percent-encoded data (RFC 2397) and `%XX` escapes are decoded to
 *   bytes.
 *
 * @throws Error on a malformed data URL, and on `url:` inputs — those
 *   must be fetched by the caller; the hash step does not own HTTP.
 */
export function resolveImageBytes(input: VisionImageInput): {
	bytes: Uint8Array;
	mimeType?: string;
} {
	switch (input.kind) {
		case "bytes":
			return { bytes: input.bytes, mimeType: input.mimeType };
		case "base64": {
			const bytes = Uint8Array.from(Buffer.from(input.base64, "base64"));
			return { bytes, mimeType: input.mimeType };
		}
		case "dataUrl": {
			// Groups: 1 = MIME type, 2 = `;param` segment (possibly empty),
			// 3 = payload (everything after the first comma).
			const match = /^data:([^;,]+)((?:;[^,]*)?),(.*)$/s.exec(input.dataUrl);
			if (!match) {
				throw new Error(
					"[vision/hash] malformed data URL — expected data:<mime>;base64,<payload>",
				);
			}
			const mimeType = match[1];
			const params = match[2] ?? "";
			const payload = match[3];
			// Only the header may declare base64. Testing the whole URL would
			// misclassify a plain payload that merely contains ";base64".
			const isBase64 = /;base64(?:;|$)/i.test(params);
			const bytes = isBase64
				? Uint8Array.from(Buffer.from(payload, "base64"))
				: decodePercentEncodedPayload(payload);
			return { bytes, mimeType };
		}
		case "url":
			throw new Error(
				"[vision/hash] url inputs must be fetched by the caller before hashing — the hash step does not own HTTP",
			);
	}
}

/**
 * Decode a non-base64 data-URL payload to bytes. `%XX` escapes become the
 * byte they name; every other character contributes its UTF-8 bytes. A `%`
 * that is not followed by two hex digits is kept literally rather than
 * rejected, so sloppy producers still resolve to a deterministic payload.
 */
function decodePercentEncodedPayload(payload: string): Uint8Array {
	const hexDigit = (code: number): number => {
		if (code >= 0x30 && code <= 0x39) return code - 0x30;
		const lower = code | 0x20;
		if (lower >= 0x61 && lower <= 0x66) return lower - 0x61 + 10;
		return -1;
	};
	const raw = Buffer.from(payload, "utf8");
	// Decoding never grows the payload, so `raw.length` is a safe upper bound.
	const out = new Uint8Array(raw.length);
	let written = 0;
	for (let i = 0; i < raw.length; i++) {
		const byte = raw[i];
		if (byte === 0x25 && i + 2 < raw.length) {
			const hi = hexDigit(raw[i + 1]);
			const lo = hexDigit(raw[i + 2]);
			if (hi >= 0 && lo >= 0) {
				out[written++] = (hi << 4) | lo;
				i += 2;
				continue;
			}
		}
		out[written++] = byte;
	}
	return out.slice(0, written);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Derive the cache key for an opaque image byte stream, scoped to a model
 * family. The digest is SHA-256 over, in order: the family string, a
 * 4-byte big-endian payload length, and the payload itself.
 *
 * The result is stable across processes and platforms (Node, Bun, and the
 * Capacitor JS bridge all return the same hex string for the same input).
 *
 * @param bytes - Payload to hash; not modified.
 * @param modelFamily - Family prefix; defaults to `DEFAULT_FAMILY`.
 * @returns Lower-case hex SHA-256 digest.
 */
export function hashImageBytes(
	bytes: Uint8Array,
	modelFamily: string = DEFAULT_FAMILY,
): string {
	// The length header sits between family and payload so that a crafted
	// family string ending in the payload's leading bytes cannot produce a
	// plain `family || bytes` collision. Cheap, defensible.
	const sizeHeader = Buffer.alloc(4);
	sizeHeader.writeUInt32BE(bytes.byteLength, 0);
	return createHash("sha256")
		.update(modelFamily)
		.update(sizeHeader)
		.update(bytes)
		.digest("hex");
}
|
|
99
|
+
|
|
100
|
+
/**
 * Hash a raw pixel buffer (RGBA / RGB / BGRA / BGR).
 *
 * Pixels are first normalized to red-first order (BGRA→RGBA, BGR→RGB) and
 * the *normalized* order is what gets folded into the hashed prefix, so the
 * same image captured on two different platforms (Android = RGBA, macOS
 * screenshot = BGRA) produces the same key. The prefix still separates
 * 3-channel from 4-channel buffers. Width / height are also included so
 * the cache doesn't conflate two scaled versions of the same source.
 *
 * Digest layout: family, the literal `|raw|`, a 12-byte prefix (width and
 * height as big-endian uint32, then the space-padded normalized order),
 * then the normalized pixel bytes.
 *
 * @param args.bytes - Pixel data; never modified.
 * @param args.width - Written with `writeUInt32BE`, which throws a
 *   `RangeError` for values outside the uint32 range.
 * @param args.height - Same constraints as `width`.
 * @param args.channelOrder - Channel layout of `args.bytes`.
 * @param args.modelFamily - Family prefix; defaults to `DEFAULT_FAMILY`.
 * @returns Lower-case hex SHA-256 digest.
 */
export function hashRawPixels(args: {
	bytes: Uint8Array;
	width: number;
	height: number;
	channelOrder: "rgba" | "rgb" | "bgra" | "bgr";
	modelFamily?: string;
}): string {
	// Layout label after normalization. Hashing the caller's original label
	// would make BGRA and RGBA captures of one image land on different keys
	// even though their normalized pixel bytes are identical.
	const canonicalOrder = { rgba: "rgba", rgb: "rgb", bgra: "rgba", bgr: "rgb" }[
		args.channelOrder
	];
	const h = createHash("sha256");
	h.update(args.modelFamily ?? DEFAULT_FAMILY);
	h.update("|raw|");
	const prefix = Buffer.alloc(12);
	prefix.writeUInt32BE(args.width, 0);
	prefix.writeUInt32BE(args.height, 4);
	prefix.write(canonicalOrder.padEnd(4, " "), 8, "ascii");
	h.update(prefix);
	h.update(normalizeChannels(args.bytes, args.channelOrder));
	return h.digest("hex");
}

/**
 * Return the pixels in red-first order. RGBA / RGB input is returned
 * unchanged (same array). BGRA / BGR input is copied into a new buffer
 * with the first and third channel of every pixel swapped; the caller's
 * buffer is left untouched.
 */
function normalizeChannels(
	bytes: Uint8Array,
	order: "rgba" | "rgb" | "bgra" | "bgr",
): Uint8Array {
	if (order === "rgba" || order === "rgb") return bytes;
	const stride = order === "bgra" ? 4 : 3;
	// Start from a full copy so green/alpha and any trailing bytes that do
	// not form a whole pixel are carried over exactly as the RGB(A) path
	// would hash them. `Uint8Array.from` always copies, also for a `Buffer`.
	const out = Uint8Array.from(bytes);
	for (let i = 0; i + stride <= out.byteLength; i += stride) {
		out[i] = bytes[i + 2];
		out[i + 2] = bytes[i];
	}
	return out;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Provider-facing shortcut: resolve a `VisionImageInput` to bytes and hash
 * them under the given model family in one call.
 *
 * @param input - Image to key. `url` inputs make `resolveImageBytes`
 *   throw, so callers must fetch those first.
 * @param modelFamily - Family prefix; defaults to `DEFAULT_FAMILY`.
 * @returns The cache key produced by `hashImageBytes`.
 */
export function hashVisionInput(
	input: VisionImageInput,
	modelFamily: string = DEFAULT_FAMILY,
): string {
	const resolved = resolveImageBytes(input);
	return hashImageBytes(resolved.bytes, modelFamily);
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision-describe capability (WS2) — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* This module is what plugin-vision (WS4), the IMAGE_DESCRIPTION
|
|
5
|
+
* handler in `provider.ts`, and computer-use (WS9) import to register
|
|
6
|
+
* vision capability with the WS1 MemoryArbiter.
|
|
7
|
+
*
|
|
8
|
+
* Wiring:
|
|
9
|
+
*
|
|
10
|
+
* const arbiter = service.getMemoryArbiter();
|
|
11
|
+
* const registration = createVisionCapabilityRegistration({
|
|
12
|
+
* loader: createDefaultVisionLoader({ ... }),
|
|
13
|
+
* arbiterCache: arbiter,
|
|
14
|
+
* });
|
|
15
|
+
* arbiter.registerCapability(registration);
|
|
16
|
+
*
|
|
17
|
+
* `createVisionCapabilityRegistration` wraps the underlying backend so
|
|
18
|
+
* the arbiter's `run(request)` path:
|
|
19
|
+
*
|
|
20
|
+
* 1. Hashes the request's image bytes (model-family-scoped).
|
|
21
|
+
* 2. Checks the arbiter's vision-embedding cache.
|
|
22
|
+
* 3. On miss: calls `backend.describe(request)`, lets the backend
|
|
23
|
+
* run its own projector + decoder. Backends that cannot expose projected
|
|
24
|
+
* tokens return decoder text only, so the cache stays empty for this hash.
|
|
25
|
+
* The decoder text is what the caller wanted anyway.
|
|
26
|
+
* 4. On hit: calls `backend.describe(request, { projectedTokens })`.
|
|
27
|
+
* Backends that support pre-projected token reuse skip the
|
|
28
|
+
* projector entirely. Backends that don't ignore the hint; the
|
|
29
|
+
* result is still correct but the projector cost is paid again.
|
|
30
|
+
*/
|
|
31
|
+
export { type AospLlamaMtmdBinding, type AospMtmdHandle, type LoadAospVisionBackendOptions, loadAospVisionBackend, } from "./aosp-unavailable";
|
|
32
|
+
export { type CapacitorLlamaMtmdBinding, type CapacitorLlamaMtmdHandle, type CapacitorLlamaVisionBackendOptions, loadCapacitorLlamaVisionBackend, VisionBackendUnavailableError, type VisionManagerLike, } from "./capacitor-llama";
|
|
33
|
+
export { classifyLocalVisionError, type LocalImageDescriptionHandler, type LocalVisionOutcome, type VisionCloudFallbackOptions, type VisionFallbackReason, type WrappedImageDescriptionHandler, wrapImageDescriptionHandlerWithCloudFallback, } from "./cloud-fallback";
|
|
34
|
+
export { hashImageBytes, hashRawPixels, hashVisionInput, resolveImageBytes, } from "./hash";
|
|
35
|
+
export { createLlamaServerVisionBackend, type LlamaServerVisionBackendOptions, } from "./llama-server";
|
|
36
|
+
export type { VisionDescribeBackend, VisionDescribeBackendLoader, VisionDescribeBackendOptions, VisionDescribeLoadArgs, VisionDescribeRequest, VisionDescribeResult, VisionImageChannelOrder, VisionImageInput, } from "./types";
|
|
37
|
+
export { type VisionVastFallbackOptions, wrapImageDescriptionHandlerWithVastFallback, } from "./vast-fallback";
|
|
38
|
+
import type { CapabilityRegistration } from "../memory-arbiter";
|
|
39
|
+
import type { VisionDescribeBackend, VisionDescribeBackendLoader, VisionDescribeRequest, VisionDescribeResult } from "./types";
|
|
40
|
+
/**
|
|
41
|
+
* Minimal arbiter shape we need from the cache. Lets tests inject a
|
|
42
|
+
* fake cache without pulling in the whole MemoryArbiter.
|
|
43
|
+
*/
|
|
44
|
+
export interface VisionEmbeddingCacheLike {
|
|
45
|
+
getCachedVisionEmbedding(hash: string): {
|
|
46
|
+
tokens: Float32Array;
|
|
47
|
+
tokenCount: number;
|
|
48
|
+
hiddenSize: number;
|
|
49
|
+
live?: boolean;
|
|
50
|
+
} | null;
|
|
51
|
+
setCachedVisionEmbedding(hash: string, entry: {
|
|
52
|
+
tokens: Float32Array;
|
|
53
|
+
tokenCount: number;
|
|
54
|
+
hiddenSize: number;
|
|
55
|
+
}, ttlMs?: number): void;
|
|
56
|
+
}
|
|
57
|
+
export interface CreateVisionCapabilityRegistrationOptions {
|
|
58
|
+
/**
|
|
59
|
+
* The arbiter (or any object with the cache passthroughs). When
|
|
60
|
+
* provided the wrapper performs hash → cache lookup before calling
|
|
61
|
+
* the backend's `describe`.
|
|
62
|
+
*/
|
|
63
|
+
arbiterCache?: VisionEmbeddingCacheLike;
|
|
64
|
+
loader: VisionDescribeBackendLoader;
|
|
65
|
+
/** Default model family for the cache key. Defaults to `qwen3-vl`. */
|
|
66
|
+
modelFamily?: string;
|
|
67
|
+
estimatedMb?: number;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Build a `CapabilityRegistration` ready to feed to
|
|
71
|
+
* `arbiter.registerCapability()`. The wrapper plumbs the cache hint
|
|
72
|
+
* into the backend's describe call so backends that support
|
|
73
|
+
* pre-projected tokens skip the projector.
|
|
74
|
+
*/
|
|
75
|
+
export declare function createVisionCapabilityRegistration(opts: CreateVisionCapabilityRegistrationOptions): CapabilityRegistration<VisionDescribeBackend, VisionDescribeRequest, VisionDescribeResult>;
|
|
76
|
+
import type { IAgentRuntime, ImageDescriptionParams, ImageDescriptionResult } from "@elizaos/core";
|
|
77
|
+
import { type LocalImageDescriptionHandler, type VisionCloudFallbackOptions } from "./cloud-fallback";
|
|
78
|
+
import { type VisionVastFallbackOptions } from "./vast-fallback";
|
|
79
|
+
/**
|
|
80
|
+
* Compose the full local → cloud → vast IMAGE_DESCRIPTION chain and
|
|
81
|
+
* terminate it as a runtime-shaped `ImageDescriptionHandler`. When all
|
|
82
|
+
* three paths return `{ kind: "fallback" }`, the terminator throws the
|
|
83
|
+
* underlying cause (or a structured upstream-fail message) so the runtime
|
|
84
|
+
* surfaces the failure cleanly rather than serving a sentinel result.
|
|
85
|
+
*
|
|
86
|
+
* This is the single entry point `ensure-local-inference-handler.ts`
|
|
87
|
+
* uses at the IMAGE_DESCRIPTION model registration site. Tests
|
|
88
|
+
* exercise the composition via the individual `wrap*` helpers; this
|
|
89
|
+
* function is the production wiring.
|
|
90
|
+
*/
|
|
91
|
+
export declare function withVisionFallbackChain(local: LocalImageDescriptionHandler, options?: {
|
|
92
|
+
cloud?: VisionCloudFallbackOptions;
|
|
93
|
+
vast?: VisionVastFallbackOptions;
|
|
94
|
+
}): (runtime: IAgentRuntime, params: ImageDescriptionParams | string) => Promise<ImageDescriptionResult>;
|
|
95
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,EACN,KAAK,oBAAoB,EACzB,KAAK,cAAc,EACnB,KAAK,4BAA4B,EACjC,qBAAqB,GACrB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACN,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAC7B,KAAK,kCAAkC,EACvC,+BAA+B,EAC/B,6BAA6B,EAC7B,KAAK,iBAAiB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACN,wBAAwB,EACxB,KAAK,4BAA4B,EACjC,KAAK,kBAAkB,EACvB,KAAK,0BAA0B,EAC/B,KAAK,oBAAoB,EACzB,KAAK,8BAA8B,EACnC,4CAA4C,GAC5C,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACN,cAAc,EACd,aAAa,EACb,eAAe,EACf,iBAAiB,GACjB,MAAM,QAAQ,CAAC;AAChB,OAAO,EACN,8BAA8B,EAC9B,KAAK,+BAA+B,GACpC,MAAM,gBAAgB,CAAC;AACxB,YAAY,EACX,qBAAqB,EACrB,2BAA2B,EAC3B,4BAA4B,EAC5B,sBAAsB,EACtB,qBAAqB,EACrB,oBAAoB,EACpB,uBAAuB,EACvB,gBAAgB,GAChB,MAAM,SAAS,CAAC;AACjB,OAAO,EACN,KAAK,yBAAyB,EAC9B,2CAA2C,GAC3C,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAEX,sBAAsB,EACtB,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EACX,qBAAqB,EACrB,2BAA2B,EAC3B,qBAAqB,EACrB,oBAAoB,EACpB,MAAM,SAAS,CAAC;AAEjB;;;GAGG;AACH,MAAM,WAAW,wBAAwB;IACxC,wBAAwB,CAAC,IAAI,EAAE,MAAM,GAAG;QACvC,MAAM,EAAE,YAAY,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;QACnB,IAAI,CAAC,EAAE,OAAO,CAAC;KACf,GAAG,IAAI,CAAC;IACT,wBAAwB,CACvB,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE;QACN,MAAM,EAAE,YAAY,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,UAAU,EAAE,MAAM,CAAC;KACnB,EACD,KAAK,CAAC,EAAE,MAAM,GACZ,IAAI,CAAC;CACR;AAED,MAAM,WAAW,yCAAyC;IACzD;;;;OAIG;IACH,YAAY,CAAC,EAAE,wBAAwB,CAAC;IACxC,MAAM,EAAE,2BAA2B,CAAC;IACpC,sEAAsE;IACtE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;;GAKG;AACH,wBAAgB,kCAAkC,CACjD,IAAI,EAAE,yCAAyC,GAC7C,sBAAsB,CACxB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,CACpB,CAkDA;AAED,OAAO,KAAK,EACX,aAAa,EACb,sBAAsB,EACtB,sBAAsB,EACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACN,KAAK,4BAA4B,EACjC,KAAK,0BAA0B,EAE/B,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACN,KAAK,yBAAyB,EAE9B,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;;;;;GAWG;AACH,wBAAgB,uBAAuB,CACtC,KAAK,EAAE,4BAA4B,EACnC,OAAO,GAAE;IACR,KAAK,CAAC,EAAE,0BAA0B,CAAC;IACnC,IAAI,CAAC,EAAE,yBAAyB,CAAC;CAC
5B,GACJ,CACF,OAAO,EAAE,aAAa,EACtB,MAAM,EAAE,sBAAsB,GAAG,MAAM,KACnC,OAAO,CAAC,sBAAsB,CAAC,CAwBnC"}
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision-describe capability (WS2) — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* This module is what plugin-vision (WS4), the IMAGE_DESCRIPTION
|
|
5
|
+
* handler in `provider.ts`, and computer-use (WS9) import to register
|
|
6
|
+
* vision capability with the WS1 MemoryArbiter.
|
|
7
|
+
*
|
|
8
|
+
* Wiring:
|
|
9
|
+
*
|
|
10
|
+
* const arbiter = service.getMemoryArbiter();
|
|
11
|
+
* const registration = createVisionCapabilityRegistration({
|
|
12
|
+
* loader: createDefaultVisionLoader({ ... }),
|
|
13
|
+
* arbiterCache: arbiter,
|
|
14
|
+
* });
|
|
15
|
+
* arbiter.registerCapability(registration);
|
|
16
|
+
*
|
|
17
|
+
* `createVisionCapabilityRegistration` wraps the underlying backend so
|
|
18
|
+
* the arbiter's `run(request)` path:
|
|
19
|
+
*
|
|
20
|
+
* 1. Hashes the request's image bytes (model-family-scoped).
|
|
21
|
+
* 2. Checks the arbiter's vision-embedding cache.
|
|
22
|
+
* 3. On miss: calls `backend.describe(request)`, lets the backend
|
|
23
|
+
* run its own projector + decoder. Backends that cannot expose projected
|
|
24
|
+
* tokens return decoder text only, so the cache stays empty for this hash.
|
|
25
|
+
* The decoder text is what the caller wanted anyway.
|
|
26
|
+
* 4. On hit: calls `backend.describe(request, { projectedTokens })`.
|
|
27
|
+
* Backends that support pre-projected token reuse skip the
|
|
28
|
+
* projector entirely. Backends that lack that support ignore the hint; the
|
|
29
|
+
* result is still correct but the projector cost is paid again.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
export {
|
|
33
|
+
type AospLlamaMtmdBinding,
|
|
34
|
+
type AospMtmdHandle,
|
|
35
|
+
type LoadAospVisionBackendOptions,
|
|
36
|
+
loadAospVisionBackend,
|
|
37
|
+
} from "./aosp-unavailable";
|
|
38
|
+
export {
|
|
39
|
+
type CapacitorLlamaMtmdBinding,
|
|
40
|
+
type CapacitorLlamaMtmdHandle,
|
|
41
|
+
type CapacitorLlamaVisionBackendOptions,
|
|
42
|
+
loadCapacitorLlamaVisionBackend,
|
|
43
|
+
VisionBackendUnavailableError,
|
|
44
|
+
type VisionManagerLike,
|
|
45
|
+
} from "./capacitor-llama";
|
|
46
|
+
export {
|
|
47
|
+
classifyLocalVisionError,
|
|
48
|
+
type LocalImageDescriptionHandler,
|
|
49
|
+
type LocalVisionOutcome,
|
|
50
|
+
type VisionCloudFallbackOptions,
|
|
51
|
+
type VisionFallbackReason,
|
|
52
|
+
type WrappedImageDescriptionHandler,
|
|
53
|
+
wrapImageDescriptionHandlerWithCloudFallback,
|
|
54
|
+
} from "./cloud-fallback";
|
|
55
|
+
export {
|
|
56
|
+
hashImageBytes,
|
|
57
|
+
hashRawPixels,
|
|
58
|
+
hashVisionInput,
|
|
59
|
+
resolveImageBytes,
|
|
60
|
+
} from "./hash";
|
|
61
|
+
export {
|
|
62
|
+
createLlamaServerVisionBackend,
|
|
63
|
+
type LlamaServerVisionBackendOptions,
|
|
64
|
+
} from "./llama-server";
|
|
65
|
+
export type {
|
|
66
|
+
VisionDescribeBackend,
|
|
67
|
+
VisionDescribeBackendLoader,
|
|
68
|
+
VisionDescribeBackendOptions,
|
|
69
|
+
VisionDescribeLoadArgs,
|
|
70
|
+
VisionDescribeRequest,
|
|
71
|
+
VisionDescribeResult,
|
|
72
|
+
VisionImageChannelOrder,
|
|
73
|
+
VisionImageInput,
|
|
74
|
+
} from "./types";
|
|
75
|
+
export {
|
|
76
|
+
type VisionVastFallbackOptions,
|
|
77
|
+
wrapImageDescriptionHandlerWithVastFallback,
|
|
78
|
+
} from "./vast-fallback";
|
|
79
|
+
|
|
80
|
+
import type {
|
|
81
|
+
ArbiterCapability,
|
|
82
|
+
CapabilityRegistration,
|
|
83
|
+
} from "../memory-arbiter";
|
|
84
|
+
import { hashVisionInput } from "./hash";
|
|
85
|
+
import type {
|
|
86
|
+
VisionDescribeBackend,
|
|
87
|
+
VisionDescribeBackendLoader,
|
|
88
|
+
VisionDescribeRequest,
|
|
89
|
+
VisionDescribeResult,
|
|
90
|
+
} from "./types";
|
|
91
|
+
|
|
92
|
+
/**
 * The slice of the arbiter that the vision wrapper actually touches: the
 * two vision-embedding cache passthroughs. Keeping it this small lets
 * tests inject a fake cache without pulling in the whole MemoryArbiter.
 */
export interface VisionEmbeddingCacheLike {
	/**
	 * Look up the entry stored under `hash`, or `null` when there is none.
	 * `createVisionCapabilityRegistration` treats an entry whose `live` is
	 * explicitly `false` as a miss.
	 */
	getCachedVisionEmbedding(hash: string): {
		tokens: Float32Array;
		tokenCount: number;
		hiddenSize: number;
		live?: boolean;
	} | null;
	/**
	 * Store `entry` under `hash`.
	 * NOTE(review): `ttlMs` is presumably a time-to-live in milliseconds —
	 * confirm against the arbiter implementation.
	 */
	setCachedVisionEmbedding(
		hash: string,
		entry: { tokens: Float32Array; tokenCount: number; hiddenSize: number },
		ttlMs?: number,
	): void;
}
|
|
113
|
+
|
|
114
|
+
/** Inputs accepted by `createVisionCapabilityRegistration`. */
export interface CreateVisionCapabilityRegistrationOptions {
	/** Invoked with the model key by the registration's `load` hook. */
	loader: VisionDescribeBackendLoader;
	/**
	 * The arbiter (or any object with the cache passthroughs). When
	 * provided the wrapper performs hash → cache lookup before calling
	 * the backend's `describe`.
	 */
	arbiterCache?: VisionEmbeddingCacheLike;
	/** Default model family for the cache key. Defaults to `qwen3-vl`. */
	modelFamily?: string;
	/** Reported as the registration's `estimatedMb`. Defaults to 600. */
	estimatedMb?: number;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Build the `CapabilityRegistration` that gets handed to
 * `arbiter.registerCapability()`.
 *
 * The registration's `run` looks up projected tokens for the request image
 * in the optional cache and forwards them to `backend.describe` as a hint,
 * so backends that support pre-projected tokens skip the projector. The
 * returned result is the backend's result plus `cacheHit`, which is `true`
 * exactly when a hint was forwarded.
 *
 * @param opts - Loader, optional cache, and default family / memory estimate.
 * @returns Registration for the `vision-describe` capability.
 */
export function createVisionCapabilityRegistration(
	opts: CreateVisionCapabilityRegistrationOptions,
): CapabilityRegistration<
	VisionDescribeBackend,
	VisionDescribeRequest,
	VisionDescribeResult
> {
	const { arbiterCache, loader } = opts;
	const defaultFamily = opts.modelFamily ?? "qwen3-vl";
	const capability: ArbiterCapability = "vision-describe";

	// Returns the live cache entry for the request image, or null when the
	// lookup is skipped, misses, or fails.
	const findLiveEntry = (request: VisionDescribeRequest, family: string) => {
		if (!arbiterCache) return null;
		// URL inputs can't be hashed without first fetching; skip the cache
		// lookup rather than paying the fetch cost twice.
		if (request.image.kind === "url") return null;
		try {
			const key = hashVisionInput(request.image, family);
			const entry = arbiterCache.getCachedVisionEmbedding(key);
			return entry && entry.live !== false ? entry : null;
		} catch {
			// Hashing failed (malformed data URL etc.); proceed without
			// cache rather than failing the request.
			return null;
		}
	};

	return {
		capability,
		residentRole: "vision",
		estimatedMb: opts.estimatedMb ?? 600,
		load: async (modelKey) => {
			return await loader(modelKey);
		},
		unload: async (backend) => {
			await backend.dispose();
		},
		run: async (backend, request) => {
			const effectiveFamily = request.modelFamily ?? defaultFamily;
			const entry = findLiveEntry(request, effectiveFamily);
			// Forward only the token payload; the cache's `live` flag is not
			// part of the hint.
			const projectedTokens =
				entry === null
					? undefined
					: {
							tokens: entry.tokens,
							tokenCount: entry.tokenCount,
							hiddenSize: entry.hiddenSize,
						};
			const described = await backend.describe(request, { projectedTokens });
			return {
				...described,
				cacheHit: projectedTokens !== undefined,
			};
		},
	};
}
|
|
190
|
+
|
|
191
|
+
import type {
|
|
192
|
+
IAgentRuntime,
|
|
193
|
+
ImageDescriptionParams,
|
|
194
|
+
ImageDescriptionResult,
|
|
195
|
+
} from "@elizaos/core";
|
|
196
|
+
import {
|
|
197
|
+
type LocalImageDescriptionHandler,
|
|
198
|
+
type VisionCloudFallbackOptions,
|
|
199
|
+
wrapImageDescriptionHandlerWithCloudFallback,
|
|
200
|
+
} from "./cloud-fallback";
|
|
201
|
+
import {
|
|
202
|
+
type VisionVastFallbackOptions,
|
|
203
|
+
wrapImageDescriptionHandlerWithVastFallback,
|
|
204
|
+
} from "./vast-fallback";
|
|
205
|
+
|
|
206
|
+
/**
 * Production wiring for IMAGE_DESCRIPTION: wraps the local handler with
 * the cloud fallback, wraps that with the vast fallback, and adapts the
 * result to the runtime's `(runtime, params)` handler shape.
 *
 * If the composed chain still reports `{ kind: "fallback" }`, the returned
 * handler throws an `Error` whose message names the fallback reason and
 * the underlying cause's message (the reason is repeated when no cause is
 * present). The original cause, when present, is attached as `err.cause`,
 * so the runtime surfaces the failure rather than serving a sentinel
 * result.
 *
 * This is the single entry point `ensure-local-inference-handler.ts` uses
 * at the IMAGE_DESCRIPTION model registration site. Tests exercise the
 * composition via the individual `wrap*` helpers.
 *
 * @param local - Local image-description handler at the head of the chain.
 * @param options - Optional settings forwarded to the cloud / vast wrappers.
 * @returns Runtime-shaped handler; its `runtime` argument is not used.
 */
export function withVisionFallbackChain(
	local: LocalImageDescriptionHandler,
	options: {
		cloud?: VisionCloudFallbackOptions;
		vast?: VisionVastFallbackOptions;
	} = {},
): (
	runtime: IAgentRuntime,
	params: ImageDescriptionParams | string,
) => Promise<ImageDescriptionResult> {
	const localThenCloud = wrapImageDescriptionHandlerWithCloudFallback(
		local,
		options.cloud,
	);
	const chain = wrapImageDescriptionHandlerWithVastFallback(
		localThenCloud,
		options.vast,
	);
	return async (_runtime, params) => {
		const outcome = await chain(params);
		// Anything that is not a fallback marker is a real description.
		if (
			!(
				outcome &&
				typeof outcome === "object" &&
				"kind" in outcome &&
				outcome.kind === "fallback"
			)
		) {
			return outcome as ImageDescriptionResult;
		}
		const { reason, cause } = outcome;
		const causeMsg = cause?.message ?? reason;
		const err: Error & { cause?: unknown } = new Error(
			`[VisionFallback] all IMAGE_DESCRIPTION providers exhausted (reason=${reason}): ${causeMsg}`,
		);
		if (cause) {
			err.cause = cause;
		}
		throw err;
	};
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llama-server vision-describe backend (WS2).
|
|
3
|
+
*
|
|
4
|
+
* Wraps the out-of-process llama-server's `/completion` endpoint with
|
|
5
|
+
* the `image_data` array (base64-encoded payloads) and shapes the
|
|
6
|
+
* response to the WS2 `VisionDescribeBackend` contract.
|
|
7
|
+
*
|
|
8
|
+
* llama-server image-data API recap (verified against llama.cpp commit
|
|
9
|
+
* b8198+, May 2026):
|
|
10
|
+
*
|
|
11
|
+
* POST /completion
|
|
12
|
+
* { "prompt": "<...>USER: [img-12] What's in this image?\nASSISTANT:",
|
|
13
|
+
* "image_data": [
|
|
14
|
+
* { "data": "<base64 png/jpeg>", "id": 12 }
|
|
15
|
+
* ],
|
|
16
|
+
* "n_predict": 256,
|
|
17
|
+
* "temperature": 0.2,
|
|
18
|
+
* "stream": false }
|
|
19
|
+
*
|
|
20
|
+
* Response:
|
|
21
|
+
* { "content": "A photo of a cat.", "stop": true,
|
|
22
|
+
* "timings": { "prompt_ms": 180.4, "predicted_ms": 423.1 } }
|
|
23
|
+
*
|
|
24
|
+
* Server-side mmproj is loaded via the `--mmproj <path>` flag on
|
|
25
|
+
* llama-server startup. The FFI runtime wrapper passes this flag
|
|
26
|
+
* already for tiers with vision enabled; this backend assumes the
|
|
27
|
+
* server has been started with the right mmproj for the active model.
|
|
28
|
+
*
|
|
29
|
+
* Backend responsibility:
|
|
30
|
+
* - Encode the image as base64 (when not already).
|
|
31
|
+
* - Build the prompt with the `[img-N]` placeholder convention.
|
|
32
|
+
* - POST to `/completion`, parse the text + timings.
|
|
33
|
+
* - Honour AbortSignal by passing it through to the fetch call.
|
|
34
|
+
*
|
|
35
|
+
* Backend explicitly does NOT:
|
|
36
|
+
* - Start / stop the server. That's the FFI runtime wrapper's job.
|
|
37
|
+
* - Resolve the mmproj path — the server already has it. The arbiter's
|
|
38
|
+
* `--mmproj` was set when the text model loaded.
|
|
39
|
+
* - Implement projector-token reuse. llama-server has no API to
|
|
40
|
+
* accept pre-projected tokens; if the WS1 cache hit happens, this
|
|
41
|
+
* backend ignores the hint and re-runs the projector. The cache
|
|
42
|
+
* is more useful with the in-process node-llama-cpp backend.
|
|
43
|
+
*
|
|
44
|
+
* Metal / CUDA validation:
|
|
45
|
+
* The llama-server build embeds the same mtmd_encode path the
|
|
46
|
+
* in-process binding will eventually expose. On a Metal build the
|
|
47
|
+
* image encode dispatches through the Metal compute encoder; on a
|
|
48
|
+
* CUDA build through cuBLAS. We have no GPU on this host — see the
|
|
49
|
+
* `__tests__/vision-describe.test.ts` notes for the GPU smoke check.
|
|
50
|
+
*/
|
|
51
|
+
import type { VisionDescribeBackend } from "./types";
|
|
52
|
+
/** Configuration accepted by `createLlamaServerVisionBackend`. */
export interface LlamaServerVisionBackendOptions {
	/**
	 * Root URL of the llama-server instance to talk to. Resolve it once at
	 * load time (the FFI runtime wrapper exposes it via `currentBaseUrl()`)
	 * and pass the result here; the backend keeps using that value unchanged
	 * for every call.
	 */
	baseUrl: string;
	/**
	 * Replacement for the global `fetch`. Production code leaves this unset
	 * and uses the global; tests supply a fake. The type is exactly that of
	 * `fetch`, so a test double presents the same surface as the real one.
	 */
	fetch?: typeof globalThis.fetch;
	/**
	 * `n_predict` budget applied when the caller does not provide
	 * `maxTokens`. A value of 256 lines up with the description-length
	 * budget the Florence-2 / VisionManager path uses today.
	 */
	defaultMaxTokens?: number;
}
|
|
72
|
+
/**
 * Creates a `VisionDescribeBackend` configured by the given options.
 *
 * This is an ambient declaration only; the implementation is not part of
 * this file.
 *
 * @param opts - Backend configuration; see `LlamaServerVisionBackendOptions`
 *   for the meaning of each field.
 * @returns A `VisionDescribeBackend`. NOTE(review): presumably it talks to
 *   the llama-server at `opts.baseUrl` as the file header describes —
 *   confirm against the implementation.
 */
export declare function createLlamaServerVisionBackend(opts: LlamaServerVisionBackendOptions): VisionDescribeBackend;
|
|
73
|
+
//# sourceMappingURL=llama-server.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llama-server.d.ts","sourceRoot":"","sources":["llama-server.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiDG;AAGH,OAAO,KAAK,EACX,qBAAqB,EAGrB,MAAM,SAAS,CAAC;AAEjB,MAAM,WAAW,+BAA+B;IAC/C;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,KAAK,CAAC,EAAE,OAAO,KAAK,CAAC;IACrB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,wBAAgB,8BAA8B,CAC7C,IAAI,EAAE,+BAA+B,GACnC,qBAAqB,CAsEvB"}
|