@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision-describe capability (WS2) — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* This module is what plugin-vision (WS4), the IMAGE_DESCRIPTION
|
|
5
|
+
* handler in `provider.ts`, and computer-use (WS9) import to register
|
|
6
|
+
* vision capability with the WS1 MemoryArbiter.
|
|
7
|
+
*
|
|
8
|
+
* Wiring:
|
|
9
|
+
*
|
|
10
|
+
* const arbiter = service.getMemoryArbiter();
|
|
11
|
+
* const registration = createVisionCapabilityRegistration({
|
|
12
|
+
* loader: createDefaultVisionLoader({ ... }),
|
|
13
|
+
* arbiterCache: arbiter,
|
|
14
|
+
* });
|
|
15
|
+
* arbiter.registerCapability(registration);
|
|
16
|
+
*
|
|
17
|
+
* `createVisionCapabilityRegistration` wraps the underlying backend so
|
|
18
|
+
* the arbiter's `run(request)` path:
|
|
19
|
+
*
|
|
20
|
+
* 1. Hashes the request's image bytes (model-family-scoped).
|
|
21
|
+
* 2. Checks the arbiter's vision-embedding cache.
|
|
22
|
+
* 3. On miss: calls `backend.describe(request)`, lets the backend
|
|
23
|
+
* run its own projector + decoder. Backends that cannot expose projected
|
|
24
|
+
* tokens return decoder text only, so the cache stays empty for this hash.
|
|
25
|
+
* The decoder text is what the caller wanted anyway.
|
|
26
|
+
* 4. On hit: calls `backend.describe(request, { projectedTokens })`.
|
|
27
|
+
* Backends that support pre-projected token reuse skip the
|
|
28
|
+
* projector entirely. Backends without that support ignore the hint; the
|
|
29
|
+
* result is still correct but the projector cost is paid again.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
export {
|
|
33
|
+
type AospLlamaMtmdBinding,
|
|
34
|
+
type AospMtmdHandle,
|
|
35
|
+
type LoadAospVisionBackendOptions,
|
|
36
|
+
loadAospVisionBackend,
|
|
37
|
+
} from "./aosp-unavailable";
|
|
38
|
+
export {
|
|
39
|
+
type CapacitorLlamaMtmdBinding,
|
|
40
|
+
type CapacitorLlamaMtmdHandle,
|
|
41
|
+
type CapacitorLlamaVisionBackendOptions,
|
|
42
|
+
loadCapacitorLlamaVisionBackend,
|
|
43
|
+
VisionBackendUnavailableError,
|
|
44
|
+
type VisionManagerLike,
|
|
45
|
+
} from "./capacitor-llama";
|
|
46
|
+
export {
|
|
47
|
+
classifyLocalVisionError,
|
|
48
|
+
type LocalImageDescriptionHandler,
|
|
49
|
+
type LocalVisionOutcome,
|
|
50
|
+
type VisionCloudFallbackOptions,
|
|
51
|
+
type VisionFallbackReason,
|
|
52
|
+
type WrappedImageDescriptionHandler,
|
|
53
|
+
wrapImageDescriptionHandlerWithCloudFallback,
|
|
54
|
+
} from "./cloud-fallback";
|
|
55
|
+
export {
|
|
56
|
+
hashImageBytes,
|
|
57
|
+
hashRawPixels,
|
|
58
|
+
hashVisionInput,
|
|
59
|
+
resolveImageBytes,
|
|
60
|
+
} from "./hash";
|
|
61
|
+
export {
|
|
62
|
+
createLlamaServerVisionBackend,
|
|
63
|
+
type LlamaServerVisionBackendOptions,
|
|
64
|
+
} from "./llama-server";
|
|
65
|
+
export type {
|
|
66
|
+
VisionDescribeBackend,
|
|
67
|
+
VisionDescribeBackendLoader,
|
|
68
|
+
VisionDescribeBackendOptions,
|
|
69
|
+
VisionDescribeLoadArgs,
|
|
70
|
+
VisionDescribeRequest,
|
|
71
|
+
VisionDescribeResult,
|
|
72
|
+
VisionImageChannelOrder,
|
|
73
|
+
VisionImageInput,
|
|
74
|
+
} from "./types";
|
|
75
|
+
export {
|
|
76
|
+
type VisionVastFallbackOptions,
|
|
77
|
+
wrapImageDescriptionHandlerWithVastFallback,
|
|
78
|
+
} from "./vast-fallback";
|
|
79
|
+
|
|
80
|
+
import type {
|
|
81
|
+
ArbiterCapability,
|
|
82
|
+
CapabilityRegistration,
|
|
83
|
+
} from "../memory-arbiter";
|
|
84
|
+
import { hashVisionInput } from "./hash";
|
|
85
|
+
import type {
|
|
86
|
+
VisionDescribeBackend,
|
|
87
|
+
VisionDescribeBackendLoader,
|
|
88
|
+
VisionDescribeRequest,
|
|
89
|
+
VisionDescribeResult,
|
|
90
|
+
} from "./types";
|
|
91
|
+
|
|
92
|
+
/**
 * The narrow slice of the arbiter this module depends on: a lookup and a
 * store for projected vision embeddings, both keyed by a hash string.
 * Keeping the surface this small lets tests hand in a stand-in cache
 * instead of constructing a full MemoryArbiter.
 */
export interface VisionEmbeddingCacheLike {
	/**
	 * Fetch the entry stored under `hash`; `null` signals a miss.
	 *
	 * `live` is optional. The registration wrapper in this module treats an
	 * entry whose `live` is exactly `false` as a miss.
	 */
	getCachedVisionEmbedding(hash: string): null | {
		hiddenSize: number;
		tokenCount: number;
		tokens: Float32Array;
		live?: boolean;
	};
	/** Store `entry` under `hash`, optionally passing a `ttlMs` value. */
	setCachedVisionEmbedding(
		hash: string,
		entry: {
			hiddenSize: number;
			tokenCount: number;
			tokens: Float32Array;
		},
		ttlMs?: number,
	): void;
}
|
|
113
|
+
|
|
114
|
+
/** Inputs accepted by `createVisionCapabilityRegistration`. */
export interface CreateVisionCapabilityRegistrationOptions {
	/** Loader that the registration's `load(modelKey)` delegates to. */
	loader: VisionDescribeBackendLoader;
	/**
	 * The arbiter, or any object exposing the same cache passthroughs. If
	 * supplied, the wrapper hashes the image and consults the cache before
	 * it invokes the backend's `describe`.
	 */
	arbiterCache?: VisionEmbeddingCacheLike;
	/**
	 * Model family used in the cache key when the request does not name
	 * one. Falls back to `gemma-vl`.
	 */
	modelFamily?: string;
	/** Reported as the registration's `estimatedMb`. Falls back to 600. */
	estimatedMb?: number;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Produce the `CapabilityRegistration` for the "vision-describe"
 * capability, ready to pass to `arbiter.registerCapability()`.
 *
 * - `load` delegates to `opts.loader`.
 * - `unload` disposes the backend.
 * - `run` probes the cache for a projection of the request's image and,
 *   when a live entry is found, forwards it to `backend.describe` as
 *   `projectedTokens` so that backends able to reuse pre-projected tokens
 *   can skip the projector. The resolved value is the backend's result
 *   with a `cacheHit` flag added. This path only reads the cache; it never
 *   writes to it.
 *
 * @param opts Loader, optional cache, and optional defaults.
 * @returns The registration object.
 */
export function createVisionCapabilityRegistration(
	opts: CreateVisionCapabilityRegistrationOptions,
): CapabilityRegistration<
	VisionDescribeBackend,
	VisionDescribeRequest,
	VisionDescribeResult
> {
	const { arbiterCache, loader } = opts;
	const defaultFamily = opts.modelFamily ?? "gemma-vl";
	const capability: ArbiterCapability = "vision-describe";

	// Best-effort cache probe: resolves to a live entry or `undefined`.
	const findLiveEntry = (
		request: VisionDescribeRequest,
		modelFamily: string,
	) => {
		if (!arbiterCache) return undefined;
		// A URL image would have to be fetched just to be hashed; skip the
		// lookup instead of paying for that fetch twice.
		if (request.image.kind === "url") return undefined;
		try {
			const entry = arbiterCache.getCachedVisionEmbedding(
				hashVisionInput(request.image, modelFamily),
			);
			if (entry && entry.live !== false) return entry;
		} catch {
			// Hashing failed (for example a malformed data URL). Carry on
			// without the cache rather than failing the whole request.
		}
		return undefined;
	};

	return {
		capability,
		residentRole: "vision",
		estimatedMb: opts.estimatedMb ?? 600,
		async load(modelKey) {
			return await loader(modelKey);
		},
		async unload(backend) {
			await backend.dispose();
		},
		async run(backend, request) {
			const entry = findLiveEntry(
				request,
				request.modelFamily ?? defaultFamily,
			);
			let projectedTokens:
				| { tokens: Float32Array; tokenCount: number; hiddenSize: number }
				| undefined;
			if (entry) {
				// Forward only the projection fields; `live` stays internal.
				projectedTokens = {
					tokens: entry.tokens,
					tokenCount: entry.tokenCount,
					hiddenSize: entry.hiddenSize,
				};
			}
			const described = await backend.describe(request, { projectedTokens });
			return {
				...described,
				cacheHit: projectedTokens !== undefined,
			};
		},
	};
}
|
|
190
|
+
|
|
191
|
+
import type {
|
|
192
|
+
IAgentRuntime,
|
|
193
|
+
ImageDescriptionParams,
|
|
194
|
+
ImageDescriptionResult,
|
|
195
|
+
} from "@elizaos/core";
|
|
196
|
+
import {
|
|
197
|
+
type LocalImageDescriptionHandler,
|
|
198
|
+
type VisionCloudFallbackOptions,
|
|
199
|
+
wrapImageDescriptionHandlerWithCloudFallback,
|
|
200
|
+
} from "./cloud-fallback";
|
|
201
|
+
import {
|
|
202
|
+
type VisionVastFallbackOptions,
|
|
203
|
+
wrapImageDescriptionHandlerWithVastFallback,
|
|
204
|
+
} from "./vast-fallback";
|
|
205
|
+
|
|
206
|
+
/**
 * Production wiring for IMAGE_DESCRIPTION: the local handler is wrapped
 * with the cloud fallback, that wrapper is wrapped with the vast
 * fallback, and the composed chain is adapted to the runtime handler
 * signature `(runtime, params) => Promise<ImageDescriptionResult>`.
 *
 * If the composed chain still reports `{ kind: "fallback" }`, the
 * returned handler throws an `Error` whose message carries the fallback
 * reason and the cause's message (or the reason again when there is no
 * cause), and whose `cause` is the underlying cause when one exists.
 * The runtime therefore sees a failure, never a sentinel result.
 *
 * `ensure-local-inference-handler.ts` uses this as its single entry
 * point when registering the IMAGE_DESCRIPTION model; tests exercise the
 * individual `wrap*` helpers instead.
 *
 * @param local Local image-description handler at the head of the chain.
 * @param options Optional settings forwarded to the cloud and vast wrappers.
 * @returns A handler with the runtime's call shape; the runtime argument
 *   itself is ignored.
 */
export function withVisionFallbackChain(
	local: LocalImageDescriptionHandler,
	options: {
		cloud?: VisionCloudFallbackOptions;
		vast?: VisionVastFallbackOptions;
	} = {},
): (
	runtime: IAgentRuntime,
	params: ImageDescriptionParams | string,
) => Promise<ImageDescriptionResult> {
	const localThenCloud = wrapImageDescriptionHandlerWithCloudFallback(
		local,
		options.cloud,
	);
	const chain = wrapImageDescriptionHandlerWithVastFallback(
		localThenCloud,
		options.vast,
	);

	return async (_runtime, params) => {
		const outcome = await chain(params);

		// Anything other than a `{ kind: "fallback" }` marker is a real result.
		if (
			!outcome ||
			typeof outcome !== "object" ||
			!("kind" in outcome) ||
			outcome.kind !== "fallback"
		) {
			return outcome as ImageDescriptionResult;
		}

		// Every provider declined: surface that as an error.
		const detail = outcome.cause?.message ?? outcome.reason;
		const failure = new Error(
			`[VisionFallback] all IMAGE_DESCRIPTION providers exhausted (reason=${outcome.reason}): ${detail}`,
		);
		if (outcome.cause) {
			(failure as Error & { cause?: unknown }).cause = outcome.cause;
		}
		throw failure;
	};
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llama-server vision-describe backend (WS2).
|
|
3
|
+
*
|
|
4
|
+
* Wraps the out-of-process llama-server's `/completion` endpoint with
|
|
5
|
+
* the `image_data` array (base64-encoded payloads) and shapes the
|
|
6
|
+
* response to the WS2 `VisionDescribeBackend` contract.
|
|
7
|
+
*
|
|
8
|
+
* llama-server image-data API recap (verified against llama.cpp commit
|
|
9
|
+
* b8198+, May 2026):
|
|
10
|
+
*
|
|
11
|
+
* POST /completion
|
|
12
|
+
* { "prompt": "<...>USER: [img-12] What's in this image?\nASSISTANT:",
|
|
13
|
+
* "image_data": [
|
|
14
|
+
* { "data": "<base64 png/jpeg>", "id": 12 }
|
|
15
|
+
* ],
|
|
16
|
+
* "n_predict": 256,
|
|
17
|
+
* "temperature": 0.2,
|
|
18
|
+
* "stream": false }
|
|
19
|
+
*
|
|
20
|
+
* Response:
|
|
21
|
+
* { "content": "A photo of a cat.", "stop": true,
|
|
22
|
+
* "timings": { "prompt_ms": 180.4, "predicted_ms": 423.1 } }
|
|
23
|
+
*
|
|
24
|
+
* Server-side mmproj is loaded via the `--mmproj <path>` flag on
|
|
25
|
+
* llama-server startup. The FFI runtime wrapper passes this flag
|
|
26
|
+
* already for tiers with vision enabled; this backend assumes the
|
|
27
|
+
* server has been started with the right mmproj for the active model.
|
|
28
|
+
*
|
|
29
|
+
* Backend responsibility:
|
|
30
|
+
* - Encode the image as base64 (when not already).
|
|
31
|
+
* - Build the prompt with the `[img-N]` placeholder convention.
|
|
32
|
+
* - POST to `/completion`, parse the text + timings.
|
|
33
|
+
* - Honour AbortSignal by passing it through to the fetch call.
|
|
34
|
+
*
|
|
35
|
+
* Backend explicitly does NOT:
|
|
36
|
+
* - Start / stop the server. That's the FFI runtime wrapper's job.
|
|
37
|
+
* - Resolve the mmproj path — the server already has it. The arbiter's
|
|
38
|
+
* `--mmproj` was set when the text model loaded.
|
|
39
|
+
* - Implement projector-token reuse. llama-server has no API to
|
|
40
|
+
* accept pre-projected tokens; if the WS1 cache hit happens, this
|
|
41
|
+
* backend ignores the hint and re-runs the projector. The cache
|
|
42
|
+
* is more useful with the in-process node-llama-cpp backend.
|
|
43
|
+
*
|
|
44
|
+
* Metal / CUDA validation:
|
|
45
|
+
* The llama-server build embeds the same mtmd_encode path the
|
|
46
|
+
* in-process binding will eventually expose. On a Metal build the
|
|
47
|
+
* image encode dispatches through the Metal compute encoder; on a
|
|
48
|
+
* CUDA build through cuBLAS. We have no GPU on this host — see the
|
|
49
|
+
* `__tests__/vision-describe.test.ts` notes for the GPU smoke check.
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
import { resolveImageBytes } from "./hash";
|
|
53
|
+
import type {
|
|
54
|
+
VisionDescribeBackend,
|
|
55
|
+
VisionDescribeRequest,
|
|
56
|
+
VisionDescribeResult,
|
|
57
|
+
} from "./types";
|
|
58
|
+
|
|
59
|
+
/** Construction-time settings for the llama-server vision backend. */
export interface LlamaServerVisionBackendOptions {
  /**
   * Root URL of the running llama-server. The FFI runtime wrapper
   * reports it through `currentBaseUrl()`; the caller resolves it once
   * at load time and the backend reuses it unchanged for every call.
   */
  baseUrl: string;

  /**
   * Replacement for the global `fetch`. Tests inject a fake here;
   * production leaves it unset. It has the exact `fetch` signature so
   * tests exercise the same surface as the real thing.
   */
  fetch?: typeof fetch;

  /**
   * `n_predict` budget used when a request carries no `maxTokens`.
   * The 256 default matches the description-length budget the
   * Florence-2 / VisionManager path uses today.
   */
  defaultMaxTokens?: number;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Create a `VisionDescribeBackend` that describes images by POSTing to the
 * `/completion` endpoint of an already-running llama-server.
 *
 * The backend does not start or stop the server; it only remembers the base
 * URL it was constructed with.
 *
 * @param opts Server base URL plus an optional `fetch` override and a default
 *   `n_predict` budget.
 * @returns A backend with id `"llama-server"`.
 * @throws Error if `opts.baseUrl` is empty once trailing slashes are removed.
 */
export function createLlamaServerVisionBackend(
  opts: LlamaServerVisionBackendOptions,
): VisionDescribeBackend {
  const fetchImpl = opts.fetch ?? globalThis.fetch;
  const defaultMaxTokens = opts.defaultMaxTokens ?? 256;
  // Drop every trailing slash (not just one) so the request URL is always
  // exactly `<base>/completion`.
  const baseUrl = opts.baseUrl.replace(/\/+$/, "");

  if (!baseUrl) {
    throw new Error(
      "[vision/llama-server] baseUrl is required; pass FFI runtime's currentBaseUrl()",
    );
  }

  // Set by dispose(). Tracked separately from baseUrl so that a late
  // describe() fails loudly instead of fetching the relative URL
  // "/completion".
  let disposed = false;

  return {
    id: "llama-server",

    /**
     * Send one image + prompt to `/completion` and shape the reply.
     *
     * @throws Error if called after `dispose()`, if the server answers with
     *   a non-2xx status, or if the response has no string `content`.
     */
    async describe(
      request: VisionDescribeRequest,
    ): Promise<VisionDescribeResult> {
      if (disposed) {
        throw new Error(
          "[vision/llama-server] describe() called after dispose()",
        );
      }
      const { bytes, mimeType } = resolveImageBytes(request.image);
      const base64 = Buffer.from(bytes).toString("base64");
      const prompt = buildVisionPrompt(request.prompt);
      const startMs = Date.now();
      const body = JSON.stringify({
        prompt,
        // The id must match the `[img-12]` placeholder in the prompt.
        image_data: [{ data: base64, id: 12 }],
        n_predict: request.maxTokens ?? defaultMaxTokens,
        temperature: request.temperature ?? 0.2,
        stream: false,
        // `cache_prompt: false` here so each describe call gets a
        // fresh slot; the WS1 vision-embedding cache handles repeat-
        // frame reuse on the JS side, and the server-side prompt
        // cache would only conflict with that (different KV state
        // for the same projector tokens).
        cache_prompt: false,
      });
      const res = await fetchImpl(`${baseUrl}/completion`, {
        method: "POST",
        headers: {
          "content-type": "application/json",
          ...(mimeType ? { "x-image-mime": mimeType } : {}),
        },
        body,
        signal: request.signal,
      });
      if (!res.ok) {
        const text = await res.text().catch(() => "<unreadable>");
        throw new Error(
          `[vision/llama-server] /completion returned ${res.status}: ${text.slice(0, 200)}`,
        );
      }
      // The body may legally be JSON `null` or a non-object; optional
      // chaining routes those to the explicit error below instead of a
      // TypeError on property access.
      const payload = (await res.json()) as {
        content?: unknown;
        timings?: { prompt_ms?: number; predicted_ms?: number };
      } | null;
      const content = payload?.content;
      if (typeof content !== "string") {
        throw new Error(
          "[vision/llama-server] /completion response missing string `content`",
        );
      }
      const elapsed = Date.now() - startMs;
      return shape(content, {
        projectorMs: payload?.timings?.prompt_ms,
        // Fall back to wall-clock time when the server reports no timings.
        decodeMs: payload?.timings?.predicted_ms ?? elapsed,
        cacheHit: false,
      });
    },

    /** Mark the backend unusable. Safe to call more than once. */
    async dispose() {
      // llama-server lifetime is owned by the FFI runtime wrapper.
      // This backend just holds the baseUrl; nothing to free.
      disposed = true;
    },
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Wrap the caller's question in the chat-turn template sent to llama-server.
 *
 * @param userPrompt Optional question; blank or missing input falls back to
 *   a generic "describe this image" request.
 * @returns The full prompt string, image placeholder included.
 */
function buildVisionPrompt(userPrompt?: string): string {
  const cleaned = userPrompt?.trim();
  let ask = "Describe what is in this image.";
  if (cleaned) {
    ask = cleaned;
  }
  // llama-server's mtmd path swaps the `[img-N]` marker for the encoded
  // image tokens. `N` has to equal the `image_data[*].id` in the request
  // body; 12 is arbitrary (the stock llama-server example uses small
  // integer ids, and any positive integer works).
  return `<start_of_turn>user\n[img-12]\n${ask}<end_of_turn>\n<start_of_turn>model\n`;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Turn raw completion text into a `VisionDescribeResult`.
 *
 * The description is the trimmed text. The title is whatever precedes the
 * first `.`, `!` or `?` (trimmed), or `"Image"` when that piece is empty.
 *
 * @param text Raw `content` string from `/completion`.
 * @param telemetry Timing / cache fields copied onto the result as-is.
 * @throws Error when `text` is empty or whitespace-only.
 */
function shape(
  text: string,
  telemetry: { projectorMs?: number; decodeMs?: number; cacheHit?: boolean },
): VisionDescribeResult {
  const description = text.trim();
  if (description.length === 0) {
    throw new Error("[vision/llama-server] empty text from /completion");
  }

  // Everything up to the first sentence terminator, or the whole text
  // when there is none.
  const cut = description.search(/[.!?]/);
  const firstSentence = cut === -1 ? description : description.slice(0, cut);
  const title = firstSentence.trim() || "Image";

  return { title, description, ...telemetry };
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision-language describe-image types (WS2).
|
|
3
|
+
*
|
|
4
|
+
* Two layers live here:
|
|
5
|
+
*
|
|
6
|
+
* 1. The **request/result** contract every WS2 backend implements
|
|
7
|
+
* (`VisionDescribeRequest`, `VisionDescribeResult`). Callers pass
|
|
8
|
+
* raw image bytes + a prompt; backends return a title+description.
|
|
9
|
+
*
|
|
10
|
+
* 2. The **backend** interface (`VisionDescribeBackend`) that the
|
|
11
|
+
* `MemoryArbiter` registers as a capability handler. One backend
|
|
12
|
+
* per binding family (node-llama-cpp / llama-server / AOSP libllama
|
|
13
|
+
* shim). All three implement the same `load → describe → unload`
|
|
14
|
+
* shape so the arbiter can swap between them without caring how
|
|
15
|
+
* the projector is wired underneath.
|
|
16
|
+
*
|
|
17
|
+
* Why a separate file: the arbiter's `CapabilityRegistration<TBackend,
|
|
18
|
+
* TRequest, TResult>` is generic; pinning concrete shapes here keeps
|
|
19
|
+
* the registration sites short and removes a dozen casts at the
|
|
20
|
+
* call-site.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/**
 * Byte layout of a raw pixel buffer. RGBA is what most sources deliver
 * (HTMLCanvasElement, Capacitor `Camera`, the desktop `puppeteer-core`
 * screenshot pipeline). The encoder normalizes internally, so this type
 * exists for the hash step: it lets hashing settle on one stable byte
 * layout regardless of the order the platform happened to provide.
 */
export type VisionImageChannelOrder =
  | "rgba"
  | "rgb"
  | "bgra"
  | "bgr";
|
|
31
|
+
|
|
32
|
+
/**
 * Image payload a backend encodes. The arbiter never inspects it; it is
 * passed through to the backend untouched. Several wrappers are accepted
 * (bytes / base64 / data URL / URL) because the three upstream entry
 * points — HTTP route, agent runtime model handler, computer-use frame
 * loop — each favour a different one. The backend converts to bytes once.
 */
export type VisionImageInput =
  | { kind: "bytes"; bytes: Uint8Array; mimeType?: string }
  | { kind: "base64"; base64: string; mimeType?: string }
  | { kind: "dataUrl"; dataUrl: string }
  | { kind: "url"; url: string; mimeType?: string };
|
|
45
|
+
|
|
46
|
+
/**
 * What a caller hands to `describeImage`.
 *
 * `modelFamily` keeps projected-token cache entries from different VL
 * families apart even though they share one hash space: Gemma-VL tokens
 * cannot stand in for Florence-2 tokens. The default family is `gemma-vl`
 * (the WS2 deliverable); every further family registers under its own
 * identifier.
 */
export interface VisionDescribeRequest {
  /** The image to describe, in any of the accepted wrappers. */
  image: VisionImageInput;

  /** Optional question or instruction to ask about the image. */
  prompt?: string;

  /**
   * Model family identifier. Namespaces the projector cache, so switching
   * the backend's model family invalidates previously cached tokens.
   * `"gemma-vl"` is assumed when omitted.
   */
  modelFamily?: string;

  /** Upper bound on output tokens; 256 by default (description-length budget). */
  maxTokens?: number;

  /** Sampling temperature in 0..1; 0.2 by default so descriptions stay near-deterministic. */
  temperature?: number;

  /** Optional abort signal; the llama-server backend forwards it to its fetch call. */
  signal?: AbortSignal;

  /**
   * Callback invoked per generated piece of text. When provided and the
   * backend supports streaming vision (the fused ABI-v13 path), the
   * description is decoded token-by-token and each piece arrives here as
   * it is produced — the same pipe chat text uses. Backends without a
   * streaming describe ignore it and only return the final result.
   */
  onTextChunk?: (chunk: string) => void | Promise<void>;

  /** Token cap per step for streaming describe; smaller values give finer-grained UI updates. */
  maxTokensPerStep?: number;
}
|
|
77
|
+
|
|
78
|
+
/** What a backend returns; matches the shape ImageDescriptionResult expects. */
export interface VisionDescribeResult {
  /** Short title for the image. */
  title: string;
  /** Full description text. */
  description: string;
  /** Milliseconds spent in the projector, best-effort (arbiter telemetry). */
  projectorMs?: number;
  /** Milliseconds spent in the decoder, best-effort. */
  decodeMs?: number;
  /** True when the projected tokens were served from the WS1 vision cache. */
  cacheHit?: boolean;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Arguments resolved once per load of a vision-describe backend. The
 * arbiter's `load(modelKey)` carries nothing but an opaque key; the
 * binding turns that key into concrete model + mmproj paths via this
 * struct, which `createVisionCapabilityRegistration` fills in from the
 * catalog.
 */
export interface VisionDescribeLoadArgs {
  /** Absolute path of the text decoder GGUF (the "main" model). */
  modelPath: string;

  /** Absolute path of the mmproj projector GGUF that pairs with it. */
  mmprojPath: string;

  /**
   * Preferred GPU offload. Each backend maps it onto its own setting:
   * `gpuLayers` for node-llama-cpp, `--n-gpu-layers` for llama-server,
   * `eliza_llama_model_params_set_n_gpu_layers` for the AOSP libllama
   * shim. `"auto"` leaves the choice to the binding; a number is passed
   * through verbatim.
   * NOTE(review): `"max"` is accepted by the type but its mapping is not
   * described here — confirm per binding.
   */
  gpuLayers?: number | "auto" | "max";

  /** Maximum sampled context window, in tokens. 4096 when omitted. */
  contextSize?: number;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Contract shared by every WS2 backend. Deliberately small: `describe` is
 * the only method the arbiter calls directly, and the arbiter's `unload`
 * wraps `dispose` so a backend can release GPU/VRAM and close file
 * descriptors when it is evicted.
 */
export interface VisionDescribeBackend {
  /** Stable backend identifier: `"capacitor-llama"`, `"llama-server"`, `"aosp"`, or `"fake"` (tests only). */
  readonly id: "capacitor-llama" | "llama-server" | "aosp" | "fake";

  /**
   * Perform one describe pass. If the caller's hash already hit the
   * projector cache, the cached tokens arrive in `args.projectedTokens`
   * and a backend MAY use them rather than re-running the projector.
   * Backends with no projector-token reuse simply ignore the field.
   */
  describe(
    request: VisionDescribeRequest,
    args?: VisionDescribeBackendOptions,
  ): Promise<VisionDescribeResult>;

  /** Free the loaded weights. Calling it repeatedly is safe. */
  dispose(): Promise<void>;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Options the arbiter wrapper supplies on each backend call. Kept apart
 * from `VisionDescribeRequest` so arbiter implementation details do not
 * leak into the caller-facing request type.
 */
export interface VisionDescribeBackendOptions {
  /**
   * Projected tokens already computed and stored in the WS1
   * vision-embedding cache. If given, the backend SHOULD bypass its own
   * projector and decode straight from these tokens. A backend unable to
   * do so can ignore the field and still return a correct result; in that
   * case the arbiter's wrapper records `cacheHit: false`.
   */
  projectedTokens?: {
    tokens: Float32Array;
    tokenCount: number;
    hiddenSize: number;
  };
}
|
|
154
|
+
|
|
155
|
+
/**
 * Load function for the capability handler. The arbiter passes a model
 * key (for example `"gemma-vl-4b"`); the implementation looks up the
 * matching `(modelPath, mmprojPath)` pair in the catalog + installed
 * registry and resolves with a live backend.
 */
export type VisionDescribeBackendLoader = (
  modelKey: string,
) => Promise<VisionDescribeBackend>;
|