@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
// Coverage for the pure backend-routing decision (#8848). decideBackend is the
|
|
2
|
+
// optimization-kernel enforcement point: a catalog model declares the fused
|
|
3
|
+
// kernels it requires (turbo/qjl/polarquant/…), and the dispatcher computes
|
|
4
|
+
// which of them the installed binary's CAPABILITIES.json does NOT satisfy so the
|
|
5
|
+
// caller can tell the operator to rebuild instead of letting the model silently
|
|
6
|
+
// run de-optimized. These branches gate that contract, so they are pinned here.
|
|
7
|
+
|
|
8
|
+
import type { CatalogModel } from "@elizaos/shared";
|
|
9
|
+
import { describe, expect, it } from "vitest";
|
|
10
|
+
import { decideBackend } from "./backend";
|
|
11
|
+
|
|
12
|
+
/** Minimal CatalogModel carrying only the requiresKernel field decideBackend reads. */
|
|
13
|
+
function catalog(requiresKernel: string[]): CatalogModel {
|
|
14
|
+
return {
|
|
15
|
+
runtime: { optimizations: { requiresKernel } },
|
|
16
|
+
} as unknown as CatalogModel;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
describe("decideBackend", () => {
|
|
20
|
+
it("routes to llama-cpp with reason=env-override when forced, regardless of kernels", () => {
|
|
21
|
+
const d = decideBackend({
|
|
22
|
+
override: "llama-cpp",
|
|
23
|
+
catalog: catalog(["turbo3"]),
|
|
24
|
+
llamaCppAvailable: true,
|
|
25
|
+
});
|
|
26
|
+
expect(d.backend).toBe("llama-cpp");
|
|
27
|
+
expect(d.reason).toBe("env-override");
|
|
28
|
+
expect(d.kernels).toEqual(["turbo3"]);
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
it("uses reason=default for a model with no required kernels", () => {
|
|
32
|
+
const d = decideBackend({
|
|
33
|
+
override: "auto",
|
|
34
|
+
catalog: undefined,
|
|
35
|
+
llamaCppAvailable: true,
|
|
36
|
+
});
|
|
37
|
+
expect(d.reason).toBe("default");
|
|
38
|
+
expect(d.kernels).toEqual([]);
|
|
39
|
+
expect(d.unsatisfiedKernels).toBeUndefined();
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it("uses reason=kernel-required when the catalog declares required kernels", () => {
|
|
43
|
+
const d = decideBackend({
|
|
44
|
+
override: "auto",
|
|
45
|
+
catalog: catalog(["turbo3", "qjl_full"]),
|
|
46
|
+
llamaCppAvailable: true,
|
|
47
|
+
});
|
|
48
|
+
expect(d.reason).toBe("kernel-required");
|
|
49
|
+
expect(d.kernels).toEqual(["turbo3", "qjl_full"]);
|
|
50
|
+
// No CAPABILITIES probe → trust the declaration, defer to load attempt.
|
|
51
|
+
expect(d.unsatisfiedKernels).toBeUndefined();
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it("flags the required kernels the installed binary does not satisfy", () => {
|
|
55
|
+
const d = decideBackend({
|
|
56
|
+
override: "auto",
|
|
57
|
+
catalog: catalog(["turbo3", "qjl_full"]),
|
|
58
|
+
llamaCppAvailable: true,
|
|
59
|
+
binaryKernels: { turbo3: true, qjl_full: false },
|
|
60
|
+
});
|
|
61
|
+
// turbo3 satisfied, qjl_full not → operator must rebuild for qjl_full.
|
|
62
|
+
expect(d.unsatisfiedKernels).toEqual(["qjl_full"]);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it("reports an empty unsatisfied set when every required kernel is present", () => {
|
|
66
|
+
const d = decideBackend({
|
|
67
|
+
override: "auto",
|
|
68
|
+
catalog: catalog(["turbo3"]),
|
|
69
|
+
llamaCppAvailable: true,
|
|
70
|
+
binaryKernels: { turbo3: true },
|
|
71
|
+
});
|
|
72
|
+
expect(d.unsatisfiedKernels).toEqual([]);
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
it("treats a kernel missing from the probe as unsatisfied (not silently ok)", () => {
|
|
76
|
+
const d = decideBackend({
|
|
77
|
+
override: "auto",
|
|
78
|
+
catalog: catalog(["turbo3", "polarquant"]),
|
|
79
|
+
llamaCppAvailable: true,
|
|
80
|
+
binaryKernels: { turbo3: true }, // polarquant absent from the map
|
|
81
|
+
});
|
|
82
|
+
expect(d.unsatisfiedKernels).toEqual(["polarquant"]);
|
|
83
|
+
});
|
|
84
|
+
});
|
package/src/services/backend.ts
CHANGED
|
@@ -1,21 +1,33 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Local-inference backend interface and dispatcher.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Both shipping backends are served by the SAME in-process FFI
|
|
5
|
+
* `libelizainference` library behind the SAME streaming symbols — the
|
|
6
|
+
* difference is which in-process runtime the FFI's `llm_backend_select` drives:
|
|
5
7
|
*
|
|
6
|
-
* - `llama-cpp`
|
|
8
|
+
* - `llama-cpp` → the optimized in-process FFI llama.cpp path (the default).
|
|
7
9
|
* MTP, n-gram drafter, lookahead, `-ot` MoE offload, TurboQuant KV
|
|
8
|
-
* cache, mlock/no-mmap/mmproj, etc. all live here.
|
|
10
|
+
* cache, mlock/no-mmap/mmproj, etc. all live here. Serves the `.gguf`.
|
|
11
|
+
* - `litert-lm` → the in-process LiteRT-LM backend (Android NPU / GPU
|
|
12
|
+
* delegate, gated `-DELIZA_ENABLE_LITERT`). Serves a `.litertlm` text
|
|
13
|
+
* artifact staged under `<bundleRoot>/text/`. The dispatcher passes
|
|
14
|
+
* `ELIZA_LLM_BACKEND=litert-lm` through the load; the C-side
|
|
15
|
+
* `llm_backend_select` reads it (and probes `text/*.litertlm`) and routes
|
|
16
|
+
* to the LiteRT factory. See `tools/omnivoice/src/llm-backend.h`.
|
|
9
17
|
*
|
|
10
18
|
* The dispatcher decides which one to use per-load based on:
|
|
11
19
|
*
|
|
12
|
-
* 1.
|
|
20
|
+
* 1. `ELIZA_INFERENCE_BACKEND` env override — `llama-cpp` / `litert-lm` /
|
|
21
|
+
* `auto`. A `litert-lm` force is honoured only when the build/platform
|
|
22
|
+
* supports LiteRT and the bundle ships a `.litertlm` (else hard error).
|
|
23
|
+
* 2. A `.litertlm` text artifact in the bundle AND LiteRT support on this
|
|
24
|
+
* build/platform → `litert-lm`. GGUF stays the default whenever the
|
|
25
|
+
* LiteRT artifact or the build support is absent.
|
|
26
|
+
* 3. Catalog `runtime.optimizations.requiresKernel` — if any specialised
|
|
13
27
|
* llama.cpp kernel is required (e.g. `turbo3`), the
|
|
14
|
-
* dispatcher
|
|
28
|
+
* dispatcher picks `llama-cpp`. Legacy bindings cannot
|
|
15
29
|
* provide these kernels at all.
|
|
16
|
-
*
|
|
17
|
-
* compatibility, but generation still routes through `llama-cpp`.
|
|
18
|
-
* 3. Default: optimized llama.cpp FFI.
|
|
30
|
+
* 4. Default: optimized llama.cpp FFI.
|
|
19
31
|
*
|
|
20
32
|
* The dispatcher does NOT own backend internals. It owns selection only,
|
|
21
33
|
* plus a small load-state
|
|
@@ -53,6 +65,13 @@ export interface BackendLoadOverrides {
|
|
|
53
65
|
bundleRoot?: string;
|
|
54
66
|
/** Manifest path for direct bundle loads not present in the registry. */
|
|
55
67
|
manifestPath?: string;
|
|
68
|
+
/**
|
|
69
|
+
* Absolute path to a `.litertlm` LiteRT-LM text artifact staged under
|
|
70
|
+
* `<bundleRoot>/text/`, when the bundle ships one. Presence (plus LiteRT
|
|
71
|
+
* build/platform support) routes the load to the `litert-lm` backend; the
|
|
72
|
+
* `.gguf` `modelPath` stays the GGUF default otherwise.
|
|
73
|
+
*/
|
|
74
|
+
litertModelPath?: string;
|
|
56
75
|
}
|
|
57
76
|
|
|
58
77
|
export interface BackendPlan {
|
|
@@ -112,6 +131,14 @@ export interface GenerateArgs extends StructuredGenerateParams {
|
|
|
112
131
|
* streams even when a `grammar` is set).
|
|
113
132
|
*/
|
|
114
133
|
onTextChunk?: (chunk: string) => void | Promise<void>;
|
|
134
|
+
/**
|
|
135
|
+
* Max tokens the FFI backend decodes per `llmStreamNext` step — i.e. the
|
|
136
|
+
* granularity of `onTextChunk` emission. Smaller ⇒ smoother token-by-token
|
|
137
|
+
* streaming to the UI at the cost of more FFI round-trips per response.
|
|
138
|
+
* Unset ⇒ the backend default (coarse, throughput-tuned). The text/chat
|
|
139
|
+
* handler sets a small value for smooth streaming; voice leaves it unset.
|
|
140
|
+
*/
|
|
141
|
+
maxTokensPerStep?: number;
|
|
115
142
|
/**
|
|
116
143
|
* Whether this generation is user-visible text and therefore eligible for
|
|
117
144
|
* voice-mode TTS. Internal JSON / planner calls must not be spoken.
|
|
@@ -143,6 +170,16 @@ export interface LocalGenerateWithUsageResult {
|
|
|
143
170
|
};
|
|
144
171
|
}
|
|
145
172
|
|
|
173
|
+
/**
|
|
174
|
+
* The in-process runtime the FFI streaming pipe drives for a given load.
|
|
175
|
+
* `llama-cpp` is the default GGUF path; `litert-lm` is the LiteRT-LM
|
|
176
|
+
* `.litertlm` path (same FFI symbols, selected via `ELIZA_LLM_BACKEND` +
|
|
177
|
+
* the C-side `llm_backend_select`). This is the dispatcher's *selection*,
|
|
178
|
+
* distinct from `LocalInferenceBackend.id` (the implementation surface, which
|
|
179
|
+
* stays the single fused FFI backend regardless of the runtime it drives).
|
|
180
|
+
*/
|
|
181
|
+
export type BackendId = "llama-cpp" | "litert-lm";
|
|
182
|
+
|
|
146
183
|
export interface LocalRuntimeLoadConfig {
|
|
147
184
|
modelId: string | null;
|
|
148
185
|
modelPath: string | null;
|
|
@@ -152,7 +189,7 @@ export interface LocalRuntimeLoadConfig {
|
|
|
152
189
|
gpuLayers: number | null;
|
|
153
190
|
parallel: number;
|
|
154
191
|
binaryPath: string | null;
|
|
155
|
-
backend:
|
|
192
|
+
backend: BackendId | null;
|
|
156
193
|
mtp: {
|
|
157
194
|
specType: "draft-mtp";
|
|
158
195
|
draftMin: number;
|
|
@@ -169,7 +206,7 @@ export interface LocalRuntimeLoadConfig {
|
|
|
169
206
|
*/
|
|
170
207
|
export interface LocalInferenceBackend {
|
|
171
208
|
/** Identifier for the concrete backend implementation. */
|
|
172
|
-
readonly id: "
|
|
209
|
+
readonly id: "llama-cpp";
|
|
173
210
|
available(): Promise<boolean>;
|
|
174
211
|
load(plan: BackendPlan): Promise<void>;
|
|
175
212
|
unload(): Promise<void>;
|
|
@@ -201,6 +238,12 @@ export interface LocalInferenceBackend {
|
|
|
201
238
|
maxTokens?: number;
|
|
202
239
|
temperature?: number;
|
|
203
240
|
signal?: AbortSignal;
|
|
241
|
+
/** Per-token callback for streaming vision describe (ABI v13). When set and
|
|
242
|
+
* the backend supports streaming, the description is decoded token-by-token
|
|
243
|
+
* through the same pipe as chat text; otherwise the backend returns the
|
|
244
|
+
* full description and ignores it. */
|
|
245
|
+
onTextChunk?: (chunk: string) => void | Promise<void>;
|
|
246
|
+
maxTokensPerStep?: number;
|
|
204
247
|
}): Promise<{
|
|
205
248
|
text: string;
|
|
206
249
|
projectorMs?: number;
|
|
@@ -249,7 +292,15 @@ export interface LocalInferenceBackend {
|
|
|
249
292
|
currentRuntimeLoadConfig?(): LocalRuntimeLoadConfig | null;
|
|
250
293
|
}
|
|
251
294
|
|
|
252
|
-
export type BackendOverride = "auto" | "llama-cpp";
|
|
295
|
+
export type BackendOverride = "auto" | "llama-cpp" | "litert-lm";
|
|
296
|
+
|
|
297
|
+
/**
|
|
298
|
+
* The env name the C-side `llm_backend_select` reads to HARD-select an
|
|
299
|
+
* in-process runtime. The dispatcher sets it to `litert-lm` for a LiteRT load
|
|
300
|
+
* and clears it for a llama.cpp load so a prior LiteRT select never leaks into
|
|
301
|
+
* the next GGUF load. Mirrors `tools/omnivoice/src/llm-backend.h`.
|
|
302
|
+
*/
|
|
303
|
+
export const ELIZA_LLM_BACKEND_ENV = "ELIZA_LLM_BACKEND" as const;
|
|
253
304
|
|
|
254
305
|
export function readBackendOverride(): BackendOverride {
|
|
255
306
|
const raw = process.env.ELIZA_INFERENCE_BACKEND?.trim().toLowerCase();
|
|
@@ -257,9 +308,42 @@ export function readBackendOverride(): BackendOverride {
|
|
|
257
308
|
if (raw === "llama-cpp") {
|
|
258
309
|
return "llama-cpp";
|
|
259
310
|
}
|
|
311
|
+
if (raw === "litert-lm" || raw === "litert" || raw === "litert_lm") {
|
|
312
|
+
return "litert-lm";
|
|
313
|
+
}
|
|
260
314
|
return "auto";
|
|
261
315
|
}
|
|
262
316
|
|
|
317
|
+
/**
 * Reports whether the dispatcher may route a load to the LiteRT-LM in-process
 * backend on this build/platform, judged purely from environment signals.
 *
 * The C-side `LlmBackendFactory::available()` stays the runtime authority (it
 * is compiled in only under `-DELIZA_ENABLE_LITERT` and reports false when
 * the NPU/GPU delegate is absent). The TS dispatcher, however, has to pick a
 * route *before* the FFI load, so it gates on the signals the build/launcher
 * exports:
 *
 *   - `ELIZA_ENABLE_LITERT` set to a truthy flag value — the explicit opt-in
 *     of a LiteRT-enabled build (mirrors the `-DELIZA_ENABLE_LITERT` CMake
 *     gate).
 *   - `ELIZA_PLATFORM` equal to `android` (trimmed, case-insensitive) — the
 *     NPU/GPU-delegate target where a `.litertlm` bundle is the on-device
 *     path.
 *
 * When neither signal is present the result is false, so a bundle that ships
 * a `.litertlm` still loads its GGUF (`llama-cpp`) on builds without LiteRT:
 * the artifact is additive, never a requirement.
 *
 * @param env Environment to inspect; defaults to `process.env`.
 * @returns `true` when either signal is present, otherwise `false`.
 */
export function litertBackendSupported(
  env: NodeJS.ProcessEnv = process.env,
): boolean {
  const explicitlyEnabled = envFlagIn(env, "ELIZA_ENABLE_LITERT");
  if (explicitlyEnabled) {
    return true;
  }
  // The platform signal is consulted only when the explicit opt-in is absent.
  const platform = env.ELIZA_PLATFORM?.trim().toLowerCase();
  return platform === "android";
}
|
|
341
|
+
|
|
342
|
+
/**
 * Interprets the variable `name` of `env` as a boolean flag.
 *
 * The value is trimmed and lower-cased before comparison; only `1`, `true`,
 * `yes` and `on` count as enabled. A missing variable or any other value
 * yields `false`.
 *
 * @param env Environment map to read from.
 * @param name Name of the variable to look up.
 * @returns `true` when the variable holds one of the accepted truthy spellings.
 */
function envFlagIn(env: NodeJS.ProcessEnv, name: string): boolean {
  const normalized = env[name]?.trim().toLowerCase();
  switch (normalized) {
    case "1":
    case "true":
    case "yes":
    case "on":
      return true;
    default:
      return false;
  }
}
|
|
346
|
+
|
|
263
347
|
function envFlag(name: string): boolean {
|
|
264
348
|
const v = process.env[name]?.trim().toLowerCase();
|
|
265
349
|
return v === "1" || v === "true" || v === "yes" || v === "on";
|
|
@@ -310,9 +394,29 @@ export function __resetReducedModeWarnedForTests(): void {
|
|
|
310
394
|
}
|
|
311
395
|
|
|
312
396
|
export interface BackendDecision {
|
|
313
|
-
|
|
397
|
+
/**
|
|
398
|
+
* In-process runtime the dispatcher routes this load to. `llama-cpp` (the
|
|
399
|
+
* GGUF path) is the default; `litert-lm` is selected only when the bundle
|
|
400
|
+
* ships a `.litertlm` AND the build/platform supports LiteRT (or it was
|
|
401
|
+
* forced via `ELIZA_INFERENCE_BACKEND=litert-lm`). Both run through the same
|
|
402
|
+
* fused `libelizainference` FFI — the selection only changes the env the
|
|
403
|
+
* C-side `llm_backend_select` reads.
|
|
404
|
+
*/
|
|
405
|
+
backend: BackendId;
|
|
314
406
|
/** Why this backend was chosen — for diagnostics and warnings. */
|
|
315
|
-
reason:
|
|
407
|
+
reason:
|
|
408
|
+
| "env-override"
|
|
409
|
+
| "kernel-required"
|
|
410
|
+
| "preferred-backend"
|
|
411
|
+
| "litert-artifact"
|
|
412
|
+
| "default";
|
|
413
|
+
/**
|
|
414
|
+
* Absolute path to the selected `.litertlm` artifact when `backend ===
|
|
415
|
+
* "litert-lm"`, else undefined. The dispatcher exports
|
|
416
|
+
* `ELIZA_LLM_BACKEND=litert-lm` for this load so the FFI picks the LiteRT
|
|
417
|
+
* factory; the path is surfaced for diagnostics.
|
|
418
|
+
*/
|
|
419
|
+
litertModelPath?: string;
|
|
316
420
|
/** Required kernels declared by the catalog, when any. */
|
|
317
421
|
kernels: LocalRuntimeKernel[];
|
|
318
422
|
/**
|
|
@@ -331,19 +435,30 @@ export interface BackendDecision {
|
|
|
331
435
|
* Pure decision function. Easy to unit-test without spawning anything.
|
|
332
436
|
*
|
|
333
437
|
* Inputs are deliberately explicit — the caller resolves the catalog entry,
|
|
334
|
-
* the binary availability,
|
|
438
|
+
* the binary availability, the env override, and (for LiteRT) the staged
|
|
439
|
+
* `.litertlm` path + the build/platform support flag before calling us.
|
|
335
440
|
*
|
|
336
441
|
* `binaryKernels`, when present, is the parsed CAPABILITIES.json kernels
|
|
337
442
|
* map from the installed llama.cpp FFI runtime. The dispatcher uses it to
|
|
338
443
|
* compute `unsatisfiedKernels`; null means the binary is older / has no
|
|
339
444
|
* capabilities probe, in which case we trust the model's declaration and
|
|
340
445
|
* let the load attempt clarify.
|
|
446
|
+
*
|
|
447
|
+
* `litertModelPath` is the absolute path to a `.litertlm` text artifact when
|
|
448
|
+
* the bundle ships one (else undefined); `litertSupported` is whether this
|
|
449
|
+
* build/platform can run LiteRT ({@link litertBackendSupported}). LiteRT is
|
|
450
|
+
* selected only when BOTH hold, or when forced via
|
|
451
|
+
* `ELIZA_INFERENCE_BACKEND=litert-lm` (a forced LiteRT select with no
|
|
452
|
+
* `.litertlm` or no support throws — no silent downgrade to GGUF). GGUF stays
|
|
453
|
+
* the default in every other case.
|
|
341
454
|
*/
|
|
342
455
|
export function decideBackend(input: {
|
|
343
456
|
override: BackendOverride;
|
|
344
457
|
catalog: CatalogModel | undefined;
|
|
345
458
|
llamaCppAvailable: boolean;
|
|
346
459
|
binaryKernels?: Partial<Record<LocalRuntimeKernel | string, boolean>> | null;
|
|
460
|
+
litertModelPath?: string | null;
|
|
461
|
+
litertSupported?: boolean;
|
|
347
462
|
}): BackendDecision {
|
|
348
463
|
const { override, catalog } = input;
|
|
349
464
|
const optimizations = catalog?.runtime?.optimizations;
|
|
@@ -352,7 +467,38 @@ export function decideBackend(input: {
|
|
|
352
467
|
kernels,
|
|
353
468
|
input.binaryKernels ?? null,
|
|
354
469
|
);
|
|
470
|
+
const litertModelPath = input.litertModelPath ?? undefined;
|
|
471
|
+
const litertSupported = input.litertSupported ?? false;
|
|
472
|
+
|
|
473
|
+
// `ELIZA_INFERENCE_BACKEND=litert-lm` HARD-forces the LiteRT runtime. It is a
|
|
474
|
+
// real select, not a hint: a forced LiteRT load with no staged `.litertlm`
|
|
475
|
+
// or on a build without LiteRT support is an error, never a silent fall back
|
|
476
|
+
// to GGUF (Commandment 8 — don't paper over a broken pipeline).
|
|
477
|
+
if (override === "litert-lm") {
|
|
478
|
+
if (!litertSupported) {
|
|
479
|
+
throw new Error(
|
|
480
|
+
"[local-inference] ELIZA_INFERENCE_BACKEND=litert-lm forces the LiteRT-LM " +
|
|
481
|
+
"backend, but this build/platform does not support it (set ELIZA_ENABLE_LITERT=1 " +
|
|
482
|
+
"on a LiteRT-enabled build, or run on android). Use llama-cpp, or unset the override.",
|
|
483
|
+
);
|
|
484
|
+
}
|
|
485
|
+
if (!litertModelPath) {
|
|
486
|
+
throw new Error(
|
|
487
|
+
"[local-inference] ELIZA_INFERENCE_BACKEND=litert-lm forces the LiteRT-LM " +
|
|
488
|
+
"backend, but the bundle ships no .litertlm text artifact under text/. " +
|
|
489
|
+
"Stage a .litertlm into the bundle, or use llama-cpp.",
|
|
490
|
+
);
|
|
491
|
+
}
|
|
492
|
+
return {
|
|
493
|
+
backend: "litert-lm",
|
|
494
|
+
reason: "env-override",
|
|
495
|
+
litertModelPath,
|
|
496
|
+
kernels,
|
|
497
|
+
unsatisfiedKernels,
|
|
498
|
+
};
|
|
499
|
+
}
|
|
355
500
|
|
|
501
|
+
// `ELIZA_INFERENCE_BACKEND=llama-cpp` forces the fused GGUF path explicitly.
|
|
356
502
|
if (override === "llama-cpp") {
|
|
357
503
|
return {
|
|
358
504
|
backend: "llama-cpp",
|
|
@@ -362,6 +508,19 @@ export function decideBackend(input: {
|
|
|
362
508
|
};
|
|
363
509
|
}
|
|
364
510
|
|
|
511
|
+
// Auto: when the bundle ships a `.litertlm` AND this build/platform supports
|
|
512
|
+
// LiteRT, route there (it is the on-device NPU/GPU-delegate path). GGUF stays
|
|
513
|
+
// the default whenever the artifact or the support is absent.
|
|
514
|
+
if (litertSupported && litertModelPath) {
|
|
515
|
+
return {
|
|
516
|
+
backend: "litert-lm",
|
|
517
|
+
reason: "litert-artifact",
|
|
518
|
+
litertModelPath,
|
|
519
|
+
kernels,
|
|
520
|
+
unsatisfiedKernels,
|
|
521
|
+
};
|
|
522
|
+
}
|
|
523
|
+
|
|
365
524
|
if (kernels.length > 0) {
|
|
366
525
|
return {
|
|
367
526
|
backend: "llama-cpp",
|
|
@@ -412,7 +571,7 @@ export function resolveCatalogForPlan(
|
|
|
412
571
|
* they differ.
|
|
413
572
|
*/
|
|
414
573
|
export class BackendDispatcher implements LocalInferenceBackend {
|
|
415
|
-
readonly id = "
|
|
574
|
+
readonly id = "llama-cpp" as const;
|
|
416
575
|
// The dispatcher's `id` is informational; the active backend's id is what
|
|
417
576
|
// matters for diagnostics. We expose `activeBackendId()` for that.
|
|
418
577
|
|
|
@@ -438,7 +597,7 @@ export class BackendDispatcher implements LocalInferenceBackend {
|
|
|
438
597
|
return this.ffiStreaming.available();
|
|
439
598
|
}
|
|
440
599
|
|
|
441
|
-
activeBackendId(): "
|
|
600
|
+
activeBackendId(): "llama-cpp" | null {
|
|
442
601
|
return this.active ? this.active.id : null;
|
|
443
602
|
}
|
|
444
603
|
|
|
@@ -457,13 +616,33 @@ export class BackendDispatcher implements LocalInferenceBackend {
|
|
|
457
616
|
catalog,
|
|
458
617
|
llamaCppAvailable: this.probeFfiAvailable(),
|
|
459
618
|
binaryKernels: this.probeBinaryKernels?.() ?? null,
|
|
619
|
+
litertModelPath: plan.overrides?.litertModelPath ?? null,
|
|
620
|
+
litertSupported: litertBackendSupported(),
|
|
460
621
|
});
|
|
461
622
|
}
|
|
462
623
|
|
|
463
624
|
async load(plan: BackendPlan): Promise<void> {
|
|
464
|
-
let effectivePlan = plan;
|
|
465
625
|
const decision = this.decide(plan);
|
|
466
|
-
|
|
626
|
+
|
|
627
|
+
// Tell the C-side `llm_backend_select` which in-process runtime to drive.
|
|
628
|
+
// `litert-lm` sets the HARD select; the GGUF path clears it so a prior
|
|
629
|
+
// LiteRT select never leaks into the next llama.cpp load. The FFI library
|
|
630
|
+
// is the same singleton either way (`this.ffiStreaming`); only the env
|
|
631
|
+
// (read at `_open`) changes which factory it picks.
|
|
632
|
+
if (decision.backend === "litert-lm") {
|
|
633
|
+
process.env[ELIZA_LLM_BACKEND_ENV] = "litert-lm";
|
|
634
|
+
} else {
|
|
635
|
+
delete process.env[ELIZA_LLM_BACKEND_ENV];
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
let effectivePlan = plan;
|
|
639
|
+
// Kernel-mismatch enforcement is a llama.cpp-only contract — the LiteRT
|
|
640
|
+
// `.litertlm` path uses none of the fork's KV kernels, so skip it there.
|
|
641
|
+
if (
|
|
642
|
+
decision.backend === "llama-cpp" &&
|
|
643
|
+
decision.unsatisfiedKernels &&
|
|
644
|
+
decision.unsatisfiedKernels.length > 0
|
|
645
|
+
) {
|
|
467
646
|
const missing = decision.unsatisfiedKernels.join(", ");
|
|
468
647
|
if (localAllowStockKv()) {
|
|
469
648
|
// Reduced-optimization local mode: the build hasn't dispatched these
|
|
@@ -488,7 +667,7 @@ export class BackendDispatcher implements LocalInferenceBackend {
|
|
|
488
667
|
);
|
|
489
668
|
}
|
|
490
669
|
}
|
|
491
|
-
if (
|
|
670
|
+
if (!this.probeFfiAvailable()) {
|
|
492
671
|
throw new Error(
|
|
493
672
|
"[local-inference] Optimized llama.cpp requires the in-process FFI backend. " +
|
|
494
673
|
"Install/rebuild libelizainference with streaming-LLM + MTP support; " +
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
1
2
|
import net from "node:net";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
import path from "node:path";
|
|
2
5
|
import { afterEach, describe, expect, it } from "vitest";
|
|
3
6
|
import { BionicHostLoader } from "./bionic-host-loader";
|
|
4
7
|
|
|
@@ -38,14 +41,35 @@ function startHost(
|
|
|
38
41
|
}
|
|
39
42
|
|
|
40
43
|
let host: net.Server | null = null;
|
|
44
|
+
const tempDirs: string[] = [];
|
|
41
45
|
afterEach(() => {
|
|
42
46
|
host?.close();
|
|
43
47
|
host = null;
|
|
48
|
+
for (const dir of tempDirs.splice(0)) {
|
|
49
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
50
|
+
}
|
|
44
51
|
});
|
|
45
52
|
|
|
46
53
|
const SOCK = `eliza-bionic-test-${process.pid}`;
|
|
54
|
+
const describeLinuxOnly =
|
|
55
|
+
process.platform === "linux" ? describe : describe.skip;
|
|
47
56
|
|
|
48
|
-
|
|
57
|
+
/**
 * Creates a throwaway bundle directory under the OS temp dir and returns the
 * path a text model would have inside it.
 *
 * Layout written to disk:
 *   - `text/` (empty directory)
 *   - `asr/gemma-asr.gguf` (placeholder content)
 *   - `eliza-1.manifest.json` (the JSON-serialized `manifest`)
 *
 * The bundle root is pushed onto `tempDirs` so the suite's cleanup hook can
 * remove it. The returned `text/model.gguf` file itself is NOT created; only
 * its path is handed back.
 *
 * @param manifest Value serialized into the manifest file; defaults to `{}`.
 * @returns Path to `<bundleRoot>/text/model.gguf`.
 */
function makeBundleModelPath(manifest: unknown = {}): string {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "eliza-bionic-bundle-"));
  tempDirs.push(root);

  for (const subdir of ["text", "asr"]) {
    fs.mkdirSync(path.join(root, subdir), { recursive: true });
  }

  const asrWeights = path.join(root, "asr", "gemma-asr.gguf");
  fs.writeFileSync(asrWeights, "asr");

  const manifestFile = path.join(root, "eliza-1.manifest.json");
  fs.writeFileSync(manifestFile, JSON.stringify(manifest));

  return path.join(root, "text", "model.gguf");
}
|
|
71
|
+
|
|
72
|
+
describeLinuxOnly("BionicHostLoader (real abstract-UDS)", () => {
|
|
49
73
|
it("round-trips a buffered generate and returns the host completion", async () => {
|
|
50
74
|
let seen: Record<string, unknown> | null = null;
|
|
51
75
|
host = startHost(SOCK, (req) => {
|
|
@@ -130,4 +154,73 @@ describe("BionicHostLoader (real abstract-UDS)", () => {
|
|
|
130
154
|
await loader.loadModel({ modelPath: "/m/text/x.gguf" });
|
|
131
155
|
await expect(loader.generate({ prompt: "x" })).rejects.toThrow();
|
|
132
156
|
});
|
|
157
|
+
|
|
158
|
+
it("transcribe forwards op=asr with pcm + sampleRate and returns the transcript", async () => {
|
|
159
|
+
let seen: Record<string, unknown> | null = null;
|
|
160
|
+
host = startHost(SOCK, (req) => {
|
|
161
|
+
seen = req;
|
|
162
|
+
return JSON.stringify({ ok: true, text: "the quick brown fox" });
|
|
163
|
+
});
|
|
164
|
+
const loader = new BionicHostLoader(SOCK);
|
|
165
|
+
const modelPath = makeBundleModelPath();
|
|
166
|
+
await loader.loadModel({ modelPath });
|
|
167
|
+
const out = await loader.transcribe({
|
|
168
|
+
pcmBase64: "AAAA",
|
|
169
|
+
sampleRate: 16000,
|
|
170
|
+
});
|
|
171
|
+
expect(out).toBe("the quick brown fox");
|
|
172
|
+
expect(seen).toMatchObject({
|
|
173
|
+
op: "asr",
|
|
174
|
+
pcmBase64: "AAAA",
|
|
175
|
+
sampleRate: 16000,
|
|
176
|
+
bundleDir: path.dirname(path.dirname(modelPath)),
|
|
177
|
+
});
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
it("transcribe refuses Qwen ASR provenance before contacting the host", async () => {
|
|
181
|
+
const loader = new BionicHostLoader(SOCK);
|
|
182
|
+
await loader.loadModel({
|
|
183
|
+
modelPath: makeBundleModelPath({
|
|
184
|
+
lineage: { asr: { base: "Qwen3-ASR" } },
|
|
185
|
+
}),
|
|
186
|
+
});
|
|
187
|
+
await expect(
|
|
188
|
+
loader.transcribe({ pcmBase64: "AAAA", sampleRate: 16000 }),
|
|
189
|
+
).rejects.toThrow(/Qwen ASR provenance/);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
it("describeImage forwards op=image with bytes + prompt and returns the description", async () => {
|
|
193
|
+
let seen: Record<string, unknown> | null = null;
|
|
194
|
+
host = startHost(SOCK, (req) => {
|
|
195
|
+
seen = req;
|
|
196
|
+
return JSON.stringify({ ok: true, text: "a cat on a desk" });
|
|
197
|
+
});
|
|
198
|
+
const loader = new BionicHostLoader(SOCK);
|
|
199
|
+
await loader.loadModel({
|
|
200
|
+
modelPath: "/data/x/eliza-1/bundle/text/model.gguf",
|
|
201
|
+
});
|
|
202
|
+
const out = await loader.describeImage({
|
|
203
|
+
imageBase64: "iVBORw0K",
|
|
204
|
+
prompt: "what is this?",
|
|
205
|
+
});
|
|
206
|
+
expect(out).toBe("a cat on a desk");
|
|
207
|
+
expect(seen).toMatchObject({
|
|
208
|
+
op: "image",
|
|
209
|
+
imageBase64: "iVBORw0K",
|
|
210
|
+
prompt: "what is this?",
|
|
211
|
+
mmprojPath: "",
|
|
212
|
+
bundleDir: "/data/x/eliza-1/bundle",
|
|
213
|
+
});
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
it("transcribe throws on host ok:false", async () => {
|
|
217
|
+
host = startHost(SOCK, () =>
|
|
218
|
+
JSON.stringify({ ok: false, error: "no asr weights staged" }),
|
|
219
|
+
);
|
|
220
|
+
const loader = new BionicHostLoader(SOCK);
|
|
221
|
+
await loader.loadModel({ modelPath: makeBundleModelPath() });
|
|
222
|
+
await expect(
|
|
223
|
+
loader.transcribe({ pcmBase64: "AAAA", sampleRate: 16000 }),
|
|
224
|
+
).rejects.toThrow(/no asr weights staged/);
|
|
225
|
+
});
|
|
133
226
|
});
|
|
@@ -28,6 +28,10 @@ import type {
|
|
|
28
28
|
LocalInferenceLoadArgs,
|
|
29
29
|
LocalInferenceLoader,
|
|
30
30
|
} from "./active-model";
|
|
31
|
+
import {
|
|
32
|
+
bundleHasAsrModelFiles,
|
|
33
|
+
readBundleAsrProvenanceBlockers,
|
|
34
|
+
} from "./asr-provenance";
|
|
31
35
|
|
|
32
36
|
/** Connect + full round-trip budget. A cold GPU decode of a long reply fits. */
|
|
33
37
|
const REQUEST_TIMEOUT_MS = 120_000;
|
|
@@ -43,6 +47,13 @@ interface BionicGenerateResponse {
|
|
|
43
47
|
tokS?: number;
|
|
44
48
|
}
|
|
45
49
|
|
|
50
|
+
/**
 * Reply shape shared by the host's `asr` and `image` ops: a success flag plus
 * either the produced text (transcript / description) or an error message.
 */
interface BionicTextResponse {
  /** Whether the host completed the op; callers throw when this is false. */
  ok: boolean;
  /** Transcript or image description; callers substitute `""` when absent. */
  text?: string;
  /** Host-supplied failure detail, surfaced in the thrown error message. */
  error?: string;
}
|
|
56
|
+
|
|
46
57
|
/**
|
|
47
58
|
* Derive the fused-bundle root from a model GGUF path. The host's
|
|
48
59
|
* `eliza_inference_create(bundleDir)` expects the directory that contains
|
|
@@ -106,6 +117,67 @@ export class BionicHostLoader implements LocalInferenceLoader {
|
|
|
106
117
|
return res.text ?? "";
|
|
107
118
|
}
|
|
108
119
|
|
|
120
|
+
/**
 * On-device speech-to-text through the bionic host's fused Gemma ASR path
 * (`op: "asr"`). The musl agent cannot load the fused library itself, so the
 * TRANSCRIPTION delegate ships the audio over the UDS and receives the
 * transcript in the reply.
 *
 * Before contacting the host, two local checks must pass:
 *   1. a bundle is active and it contains ASR model files, and
 *   2. the bundle reports no ASR provenance blockers.
 *
 * @param args.pcmBase64 Mono little-endian fp32 PCM, already base64-encoded.
 * @param args.sampleRate Sample rate of the PCM data, forwarded to the host.
 * @returns The transcript, or `""` when the host reply carries no text.
 * @throws Error when no ASR-capable bundle is active, when provenance
 *   blockers exist, or when the host answers with `ok: false`.
 */
async transcribe(args: {
  pcmBase64: string;
  sampleRate: number;
}): Promise<string> {
  const bundleDir = this.bundleDir;
  if (!bundleDir || !bundleHasAsrModelFiles(bundleDir)) {
    throw new Error(
      "[BionicHostLoader] host asr requires an active Gemma ASR-capable bundle; refusing to use the bionic host default bundle",
    );
  }

  const provenanceBlockers = readBundleAsrProvenanceBlockers(bundleDir);
  if (provenanceBlockers.length > 0) {
    throw new Error(
      `[BionicHostLoader] host asr refused non-Gemma ASR provenance: ${provenanceBlockers.join("; ")}`,
    );
  }

  const { pcmBase64, sampleRate } = args;
  const reply = await this.roundTrip<BionicTextResponse>({
    op: "asr",
    bundleDir,
    pcmBase64,
    sampleRate,
  });
  if (reply.ok) {
    return reply.text ?? "";
  }
  throw new Error(
    `[BionicHostLoader] host asr failed: ${reply.error ?? "unknown error"}`,
  );
}
|
|
154
|
+
|
|
155
|
+
/**
 * On-device vision / screen recognition: asks the bionic host to describe a
 * raw image (PNG/JPEG/WebP bytes, base64-encoded) via its mmproj
 * describe-image op (`op: "image"`).
 *
 * @param args.imageBase64 Base64-encoded image bytes.
 * @param args.mmprojPath Optional projector path; sent as `""` when omitted,
 *   in which case the host resolves the projector from the bundle's
 *   `vision/` dir.
 * @param args.prompt Optional prompt; sent as `""` when omitted.
 * @returns The description, or `""` when the host reply carries no text.
 * @throws Error when the host answers with `ok: false`.
 */
async describeImage(args: {
  imageBase64: string;
  mmprojPath?: string;
  prompt?: string;
}): Promise<string> {
  const request = {
    op: "image",
    bundleDir: this.bundleDir,
    imageBase64: args.imageBase64,
    mmprojPath: args.mmprojPath ?? "",
    prompt: args.prompt ?? "",
  };
  const reply = await this.roundTrip<BionicTextResponse>(request);
  if (reply.ok) {
    return reply.text ?? "";
  }
  throw new Error(
    `[BionicHostLoader] host image describe failed: ${reply.error ?? "unknown error"}`,
  );
}
|
|
180
|
+
|
|
109
181
|
/**
|
|
110
182
|
* One request → one response over a fresh connection. Length-prefixed frames:
|
|
111
183
|
* `[int32 BE byte length][UTF-8 JSON]` in each direction.
|
|
@@ -109,28 +109,28 @@ describe("cache-bridge path layout", () => {
|
|
|
109
109
|
|
|
110
110
|
it("buildModelHash is stable + sensitive to its inputs", () => {
|
|
111
111
|
const a = buildModelHash({
|
|
112
|
-
targetModelPath: "/models/eliza-1-
|
|
113
|
-
drafterModelPath: "/models/eliza-1-
|
|
112
|
+
targetModelPath: "/models/eliza-1-2b.gguf",
|
|
113
|
+
drafterModelPath: "/models/eliza-1-2b-drafter.gguf",
|
|
114
114
|
cacheTypeK: "f16",
|
|
115
115
|
cacheTypeV: "f16",
|
|
116
116
|
});
|
|
117
117
|
const b = buildModelHash({
|
|
118
|
-
targetModelPath: "/models/eliza-1-
|
|
119
|
-
drafterModelPath: "/models/eliza-1-
|
|
118
|
+
targetModelPath: "/models/eliza-1-2b.gguf",
|
|
119
|
+
drafterModelPath: "/models/eliza-1-2b-drafter.gguf",
|
|
120
120
|
cacheTypeK: "f16",
|
|
121
121
|
cacheTypeV: "f16",
|
|
122
122
|
});
|
|
123
123
|
expect(a).toBe(b);
|
|
124
124
|
const c = buildModelHash({
|
|
125
|
-
targetModelPath: "/models/eliza-1-
|
|
125
|
+
targetModelPath: "/models/eliza-1-2b.gguf",
|
|
126
126
|
drafterModelPath: "/models/other-drafter.gguf",
|
|
127
127
|
cacheTypeK: "f16",
|
|
128
128
|
cacheTypeV: "f16",
|
|
129
129
|
});
|
|
130
130
|
expect(c).not.toBe(a);
|
|
131
131
|
const d = buildModelHash({
|
|
132
|
-
targetModelPath: "/models/eliza-1-
|
|
133
|
-
drafterModelPath: "/models/eliza-1-
|
|
132
|
+
targetModelPath: "/models/eliza-1-2b.gguf",
|
|
133
|
+
drafterModelPath: "/models/eliza-1-2b-drafter.gguf",
|
|
134
134
|
cacheTypeK: "q8_0",
|
|
135
135
|
cacheTypeV: "f16",
|
|
136
136
|
});
|