@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -29,11 +29,22 @@ import path from "node:path";
|
|
|
29
29
|
import { resolveStateDir } from "@elizaos/core";
|
|
30
30
|
import {
|
|
31
31
|
type AttributedTurn,
|
|
32
|
+
type AttributionPipelineLike,
|
|
32
33
|
AudioFrameConsumer,
|
|
33
34
|
type AudioFrameConsumerConfig,
|
|
35
|
+
type AudioFrameConsumerDeps,
|
|
34
36
|
type AudioFrameEvent,
|
|
37
|
+
decodeAudioFramePcm,
|
|
38
|
+
type EchoReferenceProvider,
|
|
35
39
|
type RuntimeEventSink,
|
|
40
|
+
type TurnTranscriber,
|
|
41
|
+
type VadSegmenter,
|
|
36
42
|
} from "./audio-frame-consumer.js";
|
|
43
|
+
import {
|
|
44
|
+
estimateEchoDelaySamples,
|
|
45
|
+
platformPlaybackDelaySamples,
|
|
46
|
+
} from "./echo-delay.js";
|
|
47
|
+
import { EchoReferenceBuffer } from "./echo-reference-buffer.js";
|
|
37
48
|
import type {
|
|
38
49
|
ElizaInferenceContextHandle,
|
|
39
50
|
ElizaInferenceFfi,
|
|
@@ -99,6 +110,17 @@ export interface LiveDiarizationStatus {
|
|
|
99
110
|
framesDropped: number;
|
|
100
111
|
/** Turns segmented + attributed so far. */
|
|
101
112
|
turnsObserved: number;
|
|
113
|
+
/** Live AEC wiring status. Echo cancellation runs only when `echoReferenceWired` is true. */
|
|
114
|
+
aec: {
|
|
115
|
+
echoReferenceWired: boolean;
|
|
116
|
+
/** Playback→mic delay (samples @16 kHz) currently applied to align the
|
|
117
|
+
* far-end reference — self-calibrated from real echo when confident,
|
|
118
|
+
* otherwise the `ELIZA_VOICE_ECHO_DELAY_MS` seed (default 0). */
|
|
119
|
+
echoDelaySamples: number;
|
|
120
|
+
/** Peak cross-correlation [0,1] of the last accepted delay calibration;
|
|
121
|
+
* 0 until a confident estimate replaces the seed. */
|
|
122
|
+
echoDelayConfidence: number;
|
|
123
|
+
};
|
|
102
124
|
/** The most recent attributed turns (capped), for device-evidence reads. */
|
|
103
125
|
recentTurns: LiveDiarizationTurnSummary[];
|
|
104
126
|
/** Populated only when readiness failed — the precise blocker. */
|
|
@@ -122,6 +144,120 @@ export interface LiveDiarizationTurnSummary {
|
|
|
122
144
|
|
|
123
145
|
const MAX_RECENT_TURNS = 20;
|
|
124
146
|
|
|
147
|
+
/** Optional construction-time settings for {@link LiveDiarizationSession}. */
export interface LiveDiarizationSessionOptions {
  /**
   * External source of agent-playback PCM used as the AEC far-end reference.
   *
   * A caller that supplies one takes ownership of playback capture and delay
   * calibration. When it is omitted (or null) the session falls back to its
   * built-in playback buffer, which is fed by /api/voice/playback-frames.
   */
  echoReference?: EchoReferenceProvider | null;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Argument bag for {@link buildLiveDiarizationConsumerDeps}: the three
 * mandatory collaborators plus two optional ones that may be null/undefined.
 */
export interface LiveDiarizationConsumerDepsInput {
  /** Voice-activity segmenter handed to the consumer. */
  vad: VadSegmenter;
  /** Speaker-attribution pipeline handed to the consumer. */
  pipeline: AttributionPipelineLike;
  /** Runtime event sink handed to the consumer. */
  runtime: RuntimeEventSink;
  /** Per-turn transcriber; leave null/undefined to omit it from the deps. */
  transcribe?: TurnTranscriber | null;
  /** Echo-reference provider; leave null/undefined to omit it from the deps. */
  echoReference?: EchoReferenceProvider | null;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Assemble the dependency object for an `AudioFrameConsumer`.
 *
 * `vad`, `pipeline` and `runtime` are always forwarded. `transcribe` and
 * `echoReference` are added only when truthy, so a null/undefined optional
 * yields a deps object where the key is absent rather than present-but-null.
 *
 * @param input - Collaborators to forward; see
 *   {@link LiveDiarizationConsumerDepsInput}.
 * @returns A fresh deps object containing only the supplied collaborators.
 */
export function buildLiveDiarizationConsumerDeps(
  input: LiveDiarizationConsumerDepsInput,
): AudioFrameConsumerDeps {
  const { vad, pipeline, runtime, transcribe, echoReference } = input;
  // Each optional contributes either a single-key fragment or nothing at all.
  const transcriberPart = transcribe ? { transcribe } : {};
  const echoReferencePart = echoReference ? { echoReference } : {};
  return {
    vad,
    pipeline,
    runtime,
    ...transcriberPart,
    ...echoReferencePart,
  };
}
|
|
179
|
+
|
|
180
|
+
/** Sample rate (Hz) assumed for audio frames throughout this module. */
const AUDIO_FRAME_SAMPLE_RATE = 16_000;

// --- Echo-delay self-calibration tuning (#9583/#9586) ---

/**
 * Number of playback-active samples to gather before attempting a delay
 * estimate: 12 000 samples is ~0.75 s at 16 kHz, enough correlated echo for a
 * stable cross-correlation.
 */
const ECHO_CAL_TARGET_SAMPLES = 12_000;

/**
 * Hard cap on the rolling calibration window, so a long stretch of talk-over
 * cannot make it grow without bound.
 */
const ECHO_CAL_MAX_SAMPLES = 24_000;

/**
 * Minimum normalized cross-correlation for a calibrated delay to be accepted.
 * Anything lower means near-end and far-end are independent (user talking, no
 * echo), in which case the seed delay is kept.
 */
const ECHO_CAL_MIN_CONFIDENCE = 0.3;

/** Upper bound of the playback→mic delay search: 4 800 samples = 300 ms at 16 kHz. */
const ECHO_CAL_MAX_LAG_SAMPLES = 4_800;

/**
 * Far-end mean-square energy floor; a frame below it counts as "no playback"
 * and is skipped.
 */
const ECHO_CAL_FAR_ENERGY_FLOOR = 1e-7;
|
|
195
|
+
|
|
196
|
+
/**
 * Join a list of Float32 chunks, in order, into one newly allocated array.
 *
 * The input is only read, so it is typed `readonly`: callers may pass frozen
 * or readonly chunk lists as well as ordinary mutable arrays. The result never
 * aliases any input chunk.
 *
 * @param chunks - Chunks to concatenate; may be empty or contain empty chunks.
 * @returns A new `Float32Array` whose length is the sum of the chunk lengths
 *   (length 0 when `chunks` is empty).
 */
function concatFloat32(chunks: readonly Float32Array[]): Float32Array {
  // Size the destination once up front so every chunk is copied exactly once.
  const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
  const joined = new Float32Array(totalLength);
  let writeOffset = 0;
  for (const chunk of chunks) {
    joined.set(chunk, writeOffset);
    writeOffset += chunk.length;
  }
  return joined;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Seed value for the playback→mic transport delay (in samples at 16 kHz) used
 * to time-align the far-end echo reference. Controlled by the
 * `ELIZA_VOICE_ECHO_DELAY_MS` environment variable:
 *   - `"auto"` (case-insensitive, surrounding whitespace ignored) → the
 *     per-platform default from `platformPlaybackDelaySamples` (#9583);
 *   - a finite number greater than zero → that many milliseconds, converted
 *     to samples and rounded to the nearest sample;
 *   - unset, non-numeric, zero or negative → 0, leaving the canceller aligned
 *     to the most recently rendered playback while NLMS adapts the residual.
 *
 * This is only the starting point: the on-device calibration
 * (`estimateEchoDelaySamples`, #9586) refines it at runtime once enough
 * correlated echo has been observed.
 *
 * @returns The seed delay in samples; never negative.
 */
function resolveEchoDelaySamples(): number {
  const configured = process.env.ELIZA_VOICE_ECHO_DELAY_MS;

  if (configured?.trim().toLowerCase() !== "auto") {
    // Explicit-milliseconds path; anything unusable collapses to 0.
    const delayMs = Number(configured);
    const usable = Number.isFinite(delayMs) && delayMs > 0;
    return usable ? Math.round((delayMs / 1000) * AUDIO_FRAME_SAMPLE_RATE) : 0;
  }

  // "auto": pick the platform id the same way service.ts / backend-selector.ts
  // do. Mobile shells announce themselves through `ELIZA_PLATFORM=ios|android`
  // while `process.platform` still reads `darwin` / `linux`, so preferring the
  // shell value keeps the mobile seeds in the #9653 table reachable on device
  // instead of collapsing to the host platform's seed.
  const shellPlatform = process.env.ELIZA_PLATFORM;
  const platformId =
    shellPlatform === "ios" || shellPlatform === "android"
      ? shellPlatform
      : process.platform;
  return platformPlaybackDelaySamples(platformId, AUDIO_FRAME_SAMPLE_RATE);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Read the opt-in residual-echo suppressor setting from
 * `ELIZA_VOICE_RESIDUAL_SUPPRESSION` (#9583/#9649). The value is trimmed and
 * lower-cased before matching:
 *   - `"1"`, `"true"` or `"on"` → `true` (enable with the canceller's default
 *     gain);
 *   - any other number in (0, 1] → `{ gain }` (enable with that residual
 *     gain; lower = stronger);
 *   - unset, empty or anything else → `undefined` (disabled; the canceller
 *     does linear NLMS only).
 *
 * Stays off by default until validated with real device audio, per #9649
 * item 2.
 *
 * @returns `true`, a `{ gain }` object, or `undefined` when disabled.
 */
function resolveResidualSuppression(): boolean | { gain: number } | undefined {
  const setting = (process.env.ELIZA_VOICE_RESIDUAL_SUPPRESSION ?? "")
    .trim()
    .toLowerCase();

  switch (setting) {
    case "":
      return undefined;
    case "1":
    case "true":
    case "on":
      return true;
    default: {
      // Not a keyword: accept it only as an explicit gain within (0, 1].
      const gain = Number(setting);
      const inRange = Number.isFinite(gain) && gain > 0 && gain <= 1;
      return inRange ? { gain } : undefined;
    }
  }
}
|
|
260
|
+
|
|
125
261
|
/**
|
|
126
262
|
* Owns the single live diarization consumer for the agent process. Built
|
|
127
263
|
* lazily on first frame batch so it does not load voice models at boot.
|
|
@@ -139,8 +275,37 @@ export class LiveDiarizationSession {
|
|
|
139
275
|
private readonly recentTurns: LiveDiarizationTurnSummary[] = [];
|
|
140
276
|
private resolvedLibPath: string | null = null;
|
|
141
277
|
private buildError: string | null = null;
|
|
278
|
+
/** True once the fused ASR region is mmap-acquired for per-turn transcribe. */
|
|
279
|
+
private asrRegionAcquired = false;
|
|
280
|
+
/**
|
|
281
|
+
* Far-end (agent TTS playback) alignment buffer for echo cancellation
|
|
282
|
+
* (#9583/#9455). Fed by {@link pushPlayback}; read per mic frame via the
|
|
283
|
+
* consumer's `echoReference` seam. Inert (zero far-end ⇒ NLMS passthrough)
|
|
284
|
+
* until the device streams playback, so wiring it never regresses the
|
|
285
|
+
* no-playback case.
|
|
286
|
+
*/
|
|
287
|
+
private readonly echoBuffer = new EchoReferenceBuffer();
|
|
288
|
+
/**
|
|
289
|
+
* Playback→mic delay applied when reading the far-end reference. Seeded from
|
|
290
|
+
* `ELIZA_VOICE_ECHO_DELAY_MS` (default 0) and then SELF-CALIBRATED on the live
|
|
291
|
+
* path: once enough playback-active echo is observed, `estimateEchoDelaySamples`
|
|
292
|
+
* (#9586) recovers the bulk transport lag by cross-correlation and replaces the
|
|
293
|
+
* seed (#9583). Mutable for that reason.
|
|
294
|
+
*/
|
|
295
|
+
private echoDelaySamples = resolveEchoDelaySamples();
|
|
296
|
+
private echoDelayConfidence = 0;
|
|
297
|
+
private echoDelayCalibrated = false;
|
|
298
|
+
/** Rolling near/far windows accumulated only while the far-end is active, used
|
|
299
|
+
* once to estimate the playback→mic delay. Cleared after a confident estimate
|
|
300
|
+
* and on {@link resetPlayback}. */
|
|
301
|
+
private calNear: Float32Array[] = [];
|
|
302
|
+
private calFar: Float32Array[] = [];
|
|
303
|
+
private calSampleCount = 0;
|
|
142
304
|
|
|
143
|
-
constructor(
|
|
305
|
+
constructor(
|
|
306
|
+
private readonly runtime: RuntimeEventSink,
|
|
307
|
+
private readonly options: LiveDiarizationSessionOptions = {},
|
|
308
|
+
) {}
|
|
144
309
|
|
|
145
310
|
/** Ensure the real-deps consumer exists; idempotent + concurrency-safe. */
|
|
146
311
|
private ensureBuilt(): Promise<void> {
|
|
@@ -207,19 +372,64 @@ export class LiveDiarizationSession {
|
|
|
207
372
|
diarizer,
|
|
208
373
|
profileStore: store,
|
|
209
374
|
});
|
|
375
|
+
const residualSuppression = resolveResidualSuppression();
|
|
210
376
|
const config: AudioFrameConsumerConfig = {
|
|
211
377
|
source: { kind: "local_mic", deviceId: "android-audioframe" },
|
|
212
378
|
preRollSeconds: 0.3,
|
|
213
379
|
maxTurnSeconds: 30,
|
|
380
|
+
...(residualSuppression ? { residualSuppression } : {}),
|
|
214
381
|
};
|
|
382
|
+
// Join the fused batch ASR so the live path carries the real transcript
|
|
383
|
+
// on VOICE_TURN_OBSERVED (#8786). Null when the fused build has no ASR
|
|
384
|
+
// decoder — the path then stays diarization-only, as before.
|
|
385
|
+
const transcribe = this.buildTurnTranscriber(ffi, ctx);
|
|
215
386
|
const consumer = new AudioFrameConsumer(
|
|
216
|
-
{
|
|
387
|
+
buildLiveDiarizationConsumerDeps({
|
|
388
|
+
vad: detector,
|
|
389
|
+
pipeline,
|
|
390
|
+
runtime: this.runtime,
|
|
391
|
+
transcribe,
|
|
392
|
+
// Cancel the agent's own TTS playback before VAD/attribution so the
|
|
393
|
+
// live path never transcribes its echo (#9455/#9583). Hosts may
|
|
394
|
+
// provide their own live reference; otherwise the session uses the
|
|
395
|
+
// built-in playback buffer fed by pushPlayback.
|
|
396
|
+
echoReference:
|
|
397
|
+
this.options.echoReference ??
|
|
398
|
+
((timestampMs, samples) =>
|
|
399
|
+
this.echoReferenceFrame(timestampMs, samples)),
|
|
400
|
+
}),
|
|
217
401
|
config,
|
|
218
402
|
);
|
|
219
403
|
consumer.onTurn((turn) => this.recordTurn(turn));
|
|
220
404
|
this.consumer = consumer;
|
|
221
405
|
}
|
|
222
406
|
|
|
407
|
+
/**
 * Create the per-turn transcriber backed by the fused batch ASR decoder
 * (`eliza_inference_asr_transcribe`).
 *
 * Yields `null` (leaving the session diarization-only) in two cases: the
 * FFI exposes no `asrTranscribe` function, or acquiring the `"asr"` mmap
 * region throws. The acquire is deliberately best-effort so a missing ASR
 * model does not fail the whole session. On success `asrRegionAcquired` is
 * set so `close()` knows to evict the region.
 *
 * The returned function runs one batch decode per finalized turn at 16 kHz
 * and reports whitespace-only output as `null`.
 *
 * @param ffi Fused inference FFI binding.
 * @param ctx Inference context handle the ASR region is acquired on.
 * @returns The transcriber, or `null` when ASR is unavailable.
 */
private buildTurnTranscriber(
  ffi: ElizaInferenceFfi,
  ctx: ElizaInferenceContextHandle,
): TurnTranscriber | null {
  if (typeof ffi.asrTranscribe !== "function") {
    return null;
  }

  try {
    ffi.mmapAcquire(ctx, "asr");
  } catch {
    // Best-effort: no ASR region means no transcript, not a failed session.
    return null;
  }
  this.asrRegionAcquired = true;

  const transcribeTurn: TurnTranscriber = (pcm) => {
    const transcript = ffi
      .asrTranscribe({ ctx, pcm, sampleRateHz: 16_000 })
      .trim();
    return transcript.length === 0 ? null : transcript;
  };
  return transcribeTurn;
}
|
|
432
|
+
|
|
223
433
|
private recordTurn(turn: AttributedTurn): void {
|
|
224
434
|
this.turnsObserved += 1;
|
|
225
435
|
const speaker = turn.output.primarySpeaker;
|
|
@@ -240,12 +450,121 @@ export class LiveDiarizationSession {
|
|
|
240
450
|
if (this.recentTurns.length > MAX_RECENT_TURNS) this.recentTurns.shift();
|
|
241
451
|
}
|
|
242
452
|
|
|
453
|
+
/**
 * Far-end (agent TTS playback) reference for a single mic frame. This is the
 * default `echoReference` callback handed to the consumer (#9455/#9583).
 *
 * Looks the frame up in the playback alignment buffer using the session's
 * current playback→mic delay. Per the original note, the buffer is expected
 * to return a zero-filled slice (NLMS passthrough) while no playback has
 * been pushed. Public so the wiring is unit-testable without the fused FFI.
 *
 * @param timestampMs Timestamp of the mic frame.
 * @param samples Number of samples in the mic frame.
 * @returns The aligned far-end samples for that frame.
 */
echoReferenceFrame(timestampMs: number, samples: number): Float32Array {
  const delaySamples = this.echoDelaySamples;
  const reference = this.echoBuffer.referenceAt(
    timestampMs,
    samples,
    delaySamples,
  );
  return reference;
}
|
|
467
|
+
|
|
468
|
+
/**
 * Snapshot of the echo-delay calibration state, for status and tests.
 *
 * @returns The delay currently applied (in samples), the confidence stored
 *   with it, and whether a calibrated estimate has been locked in.
 */
aecDelayState(): {
  delaySamples: number;
  confidence: number;
  calibrated: boolean;
} {
  const { echoDelaySamples, echoDelayConfidence, echoDelayCalibrated } = this;
  return {
    delaySamples: echoDelaySamples,
    confidence: echoDelayConfidence,
    calibrated: echoDelayCalibrated,
  };
}
|
|
480
|
+
|
|
481
|
+
/**
 * Learn the playback→mic delay (#9583/#9586) from real echo.
 *
 * Intended to be called for each mic frame until a delay has been locked in.
 * A frame only contributes when the far-end reference read at the same
 * timestamp has a mean-square energy of at least
 * `ECHO_CAL_FAR_ENERGY_FLOOR`. The mic frame and that reference are then
 * appended to the calibration window, which is capped at
 * `ECHO_CAL_MAX_SAMPLES` by dropping the oldest frame pairs. Once
 * `ECHO_CAL_TARGET_SAMPLES` are buffered, `estimateEchoDelaySamples`
 * recovers the lag. An estimate with confidence of at least
 * `ECHO_CAL_MIN_CONFIDENCE` replaces the current delay and ends calibration.
 * The window is emptied after every attempt, so a low-confidence attempt
 * starts collecting again from scratch.
 *
 * Public so it can be unit-tested without the fused FFI.
 *
 * @param nearPcm Mic samples for one frame; copied before being buffered.
 * @param timestampMs Frame timestamp used to look up the far-end reference.
 */
observeForDelayCalibration(nearPcm: Float32Array, timestampMs: number): void {
  if (nearPcm.length === 0 || this.echoDelayCalibrated) return;

  // Delay 0 on purpose: the measurement must see the raw far-end signal, not
  // one already shifted by the value being estimated.
  const farFrame = this.echoBuffer.referenceAt(timestampMs, nearPcm.length, 0);
  const farEnergy = farFrame.reduce((sum, s) => sum + s * s, 0);
  const meanFarEnergy = farEnergy / Math.max(1, farFrame.length);
  if (meanFarEnergy < ECHO_CAL_FAR_ENERGY_FLOOR) {
    // No playback in this frame, so there is no echo to correlate against.
    return;
  }

  this.calNear.push(nearPcm.slice());
  this.calFar.push(farFrame);
  this.calSampleCount += nearPcm.length;

  // Enforce the cap from the oldest end, always keeping the newest pair.
  while (
    this.calNear.length > 1 &&
    this.calSampleCount > ECHO_CAL_MAX_SAMPLES
  ) {
    const dropped = this.calNear.shift() as Float32Array;
    this.calFar.shift();
    this.calSampleCount -= dropped.length;
  }
  if (this.calSampleCount < ECHO_CAL_TARGET_SAMPLES) return;

  const { lagSamples, confidence } = estimateEchoDelaySamples(
    concatFloat32(this.calNear),
    concatFloat32(this.calFar),
    { maxLagSamples: ECHO_CAL_MAX_LAG_SAMPLES },
  );
  if (confidence >= ECHO_CAL_MIN_CONFIDENCE) {
    this.echoDelaySamples = lagSamples;
    this.echoDelayConfidence = confidence;
    this.echoDelayCalibrated = true;
  }

  // Start a fresh window whether or not the estimate was accepted.
  this.calNear = [];
  this.calFar = [];
  this.calSampleCount = 0;
}
|
|
527
|
+
|
|
528
|
+
/**
 * Append agent-playback (far-end) frames to the echo alignment buffer.
 *
 * Each frame is decoded with the same `decodeAudioFramePcm` used for mic
 * frames and stored under its own timestamp. Per the wire contract described
 * for this endpoint, the device posts its TTS output as base64 LE-s16 16 kHz
 * mono in real time. The device MUST also call {@link resetPlayback} when
 * playback stops (or on barge-in) so later mic frames are never aligned to
 * audio that is no longer playing.
 *
 * @param frames Playback frames, in the order they should be buffered.
 */
pushPlayback(frames: AudioFrameEvent[]): void {
  frames.forEach((frame) => {
    const pcm = decodeAudioFramePcm(frame);
    this.echoBuffer.pushAt(frame.timestamp, pcm);
  });
}
|
|
541
|
+
|
|
542
|
+
/**
 * Forget buffered far-end playback (playback stopped or barge-in).
 *
 * The partially collected delay-calibration window is discarded too, since
 * it would otherwise span a playback gap. The delay that has already been
 * learned is left untouched.
 */
resetPlayback(): void {
  this.echoBuffer.reset();
  this.calSampleCount = 0;
  this.calFar = [];
  this.calNear = [];
}
|
|
551
|
+
|
|
243
552
|
/**
 * Feed a batch of WebView-captured mic frames through the session.
 *
 * Builds the consumer on first use and does nothing when no consumer is
 * available after the build. Frames are handed to the consumer one at a
 * time, in order, and the promise resolves once the last one has been
 * awaited. While the echo delay is still uncalibrated, each frame is also
 * decoded and offered to {@link observeForDelayCalibration} first.
 *
 * @param frames Mic frames in capture order.
 */
async ingest(frames: AudioFrameEvent[]): Promise<void> {
  await this.ensureBuilt();
  if (!this.consumer) return;

  for (const frame of frames) {
    this.framesReceived += 1;

    if (!this.echoDelayCalibrated) {
      try {
        const micPcm = decodeAudioFramePcm(frame);
        this.observeForDelayCalibration(micPcm, frame.timestamp);
      } catch {
        // Swallowed on purpose: AudioFrameConsumer owns decode-error
        // accounting when it handles the same frame below.
      }
    }

    await this.consumer.onAudioFrame(frame);
  }
}
|
|
@@ -269,6 +588,12 @@ export class LiveDiarizationSession {
|
|
|
269
588
|
framesReceived: this.framesReceived,
|
|
270
589
|
framesDropped: this.consumer?.droppedFrames ?? 0,
|
|
271
590
|
turnsObserved: this.turnsObserved,
|
|
591
|
+
aec: {
|
|
592
|
+
echoReferenceWired:
|
|
593
|
+
this.consumer != null || this.options.echoReference != null,
|
|
594
|
+
echoDelaySamples: this.echoDelaySamples,
|
|
595
|
+
echoDelayConfidence: this.echoDelayConfidence,
|
|
596
|
+
},
|
|
272
597
|
recentTurns: [...this.recentTurns],
|
|
273
598
|
...(this.buildError ? { error: this.buildError } : {}),
|
|
274
599
|
};
|
|
@@ -277,6 +602,14 @@ export class LiveDiarizationSession {
|
|
|
277
602
|
/** Release native handles + listeners. */
|
|
278
603
|
async close(): Promise<void> {
|
|
279
604
|
await this.consumer?.close();
|
|
605
|
+
if (this.asrRegionAcquired && this.ffi && this.ctx !== null) {
|
|
606
|
+
try {
|
|
607
|
+
this.ffi.mmapEvict(this.ctx, "asr");
|
|
608
|
+
} catch {
|
|
609
|
+
// Best-effort release; the context is destroyed below regardless.
|
|
610
|
+
}
|
|
611
|
+
this.asrRegionAcquired = false;
|
|
612
|
+
}
|
|
280
613
|
await this.encoder?.dispose();
|
|
281
614
|
await this.diarizer?.dispose();
|
|
282
615
|
this.vad?.close();
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { percentile, round1, round4 } from "./metric-math";
|
|
3
|
+
|
|
4
|
+
// round1: one-decimal rounding, ties go up, integers pass through.
describe("round1", () => {
  it("rounds to one decimal place", () => {
    const rounded = round1(12.34);
    expect(rounded).toBe(12.3);
  });

  it("rounds half up", () => {
    const rounded = round1(0.25);
    expect(rounded).toBe(0.3);
  });

  it("leaves integers unchanged", () => {
    const rounded = round1(42);
    expect(rounded).toBe(42);
  });
});
|
|
17
|
+
|
|
18
|
+
// round4: four-decimal rounding; values with fewer decimals pass through.
describe("round4", () => {
  it("rounds to four decimal places", () => {
    const rounded = round4(0.123456);
    expect(rounded).toBe(0.1235);
  });

  it("leaves shorter values unchanged", () => {
    const rounded = round4(0.12);
    expect(rounded).toBe(0.12);
  });
});
|
|
27
|
+
|
|
28
|
+
// percentile: nearest-rank over the finite values, result rounded via round1.
describe("percentile", () => {
  it("computes the nearest-rank percentile", () => {
    const sample = [10, 20, 30, 40, 50];
    const p50 = percentile(sample, 50); // rank ceil(0.5 * 5) = 3 -> index 2
    const p95 = percentile(sample, 95); // rank ceil(0.95 * 5) = 5 -> index 4
    const p0 = percentile(sample, 0); // rank 0 -> clamped to index 0
    expect(p50).toBe(30);
    expect(p95).toBe(50);
    expect(p0).toBe(10);
  });

  it("sorts before ranking", () => {
    const unsorted = [50, 10, 30, 20, 40];
    expect(percentile(unsorted, 50)).toBe(30);
  });

  it("filters non-finite values before ranking", () => {
    // Only [10, 20, 30] survive; rank ceil(0.5 * 3) = 2 -> index 1.
    const noisy = [10, Number.NaN, 20, Number.POSITIVE_INFINITY, 30];
    expect(percentile(noisy, 50)).toBe(20);
  });

  it("rounds the result to one decimal place", () => {
    // A lone finite sample comes back through round1.
    const single = [12.34];
    expect(percentile(single, 50)).toBe(12.3);
  });

  it("returns null for an empty sample", () => {
    const empty: number[] = [];
    expect(percentile(empty, 50)).toBeNull();
  });

  it("returns null when every value is non-finite", () => {
    const allBad = [Number.NaN, Number.POSITIVE_INFINITY];
    expect(percentile(allBad, 50)).toBeNull();
  });
});
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure rounding + percentile helpers shared by the voice E2E harness and the
|
|
3
|
+
* voice workbench report (#8785). No models, filesystem, or network — just
|
|
4
|
+
* numeric formatting so both consumers report identical metric values.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Round to one decimal place.
 *
 * Uses `Math.round` on the value scaled by ten, so exact ties round toward
 * positive infinity.
 *
 * @param value Number to round.
 * @returns `value` rounded to the nearest tenth.
 */
export function round1(value: number): number {
  const tenths = Math.round(value * 10);
  return tenths / 10;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Round to four decimal places.
 *
 * Uses `Math.round` on the value scaled by 1e4, so exact ties round toward
 * positive infinity.
 *
 * @param value Number to round.
 * @returns `value` rounded to the nearest ten-thousandth.
 */
export function round4(value: number): number {
  const scale = 1e4;
  const scaled = Math.round(value * scale);
  return scaled / scale;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Nearest-rank percentile of a sample, rounded to one decimal place.
 *
 * Non-finite entries (`NaN`, `±Infinity`) are dropped before ranking. The
 * rank is `ceil(p / 100 * n)` over the remaining `n` values and is clamped
 * into the sample, so `p <= 0` yields the minimum and `p >= 100` the maximum.
 * The input array is never mutated.
 *
 * @param values Sample to rank; may be unsorted and may contain non-finite
 *   entries.
 * @param p Percentile in percent (0–100).
 * @returns The rounded percentile, or `null` when no finite value remains or
 *   when `p` is `NaN` (no rank can be derived from it).
 */
export function percentile(
  values: ReadonlyArray<number>,
  p: number,
): number | null {
  // A NaN percentile would produce a NaN rank, index past the sample, and
  // leak NaN into reports; treat it as "no percentile" instead.
  if (Number.isNaN(p)) return null;

  // `filter` already returns a fresh array, so sorting it in place is safe.
  const sorted = values
    .filter((v) => Number.isFinite(v))
    .sort((a, b) => a - b);
  if (sorted.length === 0) return null;

  const rank = Math.ceil((p / 100) * sorted.length);
  const index = Math.min(sorted.length - 1, Math.max(0, rank - 1));
  return round1(sorted[index]);
}
|
|
@@ -243,7 +243,7 @@ abstract class BaseMicSource implements MicSource {
|
|
|
243
243
|
}
|
|
244
244
|
|
|
245
245
|
export interface DesktopMicSourceOptions {
|
|
246
|
-
/** Capture sample rate. Default 16 kHz (what
|
|
246
|
+
/** Capture sample rate. Default 16 kHz (what VAD + Gemma ASR expect). */
|
|
247
247
|
sampleRate?: number;
|
|
248
248
|
/** Frame duration in ms. Default 32 ms (one Silero window @ 16 kHz). */
|
|
249
249
|
frameMs?: number;
|