@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
18
|
import { describe, expect, it } from "vitest";
|
|
19
|
+
import type { HandleLiveVoiceAttributionOptions } from "../../runtime/voice-entity-binding";
|
|
19
20
|
import {
|
|
20
21
|
type AttributionPipelineLike,
|
|
21
22
|
AudioFrameConsumer,
|
|
@@ -23,8 +24,12 @@ import {
|
|
|
23
24
|
type AudioFrameEvent,
|
|
24
25
|
decodeAudioFramePcm,
|
|
25
26
|
type RuntimeEventSink,
|
|
27
|
+
type SelfVoiceSimilarityResolver,
|
|
28
|
+
type TurnTranscriber,
|
|
29
|
+
type VadSegmenter,
|
|
26
30
|
} from "./audio-frame-consumer";
|
|
27
31
|
import type { VoiceAttributionOutput } from "./speaker/attribution-pipeline";
|
|
32
|
+
import type { PcmFrame, VadEvent } from "./types";
|
|
28
33
|
import { VadDetector } from "./vad";
|
|
29
34
|
|
|
30
35
|
const SR = 16_000;
|
|
@@ -119,6 +124,9 @@ function makeFrame(opts: {
|
|
|
119
124
|
function buildHarness(
|
|
120
125
|
probs: readonly number[],
|
|
121
126
|
entityId: string | null = "entity-x",
|
|
127
|
+
transcribe?: TurnTranscriber,
|
|
128
|
+
attributionOptions?: Partial<HandleLiveVoiceAttributionOptions>,
|
|
129
|
+
resolveSelfVoiceSimilarity?: SelfVoiceSimilarityResolver,
|
|
122
130
|
) {
|
|
123
131
|
const silero = new ScriptedSilero(probs);
|
|
124
132
|
const vad = new VadDetector(silero, {
|
|
@@ -131,13 +139,20 @@ function buildHarness(
|
|
|
131
139
|
const pipeline = new FakePipeline(entityId);
|
|
132
140
|
const runtime = new FakeRuntime();
|
|
133
141
|
const consumer = new AudioFrameConsumer(
|
|
134
|
-
{
|
|
142
|
+
{
|
|
143
|
+
vad,
|
|
144
|
+
pipeline,
|
|
145
|
+
runtime,
|
|
146
|
+
...(transcribe ? { transcribe } : {}),
|
|
147
|
+
...(resolveSelfVoiceSimilarity ? { resolveSelfVoiceSimilarity } : {}),
|
|
148
|
+
},
|
|
135
149
|
{
|
|
136
150
|
source: { kind: "device", deviceId: "pixel" },
|
|
137
151
|
attributionOptions: {
|
|
138
152
|
ownerEntityId: "entity-x",
|
|
139
153
|
knownSpeakerEntityIds: ["entity-x"],
|
|
140
154
|
endOfTurnProbability: 0.95,
|
|
155
|
+
...attributionOptions,
|
|
141
156
|
},
|
|
142
157
|
preRollSeconds: 0, // deterministic buffering for assertions
|
|
143
158
|
maxTurnSeconds: 30,
|
|
@@ -146,6 +161,26 @@ function buildHarness(
|
|
|
146
161
|
return { consumer, pipeline, runtime, vad };
|
|
147
162
|
}
|
|
148
163
|
|
|
164
|
+
/** Feed one loud turn (speech then silence) and flush, so exactly one turn
|
|
165
|
+
* finalizes. Shared by the transcript-join tests. */
|
|
166
|
+
async function driveOneTurn(consumer: AudioFrameConsumer): Promise<void> {
|
|
167
|
+
let ts = 1000;
|
|
168
|
+
let idx = 0;
|
|
169
|
+
for (let i = 0; i < 40; i++) {
|
|
170
|
+
await consumer.onAudioFrame(
|
|
171
|
+
makeFrame({ amplitude: 0.6, timestamp: ts, frameIndex: idx++ }),
|
|
172
|
+
);
|
|
173
|
+
ts += 20;
|
|
174
|
+
}
|
|
175
|
+
for (let i = 0; i < 24; i++) {
|
|
176
|
+
await consumer.onAudioFrame(
|
|
177
|
+
makeFrame({ amplitude: 0.0, timestamp: ts, frameIndex: idx++ }),
|
|
178
|
+
);
|
|
179
|
+
ts += 20;
|
|
180
|
+
}
|
|
181
|
+
await consumer.flush();
|
|
182
|
+
}
|
|
183
|
+
|
|
149
184
|
describe("decodeAudioFramePcm", () => {
|
|
150
185
|
it("decodes base64 LE-s16 mono → Float32 [-1,1]", () => {
|
|
151
186
|
const frame = makeFrame({ amplitude: 0.5, timestamp: 0, frameIndex: 0 });
|
|
@@ -284,6 +319,41 @@ describe("AudioFrameConsumer", () => {
|
|
|
284
319
|
expect(metaSignal).toBeTruthy();
|
|
285
320
|
});
|
|
286
321
|
|
|
322
|
+
it("passes live selfVoiceSimilarity into the gate and suppresses agent echo", async () => {
|
|
323
|
+
const probs = [...Array(24).fill(0.9), ...Array(12).fill(0.0)];
|
|
324
|
+
const resolveSelfVoiceSimilarity: SelfVoiceSimilarityResolver = (
|
|
325
|
+
embedding,
|
|
326
|
+
output,
|
|
327
|
+
) => {
|
|
328
|
+
expect(embedding).toBe(output.observation?.embedding);
|
|
329
|
+
expect(embedding.length).toBe(256);
|
|
330
|
+
return 0.91;
|
|
331
|
+
};
|
|
332
|
+
const { consumer } = buildHarness(
|
|
333
|
+
probs,
|
|
334
|
+
"entity-x",
|
|
335
|
+
undefined,
|
|
336
|
+
{ agentSpeaking: true },
|
|
337
|
+
resolveSelfVoiceSimilarity,
|
|
338
|
+
);
|
|
339
|
+
let signal: {
|
|
340
|
+
agentShouldSpeak: boolean | null;
|
|
341
|
+
nextSpeaker: string;
|
|
342
|
+
metadata?: { provenance?: string; selfVoiceSimilarity?: number };
|
|
343
|
+
} | null = null;
|
|
344
|
+
consumer.onTurn((t) => {
|
|
345
|
+
signal = t.signal as typeof signal;
|
|
346
|
+
});
|
|
347
|
+
|
|
348
|
+
await driveOneTurn(consumer);
|
|
349
|
+
|
|
350
|
+
expect(signal).not.toBeNull();
|
|
351
|
+
expect(signal?.agentShouldSpeak).toBe(false);
|
|
352
|
+
expect(signal?.nextSpeaker).toBe("user");
|
|
353
|
+
expect(signal?.metadata?.provenance).toBe("voice-bridge+self-voice");
|
|
354
|
+
expect(signal?.metadata?.selfVoiceSimilarity).toBeCloseTo(0.91);
|
|
355
|
+
});
|
|
356
|
+
|
|
287
357
|
it("does not segment a turn from pure silence", async () => {
|
|
288
358
|
const { consumer, pipeline } = buildHarness(Array(40).fill(0.0));
|
|
289
359
|
let ts = 1000;
|
|
@@ -341,3 +411,259 @@ describe("AudioFrameConsumer", () => {
|
|
|
341
411
|
}
|
|
342
412
|
});
|
|
343
413
|
});
|
|
414
|
+
|
|
415
|
+
describe("AudioFrameConsumer — ASR transcript join (#8786)", () => {
|
|
416
|
+
const TURN_PROBS = [...Array(24).fill(0.9), ...Array(12).fill(0.0)];
|
|
417
|
+
|
|
418
|
+
it("joins the per-turn ASR transcript onto VOICE_TURN_OBSERVED", async () => {
|
|
419
|
+
const seen: Array<{ length: number; sampleRate: number }> = [];
|
|
420
|
+
const transcribe: TurnTranscriber = (pcm, sampleRate) => {
|
|
421
|
+
seen.push({ length: pcm.length, sampleRate });
|
|
422
|
+
return " I'm Jill ";
|
|
423
|
+
};
|
|
424
|
+
const { consumer, runtime } = buildHarness(
|
|
425
|
+
TURN_PROBS,
|
|
426
|
+
"entity-x",
|
|
427
|
+
transcribe,
|
|
428
|
+
);
|
|
429
|
+
await driveOneTurn(consumer);
|
|
430
|
+
|
|
431
|
+
// The transcriber saw the real buffered turn PCM at 16 kHz.
|
|
432
|
+
expect(seen.length).toBe(1);
|
|
433
|
+
expect(seen[0].sampleRate).toBe(16_000);
|
|
434
|
+
expect(seen[0].length).toBeGreaterThan(SR * 0.4);
|
|
435
|
+
// VOICE_TURN_OBSERVED now carries the trimmed transcript (was "" before).
|
|
436
|
+
expect(runtime.emitted.length).toBe(1);
|
|
437
|
+
expect(runtime.emitted[0].payload.text).toBe("I'm Jill");
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
it("stays diarization-only (empty text) when no transcriber is wired", async () => {
|
|
441
|
+
const { consumer, runtime } = buildHarness(TURN_PROBS, "entity-x");
|
|
442
|
+
await driveOneTurn(consumer);
|
|
443
|
+
expect(runtime.emitted.length).toBe(1);
|
|
444
|
+
expect(runtime.emitted[0].payload.text).toBe("");
|
|
445
|
+
});
|
|
446
|
+
|
|
447
|
+
it("degrades to a transcript-less turn when ASR throws (turn kept)", async () => {
|
|
448
|
+
const transcribe: TurnTranscriber = () => {
|
|
449
|
+
throw new Error("asr decode failed");
|
|
450
|
+
};
|
|
451
|
+
const { consumer, runtime } = buildHarness(
|
|
452
|
+
TURN_PROBS,
|
|
453
|
+
"entity-x",
|
|
454
|
+
transcribe,
|
|
455
|
+
);
|
|
456
|
+
await driveOneTurn(consumer);
|
|
457
|
+
// The diarized turn still emits; only the transcript is dropped, counted.
|
|
458
|
+
expect(runtime.emitted.length).toBe(1);
|
|
459
|
+
expect(runtime.emitted[0].payload.text).toBe("");
|
|
460
|
+
expect(consumer.transcriptionErrors).toBe(1);
|
|
461
|
+
});
|
|
462
|
+
|
|
463
|
+
it("ignores an empty/whitespace transcript (no text stamped)", async () => {
|
|
464
|
+
const transcribe: TurnTranscriber = () => " ";
|
|
465
|
+
const { consumer, runtime } = buildHarness(
|
|
466
|
+
TURN_PROBS,
|
|
467
|
+
"entity-x",
|
|
468
|
+
transcribe,
|
|
469
|
+
);
|
|
470
|
+
await driveOneTurn(consumer);
|
|
471
|
+
expect(runtime.emitted.length).toBe(1);
|
|
472
|
+
expect(runtime.emitted[0].payload.text).toBe("");
|
|
473
|
+
});
|
|
474
|
+
});
|
|
475
|
+
|
|
476
|
+
// --- echo cancellation wiring (#9455) ----------------------------------------
|
|
477
|
+
|
|
478
|
+
/** VadSegmenter that just records every frame the consumer pushes downstream. */
|
|
479
|
+
class RecordingVad implements VadSegmenter {
|
|
480
|
+
readonly frames: Float32Array[] = [];
|
|
481
|
+
get inSpeech(): boolean {
|
|
482
|
+
return false;
|
|
483
|
+
}
|
|
484
|
+
onVadEvent(_listener: (event: VadEvent) => void): () => void {
|
|
485
|
+
return () => {};
|
|
486
|
+
}
|
|
487
|
+
async pushFrame(frame: PcmFrame): Promise<void> {
|
|
488
|
+
this.frames.push(frame.pcm);
|
|
489
|
+
}
|
|
490
|
+
async flush(): Promise<void> {}
|
|
491
|
+
reset(): void {}
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
describe("AudioFrameConsumer — echo cancellation (#9455)", () => {
|
|
495
|
+
const SR = 16000;
|
|
496
|
+
const BLOCK = 320;
|
|
497
|
+
function farSignal(n: number, seed = 1): Float32Array {
|
|
498
|
+
const x = new Float32Array(n);
|
|
499
|
+
let s = seed >>> 0;
|
|
500
|
+
let p1 = 0;
|
|
501
|
+
let p2 = 0;
|
|
502
|
+
for (let i = 0; i < n; i++) {
|
|
503
|
+
s = (s * 1103515245 + 12345) & 0x7fffffff;
|
|
504
|
+
const w = s / 0x3fffffff - 1;
|
|
505
|
+
p1 = 0.92 * p1 + 0.08 * w;
|
|
506
|
+
p2 = 0.85 * p2 + 0.15 * p1;
|
|
507
|
+
x[i] = p2 * 3;
|
|
508
|
+
}
|
|
509
|
+
return x;
|
|
510
|
+
}
|
|
511
|
+
function echoOf(x: Float32Array): Float32Array {
|
|
512
|
+
const delay = 35;
|
|
513
|
+
const tail = 90;
|
|
514
|
+
const h = new Float32Array(delay + tail);
|
|
515
|
+
for (let k = 0; k < tail; k++)
|
|
516
|
+
h[delay + k] = Math.exp(-k / 25) * (k % 2 ? -0.6 : 0.8) * 0.22;
|
|
517
|
+
const y = new Float32Array(x.length);
|
|
518
|
+
for (let n = 0; n < x.length; n++) {
|
|
519
|
+
let acc = 0;
|
|
520
|
+
for (let k = 0; k < h.length; k++) if (n - k >= 0) acc += h[k] * x[n - k];
|
|
521
|
+
y[n] = acc;
|
|
522
|
+
}
|
|
523
|
+
return y;
|
|
524
|
+
}
|
|
525
|
+
const power = (a: Float32Array) =>
|
|
526
|
+
a.reduce((p, v) => p + v * v, 0) / Math.max(1, a.length);
|
|
527
|
+
|
|
528
|
+
function makeConsumer(vad: RecordingVad, far: Float32Array | null) {
|
|
529
|
+
return new AudioFrameConsumer(
|
|
530
|
+
{
|
|
531
|
+
vad,
|
|
532
|
+
pipeline: new FakePipeline("e"),
|
|
533
|
+
runtime: new FakeRuntime(),
|
|
534
|
+
...(far
|
|
535
|
+
? {
|
|
536
|
+
echoReference: (ts: number, samples: number) => {
|
|
537
|
+
const off = Math.round((ts - 1000) / 20) * BLOCK;
|
|
538
|
+
return far.subarray(off, off + samples);
|
|
539
|
+
},
|
|
540
|
+
}
|
|
541
|
+
: {}),
|
|
542
|
+
},
|
|
543
|
+
{ source: { kind: "device", deviceId: "pixel" }, preRollSeconds: 0 },
|
|
544
|
+
);
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
it("cancels the agent's echo on the mic before VAD when echoReference is wired", async () => {
|
|
548
|
+
const N = SR * 3;
|
|
549
|
+
const far = farSignal(N);
|
|
550
|
+
const echo = echoOf(far); // the agent's TTS leaking into the mic
|
|
551
|
+
const vad = new RecordingVad();
|
|
552
|
+
const consumer = makeConsumer(vad, far);
|
|
553
|
+
let ts = 1000;
|
|
554
|
+
for (let off = 0; off + BLOCK <= N; off += BLOCK) {
|
|
555
|
+
await consumer.pushDecodedFrame(echo.subarray(off, off + BLOCK), ts);
|
|
556
|
+
ts += 20;
|
|
557
|
+
}
|
|
558
|
+
// after convergence, the recorded (post-AEC) frames carry far less echo
|
|
559
|
+
// energy than the raw mic frames did.
|
|
560
|
+
const lateOut = vad.frames[vad.frames.length - 1];
|
|
561
|
+
const lateRawOff = (vad.frames.length - 1) * BLOCK;
|
|
562
|
+
const lateRaw = echo.subarray(lateRawOff, lateRawOff + BLOCK);
|
|
563
|
+
expect(power(lateOut)).toBeLessThan(power(lateRaw) * 0.1); // >10 dB
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
it("leaves the mic untouched when no echoReference is wired", async () => {
  const vad = new RecordingVad();
  // Passing null for the far-end signal means no echoReference is supplied.
  const consumer = makeConsumer(vad, null);
  const frame = farSignal(BLOCK, 7);

  await consumer.pushDecodedFrame(frame, 1000);

  // The frame handed to the VAD must equal the input sample-for-sample.
  const received = Array.from(vad.frames[0]);
  const sent = Array.from(frame);
  expect(received).toEqual(sent);
});
|
|
573
|
+
|
|
574
|
+
it("skips the canceller entirely while the agent is silent (#9649 fast path)", async () => {
  // The reference provider hands back far-end PCM while the agent plays and
  // null after it stops. Frames pushed during silence must come out as an
  // EXACT passthrough — showing the canceller was not run on them (so it
  // cannot subtract a stale echo estimate against converged weights) — and
  // must not bump the cancelled-frame counter.
  const N = SR * 2;
  const far = farSignal(N);
  const echo = echoOf(far);
  const PLAYBACK_FRAMES = 40; // agent plays for the first 40 frames, then stops
  const vad = new RecordingVad();
  const consumer = new AudioFrameConsumer(
    {
      vad,
      pipeline: new FakePipeline("skip"),
      runtime: new FakeRuntime(),
      echoReference: (ts: number, samples: number) => {
        const idx = Math.round((ts - 1000) / 20);
        if (idx < PLAYBACK_FRAMES) {
          const start = idx * BLOCK;
          return far.subarray(start, start + samples);
        }
        return null; // agent silent
      },
    },
    { source: { kind: "device", deviceId: "pixel" }, preRollSeconds: 0 },
  );

  // The mic carries echo while the agent plays, then a distinct near-end
  // signal once playback has stopped.
  const nearSilentEra = farSignal(N, 555);
  const silentInputs: Float32Array[] = [];
  const frameCount = Math.floor(N / BLOCK);
  for (let i = 0; i < frameCount; i++) {
    const start = i * BLOCK;
    const agentPlaying = i < PLAYBACK_FRAMES;
    const micSource = agentPlaying ? echo : nearSilentEra;
    const mic = micSource.subarray(start, start + BLOCK);
    if (!agentPlaying) silentInputs.push(mic);
    await consumer.pushDecodedFrame(mic, 1000 + i * 20);
  }

  // Exactly the playback frames went through the canceller.
  expect(consumer.echoFramesCancelled).toBe(PLAYBACK_FRAMES);

  // Each silent-era frame reached the VAD identical to what was pushed.
  const silentOutputs = vad.frames.slice(PLAYBACK_FRAMES);
  expect(silentOutputs.length).toBe(silentInputs.length);
  for (const [i, out] of silentOutputs.entries()) {
    expect(Array.from(out)).toEqual(Array.from(silentInputs[i]));
  }
});
|
|
625
|
+
|
|
626
|
+
it("clears stale far-end state before playback resumes after silence", async () => {
  // Timeline: 40 playback frames, 5 silent frames (null reference), then one
  // frame whose reference is non-empty but all zeros.
  const PLAYBACK_FRAMES = 40;
  const SILENT_FRAMES = 5;
  const restartFrame = PLAYBACK_FRAMES + SILENT_FRAMES;
  const totalFrames = restartFrame + 1;
  const N = totalFrames * BLOCK;
  const far = farSignal(N);
  const echo = echoOf(far);
  const zeroReference = new Float32Array(BLOCK);
  const zeroMic = new Float32Array(BLOCK);
  const vad = new RecordingVad();
  const consumer = new AudioFrameConsumer(
    {
      vad,
      pipeline: new FakePipeline("restart"),
      runtime: new FakeRuntime(),
      echoReference: (ts: number, samples: number) => {
        const idx = Math.round((ts - 1000) / 20);
        if (idx >= restartFrame) return zeroReference.subarray(0, samples);
        if (idx >= PLAYBACK_FRAMES) return null;
        const start = idx * BLOCK;
        return far.subarray(start, start + samples);
      },
    },
    { source: { kind: "device", deviceId: "pixel" }, preRollSeconds: 0 },
  );

  // Echo on the mic during playback; an all-zero mic frame afterwards.
  for (let i = 0; i < totalFrames; i++) {
    const start = i * BLOCK;
    const mic =
      i < PLAYBACK_FRAMES ? echo.subarray(start, start + BLOCK) : zeroMic;
    await consumer.pushDecodedFrame(mic, 1000 + i * 20);
  }

  // The restart frame is processed by the canceller (counter = playback + 1),
  // and with a zero mic and zero reference its output must stay all zeros —
  // i.e. nothing left over from the earlier playback burst is subtracted.
  expect(consumer.echoFramesCancelled).toBe(PLAYBACK_FRAMES + 1);
  expect(Array.from(vad.frames[restartFrame])).toEqual(Array.from(zeroMic));
});
|
|
669
|
+
});
|
|
@@ -36,6 +36,10 @@ import {
|
|
|
36
36
|
handleLiveVoiceAttribution,
|
|
37
37
|
} from "../../runtime/voice-entity-binding.js";
|
|
38
38
|
import type { VoiceTurnSignal } from "./eot-classifier.js";
|
|
39
|
+
import {
|
|
40
|
+
NlmsEchoCanceller,
|
|
41
|
+
type ResidualSuppressionOptions,
|
|
42
|
+
} from "./nlms-echo-canceller.js";
|
|
39
43
|
import type {
|
|
40
44
|
VoiceAttributionOutput,
|
|
41
45
|
VoiceAttributionPipeline,
|
|
@@ -176,8 +180,36 @@ export interface AttributionPipelineLike {
|
|
|
176
180
|
*/
|
|
177
181
|
export interface RuntimeEventSink {
  /** Emit an event of `type` carrying `payload`; resolves once the sink is done. */
  emitEvent(type: unknown, payload: Record<string, unknown>): Promise<void>;

  /**
   * Far-end (agent TTS playback) reference a host may supply for the live AEC
   * path (#9583). If present, the live diarization route passes it to the
   * session's NLMS echo canceller rather than depending on the
   * playback-frames ingest route. Headless/core runtimes do not provide it.
   */
  voiceEchoReferenceProvider?: EchoReferenceProvider;
}
|
|
180
191
|
|
|
192
|
+
/**
 * ASR hook that turns a finalized turn's buffered PCM into text (#8786).
 *
 * With a transcriber injected, the consumer attaches the transcript to the
 * diarization attribution so `VOICE_TURN_OBSERVED` carries real text. Before
 * this hook the live audio-frame path identified *who* spoke but always
 * emitted `text: ""`, which meant name/partner extraction
 * (`VoiceObserver.ingestTurn`) never ran on live audio.
 *
 * May answer synchronously or via a promise. The result is the transcript, or
 * `null`/empty for silence or a failed decode. It is best-effort: a rejection
 * is swallowed by the consumer (tallied in `transcriptionErrors`) and the turn
 * is emitted without a transcript instead of being dropped.
 */
export type TurnTranscriber = (
  pcm: Float32Array,
  sampleRate: number,
) => string | null | Promise<string | null>;
|
|
207
|
+
|
|
208
|
+
/**
 * Host hook that scores a turn's speaker embedding against the agent's own
 * voice. Receives the embedding and the full attribution output; may answer
 * synchronously or via a promise. `null`/`undefined` means no score is
 * available.
 */
export type SelfVoiceSimilarityResolver = (
  embedding: Float32Array,
  output: VoiceAttributionOutput,
) => number | null | undefined | Promise<number | null | undefined>;
|
|
212
|
+
|
|
181
213
|
// ---------------------------------------------------------------------------
|
|
182
214
|
// Consumer
|
|
183
215
|
// ---------------------------------------------------------------------------
|
|
@@ -189,8 +221,39 @@ export interface AudioFrameConsumerDeps {
|
|
|
189
221
|
pipeline: AttributionPipelineLike;
|
|
190
222
|
/** Runtime event sink for VOICE_TURN_OBSERVED. */
|
|
191
223
|
runtime: RuntimeEventSink;
|
|
224
|
+
/**
|
|
225
|
+
* Optional ASR for the finalized turn's PCM (#8786). When present, its text
|
|
226
|
+
* rides on `VOICE_TURN_OBSERVED` so live name/entity extraction runs. When
|
|
227
|
+
* absent the path stays diarization-only (transcript `""`, as before).
|
|
228
|
+
*/
|
|
229
|
+
transcribe?: TurnTranscriber;
|
|
230
|
+
/**
|
|
231
|
+
* Optional live acoustic self-voice resolver. When wired, the consumer passes
|
|
232
|
+
* the turn's WeSpeaker embedding to the host's agent-TTS centroid matcher and
|
|
233
|
+
* forwards the resulting cosine into the ambient gate.
|
|
234
|
+
*/
|
|
235
|
+
resolveSelfVoiceSimilarity?: SelfVoiceSimilarityResolver;
|
|
236
|
+
/**
|
|
237
|
+
* Optional agent-playback (far-end) reference for acoustic echo cancellation
|
|
238
|
+
* (#9455). Given a mic frame's clock timestamp and sample count, returns the
|
|
239
|
+
* agent's TTS playback PCM for that exact window (Float32 16 kHz), or null
|
|
240
|
+
* when the agent is not playing. When wired, the consumer runs an NLMS echo
|
|
241
|
+
* canceller on every mic frame BEFORE VAD/attribution so the agent never
|
|
242
|
+
* transcribes its own TTS. Absent → no AEC (unchanged behavior). The caller
|
|
243
|
+
* owns the playback capture + the playback→mic delay calibration.
|
|
244
|
+
*/
|
|
245
|
+
echoReference?: EchoReferenceProvider;
|
|
192
246
|
}
|
|
193
247
|
|
|
248
|
+
/**
 * Supplies the far-end echo reference — the agent's TTS playback PCM — for
 * the time window of a mic frame. See #9455.
 *
 * @param timestampMs Clock timestamp of the mic frame.
 * @param samples Number of samples in the mic frame.
 * @returns The playback PCM aligned to that window, or `null` while the agent
 *   is silent.
 */
export type EchoReferenceProvider = (
  timestampMs: number,
  samples: number,
) => null | Float32Array;
|
|
256
|
+
|
|
194
257
|
export interface AudioFrameConsumerConfig {
|
|
195
258
|
/** Source metadata stamped onto every attributed turn. */
|
|
196
259
|
source?: VoiceInputSource;
|
|
@@ -208,6 +271,12 @@ export interface AudioFrameConsumerConfig {
|
|
|
208
271
|
* out of the attribution buffer. Default 0.3 s.
|
|
209
272
|
*/
|
|
210
273
|
preRollSeconds?: number;
|
|
274
|
+
/**
|
|
275
|
+
* Opt-in nonlinear residual-echo suppressor forwarded to the NLMS canceller
|
|
276
|
+
* (#9583/#9649). Default-off; only meaningful when an `echoReference` is wired
|
|
277
|
+
* (no canceller exists otherwise). See {@link NlmsEchoCancellerOptions.residualSuppression}.
|
|
278
|
+
*/
|
|
279
|
+
residualSuppression?: boolean | ResidualSuppressionOptions;
|
|
211
280
|
}
|
|
212
281
|
|
|
213
282
|
/** A finalized, attributed turn the consumer surfaces to its caller. */
|
|
@@ -237,6 +306,11 @@ export class AudioFrameConsumer {
|
|
|
237
306
|
private readonly vad: VadSegmenter;
|
|
238
307
|
private readonly pipeline: AttributionPipelineLike;
|
|
239
308
|
private readonly runtime: RuntimeEventSink;
|
|
309
|
+
private readonly transcribe: TurnTranscriber | null;
|
|
310
|
+
private readonly resolveSelfVoiceSimilarity: SelfVoiceSimilarityResolver | null;
|
|
311
|
+
private readonly echoReference: EchoReferenceProvider | null;
|
|
312
|
+
/** NLMS echo canceller, instantiated only when an `echoReference` is wired. */
|
|
313
|
+
private readonly echoCanceller: NlmsEchoCanceller | null;
|
|
240
314
|
private readonly source: VoiceInputSource | undefined;
|
|
241
315
|
private readonly attributionOptions: HandleLiveVoiceAttributionOptions;
|
|
242
316
|
private readonly maxTurnSamples: number;
|
|
@@ -261,6 +335,15 @@ export class AudioFrameConsumer {
|
|
|
261
335
|
/** Count of frames that failed to decode (surfaced via getters, not thrown). */
|
|
262
336
|
droppedFrames = 0;
|
|
263
337
|
|
|
338
|
+
/** Count of turns whose ASR transcribe threw (degraded to a transcript-less
|
|
339
|
+
* turn rather than dropping the diarized turn). */
|
|
340
|
+
transcriptionErrors = 0;
|
|
341
|
+
|
|
342
|
+
/** Count of mic frames the echo canceller actually processed (i.e. the agent
|
|
343
|
+
* was playing). Frames skipped while the agent is silent do not count, so
|
|
344
|
+
* this also measures how often AEC took the cheap passthrough path. */
|
|
345
|
+
echoFramesCancelled = 0;
|
|
346
|
+
|
|
264
347
|
constructor(
|
|
265
348
|
deps: AudioFrameConsumerDeps,
|
|
266
349
|
config: AudioFrameConsumerConfig = {},
|
|
@@ -268,6 +351,16 @@ export class AudioFrameConsumer {
|
|
|
268
351
|
this.vad = deps.vad;
|
|
269
352
|
this.pipeline = deps.pipeline;
|
|
270
353
|
this.runtime = deps.runtime;
|
|
354
|
+
this.transcribe = deps.transcribe ?? null;
|
|
355
|
+
this.resolveSelfVoiceSimilarity = deps.resolveSelfVoiceSimilarity ?? null;
|
|
356
|
+
this.echoReference = deps.echoReference ?? null;
|
|
357
|
+
this.echoCanceller = this.echoReference
|
|
358
|
+
? new NlmsEchoCanceller(
|
|
359
|
+
config.residualSuppression
|
|
360
|
+
? { residualSuppression: config.residualSuppression }
|
|
361
|
+
: {},
|
|
362
|
+
)
|
|
363
|
+
: null;
|
|
271
364
|
this.source = config.source;
|
|
272
365
|
this.attributionOptions = config.attributionOptions ?? {};
|
|
273
366
|
const sr = AUDIO_FRAME_PIPELINE_SAMPLE_RATE;
|
|
@@ -329,20 +422,44 @@ export class AudioFrameConsumer {
|
|
|
329
422
|
timestampMs: number,
|
|
330
423
|
): Promise<void> {
|
|
331
424
|
if (this.closed) return;
|
|
425
|
+
// #9455/#9649: cancel the agent's TTS echo before VAD/attribution so the
|
|
426
|
+
// agent never transcribes its own playback. When the reference provider
|
|
427
|
+
// returns null/empty the agent is silent — skip the FIR canceller so AEC
|
|
428
|
+
// is cheap and exactly passthrough on the common no-playback path.
|
|
429
|
+
const micPcm = this.cancelEcho(pcm, timestampMs);
|
|
332
430
|
this.lastFrameEndMs =
|
|
333
|
-
timestampMs + (
|
|
431
|
+
timestampMs + (micPcm.length / AUDIO_FRAME_PIPELINE_SAMPLE_RATE) * 1000;
|
|
334
432
|
if (this.capturing) {
|
|
335
|
-
this.appendTurnChunk(
|
|
433
|
+
this.appendTurnChunk(micPcm);
|
|
336
434
|
} else {
|
|
337
|
-
this.appendPreRoll(
|
|
435
|
+
this.appendPreRoll(micPcm);
|
|
338
436
|
}
|
|
339
437
|
await this.vad.pushFrame({
|
|
340
|
-
pcm,
|
|
438
|
+
pcm: micPcm,
|
|
341
439
|
sampleRate: AUDIO_FRAME_PIPELINE_SAMPLE_RATE,
|
|
342
440
|
timestampMs,
|
|
343
441
|
});
|
|
344
442
|
}
|
|
345
443
|
|
|
444
|
+
/**
 * Echo-cancel a single mic frame, but only while the agent is playing.
 *
 * With no canceller/reference wired the frame is returned as-is. Otherwise
 * the reference provider is asked for the far-end PCM covering this frame:
 *  - a non-empty reference → the frame goes through the canceller's
 *    `process()` and `echoFramesCancelled` is incremented;
 *  - null or empty (agent silent) → the frame is returned verbatim and
 *    `process()` is not called. The canceller is still notified through
 *    `observeFarEndSilence()`, so that stale playback history is cleared
 *    before playback resumes.
 *
 * @returns The echo-cancelled frame, or the original `pcm` when untouched.
 */
private cancelEcho(pcm: Float32Array, timestampMs: number): Float32Array {
  if (!(this.echoCanceller && this.echoReference)) return pcm;

  const farEnd = this.echoReference(timestampMs, pcm.length);
  if (farEnd && farEnd.length > 0) {
    this.echoFramesCancelled += 1;
    return this.echoCanceller.process(pcm, farEnd);
  }

  // Agent silent: report the silence to the canceller, pass the mic through.
  this.echoCanceller.observeFarEndSilence(pcm);
  return pcm;
}
|
|
462
|
+
|
|
346
463
|
/**
|
|
347
464
|
* Flush the VAD (finalize any open segment) and await all pending
|
|
348
465
|
* attribution. Call at end-of-capture so a trailing utterance is not lost.
|
|
@@ -426,10 +543,15 @@ export class AudioFrameConsumer {
|
|
|
426
543
|
endedAtMs: args.endedAtMs,
|
|
427
544
|
...(this.source ? { source: this.source } : {}),
|
|
428
545
|
});
|
|
546
|
+
// Join the ASR transcript for this turn (#8786) so VOICE_TURN_OBSERVED
|
|
547
|
+
// carries the real text and live name/entity extraction can fire. ASR is
|
|
548
|
+
// best-effort: a decode failure degrades to a transcript-less turn (the
|
|
549
|
+
// diarized speaker is still emitted), never a dropped turn.
|
|
550
|
+
const opts = await this.resolveTurnOptions(args.pcm, output);
|
|
429
551
|
const signal = await handleLiveVoiceAttribution(
|
|
430
552
|
this.runtime as Parameters<typeof handleLiveVoiceAttribution>[0],
|
|
431
553
|
output,
|
|
432
|
-
|
|
554
|
+
opts,
|
|
433
555
|
);
|
|
434
556
|
const turn: AttributedTurn = {
|
|
435
557
|
turnId: args.turnId,
|
|
@@ -442,6 +564,44 @@ export class AudioFrameConsumer {
|
|
|
442
564
|
for (const listener of this.turnListeners) listener(turn);
|
|
443
565
|
}
|
|
444
566
|
|
|
567
|
+
/**
 * Build the attribution options for one finalized turn.
 *
 * Starts from the configured base options and layers on:
 *  - `transcript` — the trimmed ASR text, when a transcriber is wired and
 *    yields non-empty text. A transcriber that throws or rejects is swallowed
 *    and counted in `transcriptionErrors`, so an ASR failure never costs the
 *    diarized turn;
 *  - `selfVoiceSimilarity` — the resolver's score, when a resolver is wired,
 *    the output carries an embedding, and the score is a finite number.
 *
 * The self-voice resolver is awaited outside the try/catch, so a rejection
 * from it propagates to the caller.
 */
private async resolveTurnOptions(
  pcm: Float32Array,
  output: VoiceAttributionOutput,
): Promise<HandleLiveVoiceAttributionOptions> {
  let merged = this.attributionOptions;

  if (this.transcribe) {
    try {
      const raw = await this.transcribe(pcm, AUDIO_FRAME_PIPELINE_SAMPLE_RATE);
      const text = raw?.trim();
      if (text) merged = { ...merged, transcript: text };
    } catch {
      // Best-effort ASR: count the failure and fall back to no transcript.
      this.transcriptionErrors += 1;
    }
  }

  const embedding = output.observation?.embedding;
  if (!this.resolveSelfVoiceSimilarity || !embedding) return merged;

  const score = await this.resolveSelfVoiceSimilarity(embedding, output);
  const usable = typeof score === "number" && Number.isFinite(score);
  return usable ? { ...merged, selfVoiceSimilarity: score } : merged;
}
|
|
604
|
+
|
|
445
605
|
// ---- buffering ---------------------------------------------------------
|
|
446
606
|
|
|
447
607
|
private appendTurnChunk(pcm: Float32Array): void {
|
|
@@ -87,9 +87,8 @@ function tripToken(
|
|
|
87
87
|
token: BargeInCancelToken,
|
|
88
88
|
reason: BargeInCancelToken["reason"],
|
|
89
89
|
): void {
|
|
90
|
-
const trip = (
|
|
91
|
-
|
|
92
|
-
).__trip;
|
|
90
|
+
const trip = (token as { __trip?: (r: BargeInCancelToken["reason"]) => void })
|
|
91
|
+
.__trip;
|
|
93
92
|
if (trip) trip(reason);
|
|
94
93
|
}
|
|
95
94
|
|