@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AudioFrameConsumer — turn the Android `audioFrame` PCM stream into live,
|
|
3
|
+
* VAD-segmented, speaker-attributed voice turns.
|
|
4
|
+
*
|
|
5
|
+
* The Android native capture path (`plugin-native-talkmode`) streams an
|
|
6
|
+
* `audioFrame` Capacitor event: base64 little-endian s16 mono PCM at 16 kHz,
|
|
7
|
+
* 20 ms per frame, plus `{ sampleRate, channels, samples, rms, timestamp,
|
|
8
|
+
* frameIndex }`. This module is the platform-agnostic consumer that subscribes
|
|
9
|
+
* to that stream (wherever the bun:ffi voice libs are present) and runs:
|
|
10
|
+
*
|
|
11
|
+
* audioFrame (base64 LE-s16) → decode → VadDetector (turn segmentation)
|
|
12
|
+
* → on speech-end: VoiceAttributionPipeline.attribute(turn PCM)
|
|
13
|
+
* → handleLiveVoiceAttribution → VOICE_TURN_OBSERVED + voiceTurnSignal
|
|
14
|
+
*
|
|
15
|
+
* Design notes:
|
|
16
|
+
* - It does NOT reinvent VAD: it drives the existing `VadDetector` state
|
|
17
|
+
* machine (`speech-start` / `speech-pause` / `speech-end`), reusing its
|
|
18
|
+
* Silero onset/offset/hangover logic. It buffers the turn's PCM between
|
|
19
|
+
* `speech-start` and `speech-end`, then attributes the whole utterance.
|
|
20
|
+
* - Every native dependency (`VadDetector`, `VoiceAttributionPipeline`, the
|
|
21
|
+
* runtime) is INJECTED, so the consumer is fully unit-testable with fakes
|
|
22
|
+
* and has no static import of bun:ffi. A `build*` factory in the smoke
|
|
23
|
+
* harness wires the real ggml-backed deps.
|
|
24
|
+
* - The decode boundary (`decodeAudioFramePcm`) is the ONLY place that knows
|
|
25
|
+
* the wire format (base64 LE-s16). Internally everything is Float32 [-1,1].
|
|
26
|
+
*
|
|
27
|
+
* What this module does NOT do: it does not transcribe (ASR text is the
|
|
28
|
+
* separate streaming-ASR path) and it does not own the WebView→agent
|
|
29
|
+
* transport — see `android/AUDIO_FRAMES.md` and `LIVE_PIPELINE.md` for the
|
|
30
|
+
* remaining device wiring.
|
|
31
|
+
*/
|
|
32
|
+
import { type EmitVoiceTurnObservedArgs, type HandleLiveVoiceAttributionOptions } from "../../runtime/voice-entity-binding.js";
|
|
33
|
+
import type { VoiceTurnSignal } from "./eot-classifier.js";
|
|
34
|
+
import { type ResidualSuppressionOptions } from "./nlms-echo-canceller.js";
|
|
35
|
+
import type { VoiceAttributionOutput, VoiceAttributionPipeline } from "./speaker/attribution-pipeline.js";
|
|
36
|
+
import type { PcmFrame, VadEvent, VoiceInputSource } from "./types.js";
|
|
37
|
+
/**
|
|
38
|
+
* The `audioFrame` event payload, mirroring `TalkModeAudioFrameEvent` in
|
|
39
|
+
* `@elizaos/capacitor-talkmode`. Re-declared structurally here so this
|
|
40
|
+
* package does not take a build dep on the Capacitor plugin.
|
|
41
|
+
*/
|
|
42
|
+
export interface AudioFrameEvent {
|
|
43
|
+
/** Base64-encoded little-endian signed 16-bit mono PCM for this frame. */
|
|
44
|
+
pcm16: string;
|
|
45
|
+
/** Sample rate of the captured PCM in Hz (e.g. 16000). */
|
|
46
|
+
sampleRate: number;
|
|
47
|
+
/** Channel count (always 1 — mono). */
|
|
48
|
+
channels: number;
|
|
49
|
+
/** Number of PCM samples in this frame (`pcm16` byte length / 2). */
|
|
50
|
+
samples: number;
|
|
51
|
+
/** RMS amplitude of this frame, normalized 0..1. */
|
|
52
|
+
rms: number;
|
|
53
|
+
/** Monotonic capture timestamp for this frame, ms. */
|
|
54
|
+
timestamp: number;
|
|
55
|
+
/** Running index of this frame since capture started (0-based). */
|
|
56
|
+
frameIndex: number;
|
|
57
|
+
}
|
|
58
|
+
/** The sample rate every voice model in this pipeline is dimensioned for. */
|
|
59
|
+
export declare const AUDIO_FRAME_PIPELINE_SAMPLE_RATE = 16000;
|
|
60
|
+
export declare class AudioFrameDecodeError extends Error {
|
|
61
|
+
constructor(message: string);
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Decode an `audioFrame` payload into a Float32 [-1, 1] window. This is the
|
|
65
|
+
* single boundary that understands the base64 LE-s16 wire format.
|
|
66
|
+
*
|
|
67
|
+
* The native capture path only ever produces 16 kHz mono; this asserts that
|
|
68
|
+
* invariant rather than resampling silently (the downstream Silero/WeSpeaker
|
|
69
|
+
* graphs are 16 kHz-only — a wrong rate is a bug to surface, not paper over).
|
|
70
|
+
*/
|
|
71
|
+
export declare function decodeAudioFramePcm(frame: AudioFrameEvent): Float32Array;
|
|
72
|
+
/**
|
|
73
|
+
* The structural slice of `VadDetector` the consumer needs. Taking the
|
|
74
|
+
* structural view (not the concrete class) keeps the consumer testable with a
|
|
75
|
+
* fake VAD and avoids pulling the optional native VAD surface into callers
|
|
76
|
+
* that only want to feed frames.
|
|
77
|
+
*/
|
|
78
|
+
export interface VadSegmenter {
|
|
79
|
+
/** True while a speech segment (incl. its pause hangover) is open. */
|
|
80
|
+
readonly inSpeech: boolean;
|
|
81
|
+
/** Subscribe to the authoritative VAD timeline. Returns an unsubscribe fn. */
|
|
82
|
+
onVadEvent(listener: (event: VadEvent) => void): () => void;
|
|
83
|
+
/** Feed one mic frame; resolves once its windows are processed. */
|
|
84
|
+
pushFrame(frame: PcmFrame): Promise<void>;
|
|
85
|
+
/** Flush trailing samples and finalize any open segment. */
|
|
86
|
+
flush(): Promise<void>;
|
|
87
|
+
/** Clear all state at a hard boundary. */
|
|
88
|
+
reset(): void;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* The structural slice of `VoiceAttributionPipeline` the consumer needs.
|
|
92
|
+
*/
|
|
93
|
+
export interface AttributionPipelineLike {
|
|
94
|
+
attribute(req: Parameters<VoiceAttributionPipeline["attribute"]>[0]): Promise<VoiceAttributionOutput>;
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* The structural slice of `IAgentRuntime` the consumer needs:
|
|
98
|
+
* `handleLiveVoiceAttribution` calls `emitEvent`.
|
|
99
|
+
*/
|
|
100
|
+
export interface RuntimeEventSink {
|
|
101
|
+
emitEvent(type: unknown, payload: Record<string, unknown>): Promise<void>;
|
|
102
|
+
/**
|
|
103
|
+
* Optional host-supplied far-end (agent TTS playback) reference for the live
|
|
104
|
+
* AEC path (#9583). When a host wires this, the live diarization route threads
|
|
105
|
+
* it into the session's NLMS echo canceller instead of relying on the
|
|
106
|
+
* playback-frames ingest route. Absent on headless/core runtimes.
|
|
107
|
+
*/
|
|
108
|
+
voiceEchoReferenceProvider?: EchoReferenceProvider;
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Transcribe a finalized turn's buffered PCM to text (#8786). When injected, the
|
|
112
|
+
* consumer joins the ASR transcript into the diarization attribution so
|
|
113
|
+
* `VOICE_TURN_OBSERVED` carries the real text — previously the live audio-frame
|
|
114
|
+
* path attributed *who* spoke but always emitted `text: ""`, so name/partner
|
|
115
|
+
* extraction (`VoiceObserver.ingestTurn`) could never fire from live audio.
|
|
116
|
+
*
|
|
117
|
+
* Returns the transcript, or `null`/empty for silence / no decode. Best-effort:
|
|
118
|
+
* the consumer swallows a rejection (counted in `transcriptionErrors`) and falls
|
|
119
|
+
* back to a transcript-less turn rather than dropping the diarized turn.
|
|
120
|
+
*/
|
|
121
|
+
export type TurnTranscriber = (pcm: Float32Array, sampleRate: number) => Promise<string | null> | string | null;
|
|
122
|
+
export type SelfVoiceSimilarityResolver = (embedding: Float32Array, output: VoiceAttributionOutput) => Promise<number | null | undefined> | number | null | undefined;
|
|
123
|
+
export interface AudioFrameConsumerDeps {
|
|
124
|
+
/** Turn-segmentation VAD (drives speech-start/pause/end). */
|
|
125
|
+
vad: VadSegmenter;
|
|
126
|
+
/** Diarization + speaker-attribution pipeline. */
|
|
127
|
+
pipeline: AttributionPipelineLike;
|
|
128
|
+
/** Runtime event sink for VOICE_TURN_OBSERVED. */
|
|
129
|
+
runtime: RuntimeEventSink;
|
|
130
|
+
/**
|
|
131
|
+
* Optional ASR for the finalized turn's PCM (#8786). When present, its text
|
|
132
|
+
* rides on `VOICE_TURN_OBSERVED` so live name/entity extraction runs. When
|
|
133
|
+
* absent the path stays diarization-only (transcript `""`, as before).
|
|
134
|
+
*/
|
|
135
|
+
transcribe?: TurnTranscriber;
|
|
136
|
+
/**
|
|
137
|
+
* Optional live acoustic self-voice resolver. When wired, the consumer passes
|
|
138
|
+
* the turn's WeSpeaker embedding to the host's agent-TTS centroid matcher and
|
|
139
|
+
* forwards the resulting cosine into the ambient gate.
|
|
140
|
+
*/
|
|
141
|
+
resolveSelfVoiceSimilarity?: SelfVoiceSimilarityResolver;
|
|
142
|
+
/**
|
|
143
|
+
* Optional agent-playback (far-end) reference for acoustic echo cancellation
|
|
144
|
+
* (#9455). Given a mic frame's clock timestamp and sample count, returns the
|
|
145
|
+
* agent's TTS playback PCM for that exact window (Float32 16 kHz), or null
|
|
146
|
+
* when the agent is not playing. When wired, the consumer runs an NLMS echo
|
|
147
|
+
* canceller on every mic frame BEFORE VAD/attribution so the agent never
|
|
148
|
+
* transcribes its own TTS. Absent → no AEC (unchanged behavior). The caller
|
|
149
|
+
* owns the playback capture + the playback→mic delay calibration.
|
|
150
|
+
*/
|
|
151
|
+
echoReference?: EchoReferenceProvider;
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Returns the agent's TTS playback PCM (the far-end echo reference) aligned to a
|
|
155
|
+
* mic frame's time window, or null when the agent is silent. See #9455.
|
|
156
|
+
*/
|
|
157
|
+
export type EchoReferenceProvider = (timestampMs: number, samples: number) => Float32Array | null;
|
|
158
|
+
export interface AudioFrameConsumerConfig {
|
|
159
|
+
/** Source metadata stamped onto every attributed turn. */
|
|
160
|
+
source?: VoiceInputSource;
|
|
161
|
+
/** Gating options forwarded to `handleLiveVoiceAttribution` per turn. */
|
|
162
|
+
attributionOptions?: HandleLiveVoiceAttributionOptions;
|
|
163
|
+
/**
|
|
164
|
+
* Hard cap on a single buffered turn, in seconds. A speaker who never
|
|
165
|
+
* triggers `speech-end` (e.g. continuous noise) must not grow the buffer
|
|
166
|
+
* without bound. When exceeded the turn is force-finalized. Default 30 s.
|
|
167
|
+
*/
|
|
168
|
+
maxTurnSeconds?: number;
|
|
169
|
+
/**
|
|
170
|
+
* Pre-roll seconds of audio kept before `speech-start` so the onset of the
|
|
171
|
+
* first word (which the VAD only confirms a window or two in) is not clipped
|
|
172
|
+
* out of the attribution buffer. Default 0.3 s.
|
|
173
|
+
*/
|
|
174
|
+
preRollSeconds?: number;
|
|
175
|
+
/**
|
|
176
|
+
* Opt-in nonlinear residual-echo suppressor forwarded to the NLMS canceller
|
|
177
|
+
* (#9583/#9649). Default-off; only meaningful when an `echoReference` is wired
|
|
178
|
+
* (no canceller exists otherwise). See {@link NlmsEchoCancellerOptions.residualSuppression}.
|
|
179
|
+
*/
|
|
180
|
+
residualSuppression?: boolean | ResidualSuppressionOptions;
|
|
181
|
+
}
|
|
182
|
+
/** A finalized, attributed turn the consumer surfaces to its caller. */
|
|
183
|
+
export interface AttributedTurn {
|
|
184
|
+
turnId: string;
|
|
185
|
+
output: VoiceAttributionOutput;
|
|
186
|
+
signal: VoiceTurnSignal;
|
|
187
|
+
/** Turn span in the mic-clock (frame `timestamp`) domain. */
|
|
188
|
+
startedAtMs: number;
|
|
189
|
+
endedAtMs: number;
|
|
190
|
+
/** Total buffered turn samples that were attributed. */
|
|
191
|
+
samples: number;
|
|
192
|
+
}
|
|
193
|
+
export type AttributedTurnListener = (turn: AttributedTurn) => void;
|
|
194
|
+
/**
|
|
195
|
+
* Drives the `audioFrame` → VAD turn-segmentation → attribution → signal
|
|
196
|
+
* pipeline. One instance per capture session.
|
|
197
|
+
*
|
|
198
|
+
* Frame ingestion is serialized through the injected VAD's `pushFrame`
|
|
199
|
+
* (which itself serializes the Silero forward pass), so `onAudioFrame` is
|
|
200
|
+
* safe to fire-and-forget from a Capacitor event listener; turns surface in
|
|
201
|
+
* order via `onTurn`.
|
|
202
|
+
*/
|
|
203
|
+
export declare class AudioFrameConsumer {
|
|
204
|
+
private readonly vad;
|
|
205
|
+
private readonly pipeline;
|
|
206
|
+
private readonly runtime;
|
|
207
|
+
private readonly transcribe;
|
|
208
|
+
private readonly resolveSelfVoiceSimilarity;
|
|
209
|
+
private readonly echoReference;
|
|
210
|
+
/** NLMS echo canceller, instantiated only when an `echoReference` is wired. */
|
|
211
|
+
private readonly echoCanceller;
|
|
212
|
+
private readonly source;
|
|
213
|
+
private readonly attributionOptions;
|
|
214
|
+
private readonly maxTurnSamples;
|
|
215
|
+
private readonly preRollSamples;
|
|
216
|
+
private readonly unsubscribeVad;
|
|
217
|
+
private readonly turnListeners;
|
|
218
|
+
/** Float32 chunks of the in-flight turn, oldest first. */
|
|
219
|
+
private turnChunks;
|
|
220
|
+
private turnSamples;
|
|
221
|
+
/** Rolling pre-roll ring (frames captured before speech-start). */
|
|
222
|
+
private preRoll;
|
|
223
|
+
private preRollSampleCount;
|
|
224
|
+
private capturing;
|
|
225
|
+
private turnSeq;
|
|
226
|
+
private turnStartedAtMs;
|
|
227
|
+
private lastFrameEndMs;
|
|
228
|
+
/** Serialized attribution chain so turns finalize one at a time, in order. */
|
|
229
|
+
private attributing;
|
|
230
|
+
private closed;
|
|
231
|
+
/** Count of frames that failed to decode (exposed as this public counter, not thrown). */
|
|
232
|
+
droppedFrames: number;
|
|
233
|
+
/** Count of turns whose ASR transcribe threw (degraded to a transcript-less
|
|
234
|
+
* turn rather than dropping the diarized turn). */
|
|
235
|
+
transcriptionErrors: number;
|
|
236
|
+
/** Count of mic frames the echo canceller actually processed (i.e. the agent
|
|
237
|
+
* was playing). Frames skipped while the agent is silent do not count, so
|
|
238
|
+
* this also measures how often AEC took the cheap passthrough path. */
|
|
239
|
+
echoFramesCancelled: number;
|
|
240
|
+
constructor(deps: AudioFrameConsumerDeps, config?: AudioFrameConsumerConfig);
|
|
241
|
+
/** True while a turn is being buffered (between speech-start and speech-end). */
|
|
242
|
+
get inTurn(): boolean;
|
|
243
|
+
/** Subscribe to finalized attributed turns. Returns an unsubscribe fn. */
|
|
244
|
+
onTurn(listener: AttributedTurnListener): () => void;
|
|
245
|
+
/**
|
|
246
|
+
* Feed one wire-format `audioFrame`. The signature takes only the
|
|
247
|
+
* `AudioFrameEvent` (decoded here); a window that is already decoded goes
|
|
248
|
+
* through `pushDecodedFrame` instead. Resolves once the frame's VAD windows are
|
|
249
|
+
* processed.
|
|
250
|
+
*/
|
|
251
|
+
onAudioFrame(frame: AudioFrameEvent): Promise<void>;
|
|
252
|
+
/**
|
|
253
|
+
* Feed a pre-decoded Float32 16 kHz window with its mic-clock timestamp
|
|
254
|
+
* (ms). The decode boundary already ran; used by transports that decode
|
|
255
|
+
* upstream and by the host harness.
|
|
256
|
+
*/
|
|
257
|
+
pushDecodedFrame(pcm: Float32Array, timestampMs: number): Promise<void>;
|
|
258
|
+
/**
|
|
259
|
+
* Run the echo canceller on one mic frame when (and only when) the agent is
|
|
260
|
+
* playing. The reference provider returns null while the agent is silent, in
|
|
261
|
+
* which case the mic frame is passed through verbatim and the FIR
|
|
262
|
+
* `process()` loop is not invoked. The canceller still observes the silent
|
|
263
|
+
* far-end so stale playback history is cleared before playback resumes.
|
|
264
|
+
* Returns the echo-cancelled (or untouched) mic frame.
|
|
265
|
+
*/
|
|
266
|
+
private cancelEcho;
|
|
267
|
+
/**
|
|
268
|
+
* Flush the VAD (finalize any open segment) and await all pending
|
|
269
|
+
* attribution. Call at end-of-capture so a trailing utterance is not lost.
|
|
270
|
+
*/
|
|
271
|
+
flush(): Promise<void>;
|
|
272
|
+
/** Release listeners and clear all buffers. Idempotent. */
|
|
273
|
+
close(): Promise<void>;
|
|
274
|
+
private onVadEvent;
|
|
275
|
+
private beginTurn;
|
|
276
|
+
private finalizeTurn;
|
|
277
|
+
private attributeTurn;
|
|
278
|
+
/**
|
|
279
|
+
* Merge the per-turn ASR transcript into the attribution options. Returns the
|
|
280
|
+
* base options unchanged when no transcriber is wired or the decode yields no
|
|
281
|
+
* text; a thrown decode is swallowed (counted in `transcriptionErrors`) so a
|
|
282
|
+
* diarized turn is never dropped over an ASR failure.
|
|
283
|
+
*/
|
|
284
|
+
private resolveTurnOptions;
|
|
285
|
+
private appendTurnChunk;
|
|
286
|
+
private appendPreRoll;
|
|
287
|
+
}
|
|
288
|
+
/**
|
|
289
|
+
* Re-export of the producer's emit args, so a consumer caller can construct a
|
|
290
|
+
* VOICE_TURN_OBSERVED payload directly when wiring a custom transport without
|
|
291
|
+
* importing the runtime subpath twice.
|
|
292
|
+
*/
|
|
293
|
+
export type { EmitVoiceTurnObservedArgs };
|
|
294
|
+
//# sourceMappingURL=audio-frame-consumer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-frame-consumer.d.ts","sourceRoot":"","sources":["../../../src/services/voice/audio-frame-consumer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EACN,KAAK,yBAAyB,EAC9B,KAAK,iCAAiC,EAEtC,MAAM,uCAAuC,CAAC;AAC/C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAEN,KAAK,0BAA0B,EAC/B,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EACX,sBAAsB,EACtB,wBAAwB,EACxB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAMvE;;;;GAIG;AACH,MAAM,WAAW,eAAe;IAC/B,0EAA0E;IAC1E,KAAK,EAAE,MAAM,CAAC;IACd,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,OAAO,EAAE,MAAM,CAAC;IAChB,oDAAoD;IACpD,GAAG,EAAE,MAAM,CAAC;IACZ,sDAAsD;IACtD,SAAS,EAAE,MAAM,CAAC;IAClB,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,6EAA6E;AAC7E,eAAO,MAAM,gCAAgC,QAAS,CAAC;AAEvD,qBAAa,qBAAsB,SAAQ,KAAK;gBACnC,OAAO,EAAE,MAAM;CAI3B;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,eAAe,GAAG,YAAY,CA0BxE;AA8BD;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B,sEAAsE;IACtE,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,8EAA8E;IAC9E,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,GAAG,MAAM,IAAI,CAAC;IAC5D,mEAAmE;IACnE,SAAS,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1C,4DAA4D;IAC5D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,0CAA0C;IAC1C,KAAK,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,SAAS,CACR,GAAG,EAAE,UAAU,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,GACvD,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACnC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAChC,SAAS,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1E;;;;;OAKG;IACH,0BAA0B,CAAC,EAAE,qBAAqB,CAAC;CACnD;AAED;;;;;;;;;;GAUG;AACH,MAAM,MAAM,eAAe,GAAG,CAC7B,GAAG,EAAE,YAAY,EACjB,UAAU,EAAE,MAAM,KACd,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC;AAE5C,MAAM,MAAM,2BAA2B,GAAG,CACzC,SAAS,EAAE,YAAY,EACvB,MAAM,EAAE,sBAAsB,KAC1B,OAAO,CAAC,MAAM,GAAG,IAAI,GAAG,SAAS,CAAC,GAAG,MAAM,GAAG,IAAI,GAAG,SAAS,CAAC;AAMpE,MAAM,WAAW,sBAAsB;IACtC,6DAA6
D;IAC7D,GAAG,EAAE,YAAY,CAAC;IAClB,kDAAkD;IAClD,QAAQ,EAAE,uBAAuB,CAAC;IAClC,kDAAkD;IAClD,OAAO,EAAE,gBAAgB,CAAC;IAC1B;;;;OAIG;IACH,UAAU,CAAC,EAAE,eAAe,CAAC;IAC7B;;;;OAIG;IACH,0BAA0B,CAAC,EAAE,2BAA2B,CAAC;IACzD;;;;;;;;OAQG;IACH,aAAa,CAAC,EAAE,qBAAqB,CAAC;CACtC;AAED;;;GAGG;AACH,MAAM,MAAM,qBAAqB,GAAG,CACnC,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,MAAM,KACX,YAAY,GAAG,IAAI,CAAC;AAEzB,MAAM,WAAW,wBAAwB;IACxC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,yEAAyE;IACzE,kBAAkB,CAAC,EAAE,iCAAiC,CAAC;IACvD;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,OAAO,GAAG,0BAA0B,CAAC;CAC3D;AAED,wEAAwE;AACxE,MAAM,WAAW,cAAc;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,sBAAsB,CAAC;IAC/B,MAAM,EAAE,eAAe,CAAC;IACxB,6DAA6D;IAC7D,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,MAAM,sBAAsB,GAAG,CAAC,IAAI,EAAE,cAAc,KAAK,IAAI,CAAC;AAEpE;;;;;;;;GAQG;AACH,qBAAa,kBAAkB;IAC9B,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAe;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;IACnD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;IAC3C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAyB;IACpD,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAqC;IAChF,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA+B;IAC7D,+EAA+E;IAC/E,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA2B;IACzD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA+B;IACtD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAoC;IACvE,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAa;IAC5C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqC;IAEnE,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,WAAW,CAAK;IACxB,mEAAmE;IACnE,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,kBAAkB,CAAK;IAC/B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,cAAc,CAAK;IAC3B,8EAA8E;IAC9E,OAAO,CAAC,WAAW,CAAoC;IACvD,OAAO,CAAC,MAAM,CAAS;IAEvB,gFAAgF;IAChF,aAAa,SAAK;IAElB;wDACoD;IACpD,mBAAmB,SAAK;IAExB;;4EAEwE;IACxE,mBAAmB,SAAK;gBAGvB,IAAI,EAAE,sBAAsB,EAC5B,MAAM,GAAE,wBAA6B;IA+BtC,iFAAiF;IACjF,IAAI,MAAM,IAAI,OAAO,CAEpB;IAED,0EAA0E;IAC1E,MAAM,CAAC,QAAQ,EAAE,sBAAsB,GAAG,MAAM,IAAI;IAKp
D;;;;;OAKG;IACG,YAAY,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBzD;;;;OAIG;IACG,gBAAgB,CACrB,GAAG,EAAE,YAAY,EACjB,WAAW,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC;IAqBhB;;;;;;;OAOG;IACH,OAAO,CAAC,UAAU;IAWlB;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,2DAA2D;IACrD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAc5B,OAAO,CAAC,UAAU;IAgBlB,OAAO,CAAC,SAAS;IAWjB,OAAO,CAAC,YAAY;YAkBN,aAAa;IAkC3B;;;;;OAKG;YACW,kBAAkB;IAkChC,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,aAAa;CAarB;AAgBD;;;;GAIG;AACH,YAAY,EAAE,yBAAyB,EAAE,CAAC"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Barge-in controller — distinguishes a blip from real speech while the
|
|
3
|
+
* agent is talking, and turns that into TTS pause/resume/hard-stop plus an
|
|
4
|
+
* LLM-generation abort.
|
|
5
|
+
*
|
|
6
|
+
* Inputs:
|
|
7
|
+
* - the `VadEvent` stream from `VadDetector` (subscribe via `bindVad()`),
|
|
8
|
+
* - W2's ASR word-confirm callback (`onWordsDetected()` — the
|
|
9
|
+
* `WordsDetectedSink` contract).
|
|
10
|
+
*
|
|
11
|
+
* Behaviour while the agent is speaking (`agentSpeaking === true`):
|
|
12
|
+
* - `speech-active` → emit `pause-tts`. (Provisional — could still be a
|
|
13
|
+
* blip; the energy-duration heuristic guesses, ASR
|
|
14
|
+
* confirms.)
|
|
15
|
+
* - `blip` (or a short `speech-end` before any words)
|
|
16
|
+
* → emit `resume-tts`. The agent keeps talking.
|
|
17
|
+
* - `onWordsDetected({wordCount ≥ 1})` → emit `hard-stop` with a fresh
|
|
18
|
+
* `BargeInCancelToken`. Hard-stop means: cancel TTS
|
|
19
|
+
* *and* abort the in-flight LLM / MTP drafter
|
|
20
|
+
* generation. The engine layer (W9) threads
|
|
21
|
+
* `token.signal` into `dispatcher.generate` and polls
|
|
22
|
+
* `token.cancelled` at kernel boundaries.
|
|
23
|
+
* - `speech-end` with a long-enough segment but no ASR words yet →
|
|
24
|
+
* treated as words-pending: emit `hard-stop` only
|
|
25
|
+
* once ASR confirms; if ASR never confirms within
|
|
26
|
+
* `wordsGraceMs`, resume TTS (it was non-speech the
|
|
27
|
+
* Silero VAD let through).
|
|
28
|
+
*
|
|
29
|
+
* Legacy API (still used by `VoiceScheduler` and `EngineVoiceBridge`):
|
|
30
|
+
* `attach({onCancel})`, `onMicActive()`, `cancelSignal()`, `reset()` — a
|
|
31
|
+
* thin "everything cancelled" path. `onMicActive()` is now equivalent to
|
|
32
|
+
* `hardStop("manual")`.
|
|
33
|
+
*
|
|
34
|
+
* No fallback sludge: a `hard-stop` always carries a real `AbortSignal`; the
|
|
35
|
+
* controller never swallows a VAD event.
|
|
36
|
+
*/
|
|
37
|
+
import type { BargeInCancelToken, BargeInSignalListener, VadEventListener, WordsDetectedSink } from "./types";
|
|
38
|
+
/** Minimal structural view of `VadDetector` — avoids a module dependency on
|
|
39
|
+
* `vad.ts` (which pulls in the fused `libelizainference` VAD FFI surface). */
|
|
40
|
+
interface VadEventSource {
|
|
41
|
+
onVadEvent(listener: VadEventListener): () => void;
|
|
42
|
+
}
|
|
43
|
+
/** Listener shape accepted by `BargeInController.attach()`, part of the legacy cancel API. */
export interface BargeInListener {
|
|
44
|
+
/** NOTE(review): presumably fired when a barge-in cancels everything — confirm against barge-in.ts. */
onCancel(): void;
|
|
45
|
+
}
|
|
46
|
+
/** Cancel-flag object returned by the deprecated `BargeInController.cancelSignal()`. */
export interface CancelSignal {
|
|
47
|
+
cancelled: boolean;
|
|
48
|
+
}
|
|
49
|
+
export interface BargeInControllerConfig {
|
|
50
|
+
/**
|
|
51
|
+
* After a `speech-active` (TTS paused) with no ASR word confirmation,
|
|
52
|
+
* resume TTS if ASR has not reported ≥1 word within this window. Default
|
|
53
|
+
* 600 ms. Long enough for a streaming ASR partial; short enough that a
|
|
54
|
+
* cough doesn't keep the agent muted.
|
|
55
|
+
*/
|
|
56
|
+
wordsGraceMs?: number;
|
|
57
|
+
}
|
|
58
|
+
export declare class BargeInController implements WordsDetectedSink {
|
|
59
|
+
private readonly listeners;
|
|
60
|
+
private readonly signalListeners;
|
|
61
|
+
private readonly wordsGraceMs;
|
|
62
|
+
/** Legacy single-shot cancel flag, reset by `reset()`. */
|
|
63
|
+
private signal;
|
|
64
|
+
/** True while the agent's TTS is playing. The turn controller / scheduler
|
|
65
|
+
* flips this via `setAgentSpeaking()`. Barge-in logic only acts while
|
|
66
|
+
* this is true. */
|
|
67
|
+
private agentSpeaking;
|
|
68
|
+
/** True while we have emitted `pause-tts` and are waiting on the
|
|
69
|
+
* blip-vs-words decision. */
|
|
70
|
+
private awaitingWordConfirm;
|
|
71
|
+
private wordConfirmDeadlineTimer;
|
|
72
|
+
private wordConfirmExpiresAtMs;
|
|
73
|
+
private lastEventTimestampMs;
|
|
74
|
+
private vadUnsub;
|
|
75
|
+
constructor(config?: BargeInControllerConfig);
|
|
76
|
+
/** Subscribe to `pause-tts` / `resume-tts` / `hard-stop`. */
|
|
77
|
+
onSignal(listener: BargeInSignalListener): () => void;
|
|
78
|
+
/** Wire this controller to a `VadDetector`. Returns an unsubscribe fn. */
|
|
79
|
+
bindVad(detector: VadEventSource): () => void;
|
|
80
|
+
unbindVad(): void;
|
|
81
|
+
/** The turn controller flips this when TTS starts/stops playing. */
|
|
82
|
+
setAgentSpeaking(speaking: boolean): void;
|
|
83
|
+
get isAgentSpeaking(): boolean;
|
|
84
|
+
private onVadEvent;
|
|
85
|
+
/** ASR word-confirm callback (the `WordsDetectedSink` contract). While the agent
 * is speaking, a report with `wordCount` ≥ 1 results in a `hard-stop` signal. */
onWordsDetected(args: {
|
|
86
|
+
wordCount: number;
|
|
87
|
+
partialText: string;
|
|
88
|
+
timestampMs: number;
|
|
89
|
+
}): void;
|
|
90
|
+
/**
|
|
91
|
+
* Cancel TTS + abort the in-flight LLM / drafter generation. Returns the
|
|
92
|
+
* `BargeInCancelToken` whose `signal` the engine layer aborts on. Idempotent
|
|
93
|
+
* within a single barge-in episode — calling it again returns the same
|
|
94
|
+
* token until `reset()`.
|
|
95
|
+
*/
|
|
96
|
+
hardStop(reason?: NonNullable<BargeInCancelToken["reason"]>, timestampMs?: number): BargeInCancelToken;
|
|
97
|
+
private activeToken;
|
|
98
|
+
/** The cancel token for the current barge-in episode (null until a
|
|
99
|
+
* `hard-stop`). The engine threads `.signal` into generation. */
|
|
100
|
+
currentCancelToken(): BargeInCancelToken | null;
|
|
101
|
+
/** @deprecated Use `currentCancelToken()`; kept for `VoiceScheduler`. */
|
|
102
|
+
cancelSignal(): CancelSignal;
|
|
103
|
+
attach(listener: BargeInListener): () => void;
|
|
104
|
+
/** @deprecated Equivalent to `hardStop("manual")`; kept for the bridge. */
|
|
105
|
+
onMicActive(): void;
|
|
106
|
+
/** Resets the legacy single-shot cancel flag and ends the current barge-in
 * episode (until then `hardStop()` keeps returning the same token). */
reset(): void;
|
|
107
|
+
private emitSignal;
|
|
108
|
+
private armWordConfirmDeadline;
|
|
109
|
+
private clearWordConfirm;
|
|
110
|
+
}
|
|
111
|
+
export {};
|
|
112
|
+
//# sourceMappingURL=barge-in.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"barge-in.d.ts","sourceRoot":"","sources":["../../../src/services/voice/barge-in.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AAEH,OAAO,KAAK,EACX,kBAAkB,EAElB,qBAAqB,EAErB,gBAAgB,EAChB,iBAAiB,EACjB,MAAM,SAAS,CAAC;AAEjB;+EAC+E;AAC/E,UAAU,cAAc;IACvB,UAAU,CAAC,QAAQ,EAAE,gBAAgB,GAAG,MAAM,IAAI,CAAC;CACnD;AAID,MAAM,WAAW,eAAe;IAC/B,QAAQ,IAAI,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC5B,SAAS,EAAE,OAAO,CAAC;CACnB;AAkCD,MAAM,WAAW,uBAAuB;IACvC;;;;;OAKG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,iBAAiB;IAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAoC;IACpE,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IAEtC,0DAA0D;IAC1D,OAAO,CAAC,MAAM,CAAsC;IAEpD;;wBAEoB;IACpB,OAAO,CAAC,aAAa,CAAS;IAC9B;kCAC8B;IAC9B,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,wBAAwB,CAA8C;IAC9E,OAAO,CAAC,sBAAsB,CAAuB;IACrD,OAAO,CAAC,oBAAoB,CAAK;IACjC,OAAO,CAAC,QAAQ,CAA6B;gBAEjC,MAAM,GAAE,uBAA4B;IAMhD,6DAA6D;IAC7D,QAAQ,CAAC,QAAQ,EAAE,qBAAqB,GAAG,MAAM,IAAI;IAKrD,0EAA0E;IAC1E,OAAO,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,IAAI;IAM7C,SAAS,IAAI,IAAI;IAOjB,oEAAoE;IACpE,gBAAgB,CAAC,QAAQ,EAAE,OAAO,GAAG,IAAI;IAUzC,IAAI,eAAe,IAAI,OAAO,CAE7B;IAID,OAAO,CAAC,UAAU;IA8ClB,eAAe,CAAC,IAAI,EAAE;QACrB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;KACpB,GAAG,IAAI;IAiBR;;;;;OAKG;IACH,QAAQ,CACP,MAAM,GAAE,WAAW,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAY,EAC5D,WAAW,GAAE,MAAgD,GAC3D,kBAAkB;IAkBrB,OAAO,CAAC,WAAW,CAAmC;IAEtD;sEACkE;IAClE,kBAAkB,IAAI,kBAAkB,GAAG,IAAI;IAM/C,yEAAyE;IACzE,YAAY,IAAI,YAAY;IAI5B,MAAM,CAAC,QAAQ,EAAE,eAAe,GAAG,MAAM,IAAI;IAK7C,2EAA2E;IAC3E,WAAW,IAAI,IAAI;IAInB,KAAK,IAAI,IAAI;IASb,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,sBAAsB;IA0B9B,OAAO,CAAC,gBAAgB;CASxB"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice cancellation coordinator — Wave 3 W3-9.
|
|
3
|
+
*
|
|
4
|
+
* Single brain that owns one `VoiceCancellationToken` per active voice turn
|
|
5
|
+
* and binds every cancellation source into it:
|
|
6
|
+
*
|
|
7
|
+
* 1. VAD start-of-speech while the agent is speaking (barge-in).
|
|
8
|
+
* 2. `BargeInController.hardStop` (ASR-confirmed barge-in words).
|
|
9
|
+
* 3. Turn-detector EOT revocation (user resumed mid-tentative-pause).
|
|
10
|
+
* 4. Runtime turn abort (`TurnControllerRegistry` "aborted" event).
|
|
11
|
+
*
|
|
12
|
+
* On any cancel, it fans out to:
|
|
13
|
+
*
|
|
14
|
+
* 1. The voice token's `AbortSignal` — every fetch / model call wired to
|
|
15
|
+
* `signal` aborts at the next yield point.
|
|
16
|
+
* 2. `runtime.turnControllers.abortTurn(roomId, reason)` — the runtime's
|
|
17
|
+
* planner-loop / action handlers see the abort within one tick
|
|
18
|
+
* (between model calls / between actions / between provider calls).
|
|
19
|
+
* 3. Optional `slotAbort(slotId)` — invokes the registered LM
|
|
20
|
+
* slot-abort callback (typically `MtpLlamaServer.abortSlot` which
|
|
21
|
+
* either aborts in-flight HTTP fetches against that slot or, on a
|
|
22
|
+
* capable fork, calls the slot-cancel REST route).
|
|
23
|
+
* 4. Optional `ttsStop()` — invokes the registered TTS-stop callback
|
|
24
|
+
* (typically `EngineVoiceBridge.triggerBargeIn` which drains the
|
|
25
|
+
* audio sink + cancels the FFI/HTTP synthesis path).
|
|
26
|
+
*
|
|
27
|
+
* The coordinator is intentionally a plain class — no engine coupling. The
|
|
28
|
+
* engine bridge (and tests) construct one with the structural runtime + the
|
|
29
|
+
* appropriate callbacks.
|
|
30
|
+
*/
|
|
31
|
+
import { type VoiceCancellationReason, VoiceCancellationRegistry, type VoiceCancellationToken } from "@elizaos/shared";
|
|
32
|
+
/**
|
|
33
|
+
* Minimum runtime surface this coordinator needs. Matches a subset of
|
|
34
|
+
* `AgentRuntime.turnControllers`. Structural so unit tests can pass a fake.
|
|
35
|
+
*/
|
|
36
|
+
export interface CoordinatorRuntime {
|
|
37
|
+
turnControllers: {
|
|
38
|
+
abortTurn(roomId: string, reason: string): boolean;
|
|
39
|
+
onEvent(listener: (event: {
|
|
40
|
+
type: "started" | "completed" | "errored" | "aborted" | "aborted-cleanup";
|
|
41
|
+
roomId: string;
|
|
42
|
+
reason?: string;
|
|
43
|
+
}) => void): () => void;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
export interface VoiceCancellationCoordinatorOptions {
|
|
47
|
+
/** The runtime to bind to. */
|
|
48
|
+
runtime: CoordinatorRuntime;
|
|
49
|
+
/**
|
|
50
|
+
* Abort the inference server slot. Wired to `MtpLlamaServer.abortSlot`
|
|
51
|
+
* in production. Async — the coordinator does NOT await it (the slot
|
|
52
|
+
* abort path is best-effort; the AbortSignal closure on the fetch is the
|
|
53
|
+
* authoritative cancel).
|
|
54
|
+
*/
|
|
55
|
+
slotAbort?: (slotId: number, reason: VoiceCancellationReason) => void;
|
|
56
|
+
/**
|
|
57
|
+
* Hard-stop the TTS pipeline (audio sink drain + FFI/HTTP synthesis
|
|
58
|
+
* cancel). Wired to `EngineVoiceBridge.triggerBargeIn`. Synchronous —
|
|
59
|
+
* the audio sink drain MUST happen within one tick of `abort()`.
|
|
60
|
+
*/
|
|
61
|
+
ttsStop?: (reason: VoiceCancellationReason) => void;
|
|
62
|
+
/**
|
|
63
|
+
* Optional pre-existing registry. Tests inject one to inspect token
|
|
64
|
+
* lifecycle directly. Production creates a fresh registry per session.
|
|
65
|
+
*/
|
|
66
|
+
registry?: VoiceCancellationRegistry;
|
|
67
|
+
}
|
|
68
|
+
export declare class VoiceCancellationCoordinator {
|
|
69
|
+
private readonly runtime;
|
|
70
|
+
private readonly slotAbort;
|
|
71
|
+
private readonly ttsStop;
|
|
72
|
+
private readonly registry;
|
|
73
|
+
/** Active turns keyed by roomId. One per room. */
|
|
74
|
+
private readonly armed;
|
|
75
|
+
constructor(opts: VoiceCancellationCoordinatorOptions);
|
|
76
|
+
/**
|
|
77
|
+
* Begin a new voice turn for `roomId`. If a previous turn was active,
|
|
78
|
+
* it is aborted with `"external"` (the regular replace-on-arm semantics
|
|
79
|
+
* inherited from `VoiceCancellationRegistry`).
|
|
80
|
+
*/
|
|
81
|
+
armTurn(args: {
|
|
82
|
+
roomId: string;
|
|
83
|
+
runId: string;
|
|
84
|
+
slot?: number;
|
|
85
|
+
}): VoiceCancellationToken;
|
|
86
|
+
/** Fetch the current voice token for `roomId`, or null. */
|
|
87
|
+
current(roomId: string): VoiceCancellationToken | null;
|
|
88
|
+
/** Snapshot of armed room ids. */
|
|
89
|
+
armedRoomIds(): string[];
|
|
90
|
+
/**
|
|
91
|
+
* Abort the active turn for `roomId` with the given reason. Idempotent.
|
|
92
|
+
* Returns true when a live token was aborted.
|
|
93
|
+
*/
|
|
94
|
+
abort(roomId: string, reason: VoiceCancellationReason): boolean;
|
|
95
|
+
/**
|
|
96
|
+
* Trip the active token because VAD reported start-of-speech while the
|
|
97
|
+
* agent was speaking. Equivalent to `abort(roomId, "barge-in")` but
|
|
98
|
+
* keeps the call-site grep-able as the canonical barge-in entry point.
|
|
99
|
+
*/
|
|
100
|
+
bargeIn(roomId: string): boolean;
|
|
101
|
+
/**
|
|
102
|
+
* Trip the active token because the turn detector revoked the previous
|
|
103
|
+
* EOT decision (user resumed within the rollback window).
|
|
104
|
+
*/
|
|
105
|
+
revokeEot(roomId: string): boolean;
|
|
106
|
+
/**
|
|
107
|
+
* Wire a `BargeInController.onSignal` listener into this coordinator.
|
|
108
|
+
* The controller emits `hard-stop` when ASR confirms barge-in words;
|
|
109
|
+
* this glue translates it into `coordinator.bargeIn(roomId)` so the
|
|
110
|
+
* canonical token (and every downstream consumer) sees the abort.
|
|
111
|
+
*
|
|
112
|
+
* Returns the unsubscribe function from `onSignal`. Production callers
|
|
113
|
+
* (the engine bridge) call this once per `BargeInController` per
|
|
114
|
+
* room and keep the handle until session teardown.
|
|
115
|
+
*/
|
|
116
|
+
bindBargeInController(roomId: string, controller: {
|
|
117
|
+
onSignal(listener: (signal: {
|
|
118
|
+
type: string;
|
|
119
|
+
}) => void): () => void;
|
|
120
|
+
}): () => void;
|
|
121
|
+
/**
|
|
122
|
+
* Tear down. Cancels every armed turn and unsubscribes from the
|
|
123
|
+
* runtime. Safe to call multiple times.
|
|
124
|
+
*/
|
|
125
|
+
dispose(): void;
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=cancellation-coordinator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cancellation-coordinator.d.ts","sourceRoot":"","sources":["../../../src/services/voice/cancellation-coordinator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,EACN,KAAK,uBAAuB,EAC5B,yBAAyB,EACzB,KAAK,sBAAsB,EAC3B,MAAM,iBAAiB,CAAC;AAEzB;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IAClC,eAAe,EAAE;QAChB,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC;QACnD,OAAO,CACN,QAAQ,EAAE,CAAC,KAAK,EAAE;YACjB,IAAI,EACD,SAAS,GACT,WAAW,GACX,SAAS,GACT,SAAS,GACT,iBAAiB,CAAC;YACrB,MAAM,EAAE,MAAM,CAAC;YACf,MAAM,CAAC,EAAE,MAAM,CAAC;SAChB,KAAK,IAAI,GACR,MAAM,IAAI,CAAC;KACd,CAAC;CACF;AAED,MAAM,WAAW,mCAAmC;IACnD,8BAA8B;IAC9B,OAAO,EAAE,kBAAkB,CAAC;IAC5B;;;;;OAKG;IACH,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,uBAAuB,KAAK,IAAI,CAAC;IACtE;;;;OAIG;IACH,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,uBAAuB,KAAK,IAAI,CAAC;IACpD;;;OAGG;IACH,QAAQ,CAAC,EAAE,yBAAyB,CAAC;CACrC;AAgBD,qBAAa,4BAA4B;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmD;IAC7E,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiD;IACzE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;IACrD,kDAAkD;IAClD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgC;gBAE1C,IAAI,EAAE,mCAAmC;IAOrD;;;;OAIG;IACH,OAAO,CAAC,IAAI,EAAE;QACb,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,CAAC,EAAE,MAAM,CAAC;KACd,GAAG,sBAAsB;IA8D1B,2DAA2D;IAC3D,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,sBAAsB,GAAG,IAAI;IAItD,kCAAkC;IAClC,YAAY,IAAI,MAAM,EAAE;IAIxB;;;OAGG;IACH,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,uBAAuB,GAAG,OAAO;IAI/D;;;;OAIG;IACH,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO;IAIhC;;;OAGG;IACH,SAAS,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO;IAIlC;;;;;;;;;OASG;IACH,qBAAqB,CACpB,MAAM,EAAE,MAAM,EACd,UAAU,EAAE;QACX,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,KAAK,IAAI,GAAG,MAAM,IAAI,CAAC;KACnE,GACC,MAAM,IAAI;IAQb;;;OAGG;IACH,OAAO,IAAI,IAAI;CASf"}
|