@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
*/
|
|
32
32
|
import { type EmitVoiceTurnObservedArgs, type HandleLiveVoiceAttributionOptions } from "../../runtime/voice-entity-binding.js";
|
|
33
33
|
import type { VoiceTurnSignal } from "./eot-classifier.js";
|
|
34
|
+
import { type ResidualSuppressionOptions } from "./nlms-echo-canceller.js";
|
|
34
35
|
import type { VoiceAttributionOutput, VoiceAttributionPipeline } from "./speaker/attribution-pipeline.js";
|
|
35
36
|
import type { PcmFrame, VadEvent, VoiceInputSource } from "./types.js";
|
|
36
37
|
/**
|
|
@@ -98,7 +99,27 @@ export interface AttributionPipelineLike {
|
|
|
98
99
|
*/
|
|
99
100
|
export interface RuntimeEventSink {
|
|
100
101
|
emitEvent(type: unknown, payload: Record<string, unknown>): Promise<void>;
|
|
102
|
+
/**
|
|
103
|
+
* Optional host-supplied far-end (agent TTS playback) reference for the live
|
|
104
|
+
* AEC path (#9583). When a host wires this, the live diarization route threads
|
|
105
|
+
* it into the session's NLMS echo canceller instead of relying on the
|
|
106
|
+
* playback-frames ingest route. Absent on headless/core runtimes.
|
|
107
|
+
*/
|
|
108
|
+
voiceEchoReferenceProvider?: EchoReferenceProvider;
|
|
101
109
|
}
|
|
110
|
+
/**
|
|
111
|
+
* Transcribe a finalized turn's buffered PCM to text (#8786). When injected, the
|
|
112
|
+
* consumer joins the ASR transcript into the diarization attribution so
|
|
113
|
+
* `VOICE_TURN_OBSERVED` carries the real text — previously the live audio-frame
|
|
114
|
+
* path attributed *who* spoke but always emitted `text: ""`, so name/partner
|
|
115
|
+
* extraction (`VoiceObserver.ingestTurn`) could never fire from live audio.
|
|
116
|
+
*
|
|
117
|
+
* Returns the transcript, or `null`/empty for silence / no decode. Best-effort:
|
|
118
|
+
* the consumer swallows a rejection (counted in `transcriptionErrors`) and falls
|
|
119
|
+
* back to a transcript-less turn rather than dropping the diarized turn.
|
|
120
|
+
*/
|
|
121
|
+
export type TurnTranscriber = (pcm: Float32Array, sampleRate: number) => Promise<string | null> | string | null;
|
|
122
|
+
export type SelfVoiceSimilarityResolver = (embedding: Float32Array, output: VoiceAttributionOutput) => Promise<number | null | undefined> | number | null | undefined;
|
|
102
123
|
export interface AudioFrameConsumerDeps {
|
|
103
124
|
/** Turn-segmentation VAD (drives speech-start/pause/end). */
|
|
104
125
|
vad: VadSegmenter;
|
|
@@ -106,7 +127,34 @@ export interface AudioFrameConsumerDeps {
|
|
|
106
127
|
pipeline: AttributionPipelineLike;
|
|
107
128
|
/** Runtime event sink for VOICE_TURN_OBSERVED. */
|
|
108
129
|
runtime: RuntimeEventSink;
|
|
130
|
+
/**
|
|
131
|
+
* Optional ASR for the finalized turn's PCM (#8786). When present, its text
|
|
132
|
+
* rides on `VOICE_TURN_OBSERVED` so live name/entity extraction runs. When
|
|
133
|
+
* absent the path stays diarization-only (transcript `""`, as before).
|
|
134
|
+
*/
|
|
135
|
+
transcribe?: TurnTranscriber;
|
|
136
|
+
/**
|
|
137
|
+
* Optional live acoustic self-voice resolver. When wired, the consumer passes
|
|
138
|
+
* the turn's WeSpeaker embedding to the host's agent-TTS centroid matcher and
|
|
139
|
+
* forwards the resulting cosine into the ambient gate.
|
|
140
|
+
*/
|
|
141
|
+
resolveSelfVoiceSimilarity?: SelfVoiceSimilarityResolver;
|
|
142
|
+
/**
|
|
143
|
+
* Optional agent-playback (far-end) reference for acoustic echo cancellation
|
|
144
|
+
* (#9455). Given a mic frame's clock timestamp and sample count, returns the
|
|
145
|
+
* agent's TTS playback PCM for that exact window (Float32 16 kHz), or null
|
|
146
|
+
* when the agent is not playing. When wired, the consumer runs an NLMS echo
|
|
147
|
+
* canceller on every mic frame BEFORE VAD/attribution so the agent never
|
|
148
|
+
* transcribes its own TTS. Absent → no AEC (unchanged behavior). The caller
|
|
149
|
+
* owns the playback capture + the playback→mic delay calibration.
|
|
150
|
+
*/
|
|
151
|
+
echoReference?: EchoReferenceProvider;
|
|
109
152
|
}
|
|
153
|
+
/**
|
|
154
|
+
* Returns the agent's TTS playback PCM (the far-end echo reference) aligned to a
|
|
155
|
+
* mic frame's time window, or null when the agent is silent. See #9455.
|
|
156
|
+
*/
|
|
157
|
+
export type EchoReferenceProvider = (timestampMs: number, samples: number) => Float32Array | null;
|
|
110
158
|
export interface AudioFrameConsumerConfig {
|
|
111
159
|
/** Source metadata stamped onto every attributed turn. */
|
|
112
160
|
source?: VoiceInputSource;
|
|
@@ -124,6 +172,12 @@ export interface AudioFrameConsumerConfig {
|
|
|
124
172
|
* out of the attribution buffer. Default 0.3 s.
|
|
125
173
|
*/
|
|
126
174
|
preRollSeconds?: number;
|
|
175
|
+
/**
|
|
176
|
+
* Opt-in nonlinear residual-echo suppressor forwarded to the NLMS canceller
|
|
177
|
+
* (#9583/#9649). Default-off; only meaningful when an `echoReference` is wired
|
|
178
|
+
* (no canceller exists otherwise). See {@link NlmsEchoCancellerOptions.residualSuppression}.
|
|
179
|
+
*/
|
|
180
|
+
residualSuppression?: boolean | ResidualSuppressionOptions;
|
|
127
181
|
}
|
|
128
182
|
/** A finalized, attributed turn the consumer surfaces to its caller. */
|
|
129
183
|
export interface AttributedTurn {
|
|
@@ -150,6 +204,11 @@ export declare class AudioFrameConsumer {
|
|
|
150
204
|
private readonly vad;
|
|
151
205
|
private readonly pipeline;
|
|
152
206
|
private readonly runtime;
|
|
207
|
+
private readonly transcribe;
|
|
208
|
+
private readonly resolveSelfVoiceSimilarity;
|
|
209
|
+
private readonly echoReference;
|
|
210
|
+
/** NLMS echo canceller, instantiated only when an `echoReference` is wired. */
|
|
211
|
+
private readonly echoCanceller;
|
|
153
212
|
private readonly source;
|
|
154
213
|
private readonly attributionOptions;
|
|
155
214
|
private readonly maxTurnSamples;
|
|
@@ -171,6 +230,13 @@ export declare class AudioFrameConsumer {
|
|
|
171
230
|
private closed;
|
|
172
231
|
/** Count of frames that failed to decode (surfaced via getters, not thrown). */
|
|
173
232
|
droppedFrames: number;
|
|
233
|
+
/** Count of turns whose ASR transcribe threw (degraded to a transcript-less
|
|
234
|
+
* turn rather than dropping the diarized turn). */
|
|
235
|
+
transcriptionErrors: number;
|
|
236
|
+
/** Count of mic frames the echo canceller actually processed (i.e. the agent
|
|
237
|
+
* was playing). Frames skipped while the agent is silent do not count, so
|
|
238
|
+
* this also measures how often AEC took the cheap passthrough path. */
|
|
239
|
+
echoFramesCancelled: number;
|
|
174
240
|
constructor(deps: AudioFrameConsumerDeps, config?: AudioFrameConsumerConfig);
|
|
175
241
|
/** True while a turn is being buffered (between speech-start and speech-end). */
|
|
176
242
|
get inTurn(): boolean;
|
|
@@ -189,6 +255,15 @@ export declare class AudioFrameConsumer {
|
|
|
189
255
|
* upstream and by the host harness.
|
|
190
256
|
*/
|
|
191
257
|
pushDecodedFrame(pcm: Float32Array, timestampMs: number): Promise<void>;
|
|
258
|
+
/**
|
|
259
|
+
* Run the echo canceller on one mic frame when (and only when) the agent is
|
|
260
|
+
* playing. The reference provider returns null while the agent is silent, in
|
|
261
|
+
* which case the mic frame is passed through verbatim and the FIR
|
|
262
|
+
* `process()` loop is not invoked. The canceller still observes the silent
|
|
263
|
+
* far-end so stale playback history is cleared before playback resumes.
|
|
264
|
+
* Returns the echo-cancelled (or untouched) mic frame.
|
|
265
|
+
*/
|
|
266
|
+
private cancelEcho;
|
|
192
267
|
/**
|
|
193
268
|
* Flush the VAD (finalize any open segment) and await all pending
|
|
194
269
|
* attribution. Call at end-of-capture so a trailing utterance is not lost.
|
|
@@ -200,6 +275,13 @@ export declare class AudioFrameConsumer {
|
|
|
200
275
|
private beginTurn;
|
|
201
276
|
private finalizeTurn;
|
|
202
277
|
private attributeTurn;
|
|
278
|
+
/**
|
|
279
|
+
* Merge the per-turn ASR transcript into the attribution options. Returns the
|
|
280
|
+
* base options unchanged when no transcriber is wired or the decode yields no
|
|
281
|
+
* text; a thrown decode is swallowed (counted in `transcriptionErrors`) so a
|
|
282
|
+
* diarized turn is never dropped over an ASR failure.
|
|
283
|
+
*/
|
|
284
|
+
private resolveTurnOptions;
|
|
203
285
|
private appendTurnChunk;
|
|
204
286
|
private appendPreRoll;
|
|
205
287
|
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio-frame-consumer.d.ts","sourceRoot":"","sources":["../../../src/services/voice/audio-frame-consumer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EACN,KAAK,yBAAyB,EAC9B,KAAK,iCAAiC,EAEtC,MAAM,uCAAuC,CAAC;AAC/C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAC3D,OAAO,EAEN,KAAK,0BAA0B,EAC/B,MAAM,0BAA0B,CAAC;AAClC,OAAO,KAAK,EACX,sBAAsB,EACtB,wBAAwB,EACxB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,KAAK,EAAE,QAAQ,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAMvE;;;;GAIG;AACH,MAAM,WAAW,eAAe;IAC/B,0EAA0E;IAC1E,KAAK,EAAE,MAAM,CAAC;IACd,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,qEAAqE;IACrE,OAAO,EAAE,MAAM,CAAC;IAChB,oDAAoD;IACpD,GAAG,EAAE,MAAM,CAAC;IACZ,sDAAsD;IACtD,SAAS,EAAE,MAAM,CAAC;IAClB,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,6EAA6E;AAC7E,eAAO,MAAM,gCAAgC,QAAS,CAAC;AAEvD,qBAAa,qBAAsB,SAAQ,KAAK;gBACnC,OAAO,EAAE,MAAM;CAI3B;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,eAAe,GAAG,YAAY,CA0BxE;AA8BD;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B,sEAAsE;IACtE,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC;IAC3B,8EAA8E;IAC9E,UAAU,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,GAAG,MAAM,IAAI,CAAC;IAC5D,mEAAmE;IACnE,SAAS,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1C,4DAA4D;IAC5D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,0CAA0C;IAC1C,KAAK,IAAI,IAAI,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC,SAAS,CACR,GAAG,EAAE,UAAU,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,GACvD,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACnC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAChC,SAAS,CAAC,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC1E;;;;;OAKG;IACH,0BAA0B,CAAC,EAAE,qBAAqB,CAAC;CACnD;AAED;;;;;;;;;;GAUG;AACH,MAAM,MAAM,eAAe,GAAG,CAC7B,GAAG,EAAE,YAAY,EACjB,UAAU,EAAE,MAAM,KACd,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,MAAM,GAAG,IAAI,CAAC;AAE5C,MAAM,MAAM,2BAA2B,GAAG,CACzC,SAAS,EAAE,YAAY,EACvB,MAAM,EAAE,sBAAsB,KAC1B,OAAO,CAAC,MAAM,GAAG,IAAI,GAAG,SAAS,CAAC,GAAG,MAAM,GAAG,IAAI,GAAG,SAAS,CAAC;AAMpE,MAAM,WAAW,sBAAsB;IACtC,6DAA6
D;IAC7D,GAAG,EAAE,YAAY,CAAC;IAClB,kDAAkD;IAClD,QAAQ,EAAE,uBAAuB,CAAC;IAClC,kDAAkD;IAClD,OAAO,EAAE,gBAAgB,CAAC;IAC1B;;;;OAIG;IACH,UAAU,CAAC,EAAE,eAAe,CAAC;IAC7B;;;;OAIG;IACH,0BAA0B,CAAC,EAAE,2BAA2B,CAAC;IACzD;;;;;;;;OAQG;IACH,aAAa,CAAC,EAAE,qBAAqB,CAAC;CACtC;AAED;;;GAGG;AACH,MAAM,MAAM,qBAAqB,GAAG,CACnC,WAAW,EAAE,MAAM,EACnB,OAAO,EAAE,MAAM,KACX,YAAY,GAAG,IAAI,CAAC;AAEzB,MAAM,WAAW,wBAAwB;IACxC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,yEAAyE;IACzE,kBAAkB,CAAC,EAAE,iCAAiC,CAAC;IACvD;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,OAAO,GAAG,0BAA0B,CAAC;CAC3D;AAED,wEAAwE;AACxE,MAAM,WAAW,cAAc;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,sBAAsB,CAAC;IAC/B,MAAM,EAAE,eAAe,CAAC;IACxB,6DAA6D;IAC7D,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,MAAM,sBAAsB,GAAG,CAAC,IAAI,EAAE,cAAc,KAAK,IAAI,CAAC;AAEpE;;;;;;;;GAQG;AACH,qBAAa,kBAAkB;IAC9B,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAe;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA0B;IACnD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;IAC3C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAyB;IACpD,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAqC;IAChF,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA+B;IAC7D,+EAA+E;IAC/E,OAAO,CAAC,QAAQ,CAAC,aAAa,CAA2B;IACzD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA+B;IACtD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAoC;IACvE,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAa;IAC5C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqC;IAEnE,0DAA0D;IAC1D,OAAO,CAAC,UAAU,CAAsB;IACxC,OAAO,CAAC,WAAW,CAAK;IACxB,mEAAmE;IACnE,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,kBAAkB,CAAK;IAC/B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,OAAO,CAAK;IACpB,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,cAAc,CAAK;IAC3B,8EAA8E;IAC9E,OAAO,CAAC,WAAW,CAAoC;IACvD,OAAO,CAAC,MAAM,CAAS;IAEvB,gFAAgF;IAChF,aAAa,SAAK;IAElB;wDACoD;IACpD,mBAAmB,SAAK;IAExB;;4EAEwE;IACxE,mBAAmB,SAAK;gBAGvB,IAAI,EAAE,sBAAsB,EAC5B,MAAM,GAAE,wBAA6B;IA+BtC,iFAAiF;IACjF,IAAI,MAAM,IAAI,OAAO,CAEpB;IAED,0EAA0E;IAC1E,MAAM,CAAC,QAAQ,EAAE,sBAAsB,GAAG,MAAM,IAAI;IAKp
D;;;;;OAKG;IACG,YAAY,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBzD;;;;OAIG;IACG,gBAAgB,CACrB,GAAG,EAAE,YAAY,EACjB,WAAW,EAAE,MAAM,GACjB,OAAO,CAAC,IAAI,CAAC;IAqBhB;;;;;;;OAOG;IACH,OAAO,CAAC,UAAU;IAWlB;;;OAGG;IACG,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,2DAA2D;IACrD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAc5B,OAAO,CAAC,UAAU;IAgBlB,OAAO,CAAC,SAAS;IAWjB,OAAO,CAAC,YAAY;YAkBN,aAAa;IAkC3B;;;;;OAKG;YACW,kBAAkB;IAkChC,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,aAAa;CAarB;AAgBD;;;;GAIG;AACH,YAAY,EAAE,yBAAyB,EAAE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"barge-in.d.ts","sourceRoot":"","sources":["../../../src/services/voice/barge-in.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;AAEH,OAAO,KAAK,EACX,kBAAkB,EAElB,qBAAqB,EAErB,gBAAgB,EAChB,iBAAiB,EACjB,MAAM,SAAS,CAAC;AAEjB;+EAC+E;AAC/E,UAAU,cAAc;IACvB,UAAU,CAAC,QAAQ,EAAE,gBAAgB,GAAG,MAAM,IAAI,CAAC;CACnD;AAID,MAAM,WAAW,eAAe;IAC/B,QAAQ,IAAI,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC5B,SAAS,EAAE,OAAO,CAAC;CACnB;AAkCD,MAAM,WAAW,uBAAuB;IACvC;;;;;OAKG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,iBAAiB;IAC1D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAoC;IACpE,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;IAEtC,0DAA0D;IAC1D,OAAO,CAAC,MAAM,CAAsC;IAEpD;;wBAEoB;IACpB,OAAO,CAAC,aAAa,CAAS;IAC9B;kCAC8B;IAC9B,OAAO,CAAC,mBAAmB,CAAS;IACpC,OAAO,CAAC,wBAAwB,CAA8C;IAC9E,OAAO,CAAC,sBAAsB,CAAuB;IACrD,OAAO,CAAC,oBAAoB,CAAK;IACjC,OAAO,CAAC,QAAQ,CAA6B;gBAEjC,MAAM,GAAE,uBAA4B;IAMhD,6DAA6D;IAC7D,QAAQ,CAAC,QAAQ,EAAE,qBAAqB,GAAG,MAAM,IAAI;IAKrD,0EAA0E;IAC1E,OAAO,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,IAAI;IAM7C,SAAS,IAAI,IAAI;IAOjB,oEAAoE;IACpE,gBAAgB,CAAC,QAAQ,EAAE,OAAO,GAAG,IAAI;IAUzC,IAAI,eAAe,IAAI,OAAO,CAE7B;IAID,OAAO,CAAC,UAAU;IA8ClB,eAAe,CAAC,IAAI,EAAE;QACrB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;KACpB,GAAG,IAAI;IAiBR;;;;;OAKG;IACH,QAAQ,CACP,MAAM,GAAE,WAAW,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAY,EAC5D,WAAW,GAAE,MAAgD,GAC3D,kBAAkB;IAkBrB,OAAO,CAAC,WAAW,CAAmC;IAEtD;sEACkE;IAClE,kBAAkB,IAAI,kBAAkB,GAAG,IAAI;IAM/C,yEAAyE;IACzE,YAAY,IAAI,YAAY;IAI5B,MAAM,CAAC,QAAQ,EAAE,eAAe,GAAG,MAAM,IAAI;IAK7C,2EAA2E;IAC3E,WAAW,IAAI,IAAI;IAInB,KAAK,IAAI,IAAI;IASb,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,sBAAsB;IA0B9B,OAAO,CAAC,gBAAgB;CASxB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cancellation-coordinator.d.ts","sourceRoot":"","sources":["../../../src/services/voice/cancellation-coordinator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,EACN,KAAK,uBAAuB,EAC5B,yBAAyB,EACzB,KAAK,sBAAsB,EAC3B,MAAM,iBAAiB,CAAC;AAEzB;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IAClC,eAAe,EAAE;QAChB,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC;QACnD,OAAO,CACN,QAAQ,EAAE,CAAC,KAAK,EAAE;YACjB,IAAI,EACD,SAAS,GACT,WAAW,GACX,SAAS,GACT,SAAS,GACT,iBAAiB,CAAC;YACrB,MAAM,EAAE,MAAM,CAAC;YACf,MAAM,CAAC,EAAE,MAAM,CAAC;SAChB,KAAK,IAAI,GACR,MAAM,IAAI,CAAC;KACd,CAAC;CACF;AAED,MAAM,WAAW,mCAAmC;IACnD,8BAA8B;IAC9B,OAAO,EAAE,kBAAkB,CAAC;IAC5B;;;;;OAKG;IACH,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,uBAAuB,KAAK,IAAI,CAAC;IACtE;;;;OAIG;IACH,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,uBAAuB,KAAK,IAAI,CAAC;IACpD;;;OAGG;IACH,QAAQ,CAAC,EAAE,yBAAyB,CAAC;CACrC;AAgBD,qBAAa,4BAA4B;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAqB;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmD;IAC7E,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiD;IACzE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;IACrD,kDAAkD;IAClD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgC;gBAE1C,IAAI,EAAE,mCAAmC;IAOrD;;;;OAIG;IACH,OAAO,CAAC,IAAI,EAAE;QACb,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,CAAC,EAAE,MAAM,CAAC;KACd,GAAG,sBAAsB;IA8D1B,2DAA2D;IAC3D,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,sBAAsB,GAAG,IAAI;IAItD,kCAAkC;IAClC,YAAY,IAAI,MAAM,EAAE;IAIxB;;;OAGG;IACH,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,uBAAuB,GAAG,OAAO;IAI/D;;;;OAIG;IACH,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO;IAIhC;;;OAGG;IACH,SAAS,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO;IAIlC;;;;;;;;;OASG;IACH,qBAAqB,CACpB,MAAM,EAAE,MAAM,EACd,UAAU,EAAE;QACX,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,KAAK,IAAI,GAAG,MAAM,IAAI,CAAC;KACnE,GACC,MAAM,IAAI;IAQb;;;OAGG;IACH,OAAO,IAAI,IAAI;CASf"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-manager.d.ts","sourceRoot":"","sources":["../../../src/services/voice/checkpoint-manager.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,EAEN,KAAK,eAAe,EACpB,KAAK,gBAAgB,IAAI,oBAAoB,EAC7C,MAAM,sBAAsB,CAAC;AAE9B;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,EAAE,EAAE,MAAM,CAAC;IACX,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,QAAQ,CAAC,UAAU,EAAE,oBAAoB,GAAG,IAAI,CAAC;CACjD;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;;OAIG;IACH,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACxE;;;;OAIG;IACH,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3D;;;;OAIG;IACH,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3D;AAED;;;;GAIG;AACH,qBAAa,4BAA6B,SAAQ,KAAK;gBAC1C,OAAO,EAAE,MAAM;CAI3B;AAMD,MAAM,WAAW,wBAAwB;IACxC;;;OAGG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;OAIG;IACH,aAAa,CAAC,EAAE,CAAC,YAAY,EAAE,MAAM,KAAK,MAAM,CAAC;IACjD,wEAAwE;IACxE,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC;CACjB;AAID;;;;;;GAMG;AACH,qBAAa,iBAAkB,YAAW,qBAAqB;IAC9D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;IAC1C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAmC;IACjE,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAa;IACjC,OAAO,CAAC,MAAM,CAAK;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAqB;gBAE9B,IAAI,EAAE,wBAAwB;IAYpC,cAAc,CACnB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,GACV,OAAO,CAAC,gBAAgB,CAAC;IAmBtB,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAc1D,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAchE;;;;OAIG;IACG,cAAc,CAAC,MAAM,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC;IAI5D,OAAO,CAAC,UAAU;CAOlB;AAMD;;;;;GAKG;AACH,MAAM,WAAW,sBAAsB;IACtC,iDAAiD;IACjD,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC;IAC1B,8DAA8D;IAC9D,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,MAAM,MAAM,kBAAkB,GAAG,CAChC,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,KACR,sBAAsB,CAAC;AAE5B;;;;GAIG;AACH,qBAAa,qBAAsB,YAAW,qBAAqB;IAkBtD,OAAO,C
AAC,QAAQ,CAAC,cAAc,CAAC;IAjB5C,OAAO,CAAC,MAAM,CAAK;IACnB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAA6C;IACvE;;;OAGG;IACH,QAAQ,CAAC,UAAU,EAAE,KAAK,CACvB;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GAChE;QAAE,IAAI,EAAE,SAAS,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,GACrC;QAAE,IAAI,EAAE,SAAS,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CACvC,CAAM;IACP;;;OAGG;IACH,aAAa,EAAE,SAAS,MAAM,EAAE,CAAM;gBAET,cAAc,CAAC,EAAE,kBAAkB,YAAA;IAE1D,cAAc,CACnB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,MAAM,GACV,OAAO,CAAC,gBAAgB,CAAC;IAkBtB,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAW1D,iBAAiB,CAAC,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IAUhE,gDAAgD;IAChD,eAAe,IAAI,MAAM;IAIzB,sDAAsD;IACtD,WAAW,CAAC,MAAM,EAAE,gBAAgB,GAAG,sBAAsB,GAAG,SAAS;CAGzE"}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice checkpoint policy — thin VAD-event → checkpoint-op translator that
|
|
3
|
+
* sits between the voice loop (turn-controller / pipeline / vad) and the
|
|
4
|
+
* `GatedCheckpointManager` (`../checkpoint-manager.ts`).
|
|
5
|
+
*
|
|
6
|
+
* Why a separate policy module?
|
|
7
|
+
*
|
|
8
|
+
* - The constraint envelope on this scaffold explicitly forbids editing
|
|
9
|
+
* `turn-controller.ts`, `pipeline.ts`, `pipeline-impls.ts`, `vad.ts`,
|
|
10
|
+
* `scheduler.ts`, `phrase-chunker.ts`, `barge-in.ts`, `transcriber.ts`,
|
|
11
|
+
* and anything under `voice/kokoro/` / `voice/streaming-asr/`. Those
|
|
12
|
+
* files are owned by other agents.
|
|
13
|
+
* - But the upstream merge for `--ctx-checkpoints` lands "any week now,"
|
|
14
|
+
* and the JS-side rollback policy is what the merge unlocks. So the
|
|
15
|
+
* policy lives here as a free-standing module that the turn controller
|
|
16
|
+
* can pick up in a follow-up PR by injecting it into its VAD handler
|
|
17
|
+
* and calling `onSpeechPause` / `onSpeechResume` / `onSpeechEndCommit`
|
|
18
|
+
* / `onHardStop` at the matching transitions.
|
|
19
|
+
* - The wiring required in `turn-controller.ts` is documented in the
|
|
20
|
+
* `WIRING-INSTRUCTIONS` comment at the bottom of this file and in
|
|
21
|
+
* `docs/eliza-1-ctx-checkpoints-integration.md`. We intentionally do
|
|
22
|
+
* NOT apply the wiring here — that is a follow-up PR scoped to the
|
|
23
|
+
* turn-controller owner.
|
|
24
|
+
*
|
|
25
|
+
* Policy summary (one C1 per turn, named `pre-speculative-T<turnId>`):
|
|
26
|
+
*
|
|
27
|
+
* - `onSpeechPause(turnId)` — VAD reports the user stopped speaking but
|
|
28
|
+
* hangover hasn't elapsed. Save C1 and let the caller kick the
|
|
29
|
+
* speculative drafter. If the save fails the policy logs and continues
|
|
30
|
+
* (callers MUST treat speculative work as best-effort).
|
|
31
|
+
*
|
|
32
|
+
* - `onSpeechResume(turnId)` — VAD fires `speech-active` within the
|
|
33
|
+
* rollback window. If we previously kicked a speculative draft (the
|
|
34
|
+
* caller flips `speculativeFired=true` to tell us), restore C1 so the
|
|
35
|
+
* KV state is rolled back to the pre-draft point. Otherwise no-op.
|
|
36
|
+
*
|
|
37
|
+
* - `onSpeechEndCommit(turnId)` — VAD's hangover elapsed; the pause was a
|
|
38
|
+
* real turn boundary. The speculative draft is promoted. Erase C1: we
|
|
39
|
+
* no longer need a rollback target for this turn.
|
|
40
|
+
*
|
|
41
|
+
* - `onHardStop(turnId)` — caller-initiated cancellation (e.g. user
|
|
42
|
+
* pressed mute, app backgrounded). If C1 exists, prefer restoring to
|
|
43
|
+
* it so the KV cache is in a known-clean state for the next turn; if
|
|
44
|
+
* C1 isn't around, fall back to `cancel` (the gated manager will
|
|
45
|
+
* either issue `DELETE /slots/<id>` or invoke the SSE-disconnect
|
|
46
|
+
* callback depending on the gate).
|
|
47
|
+
*
|
|
48
|
+
* All four hooks are idempotent and survive a missing C1 by no-op'ing.
|
|
49
|
+
* Errors from the underlying manager are caught and reported through the
|
|
50
|
+
* `events.onError` sink — the policy NEVER throws back into the voice
|
|
51
|
+
* loop, because a failing checkpoint endpoint must not be able to break
|
|
52
|
+
* audio.
|
|
53
|
+
*
|
|
54
|
+
* The policy holds no state of its own beyond the per-turn name; the
|
|
55
|
+
* `GatedCheckpointManager` owns the registry, the REST client, and the
|
|
56
|
+
* capability cache.
|
|
57
|
+
*
|
|
58
|
+
* --- WIRING-INSTRUCTIONS (turn-controller.ts) -----------------------------
|
|
59
|
+
*
|
|
60
|
+
* The turn-controller owner adds (after the upstream merge lands):
|
|
61
|
+
*
|
|
62
|
+
* 1. Construct a `GatedCheckpointManager` once at session start and
|
|
63
|
+
* pass it into a `CheckpointPolicy` instance (one per voice session is enough; every hook takes `slotId`).
|
|
64
|
+
* 2. In the VAD `speech-pause` handler, immediately after the pause
|
|
65
|
+
* hangover timer is armed:
|
|
66
|
+
*
|
|
67
|
+
* await policy.onSpeechPause(this.turnId, this.slotId);
|
|
68
|
+
* // ...kick speculative drafter against the partial transcript
|
|
69
|
+
*
|
|
70
|
+
* 3. In the VAD `speech-active` handler (only when arriving within the
|
|
71
|
+
* rollback window — the controller already tracks this):
|
|
72
|
+
*
|
|
73
|
+
* await policy.onSpeechResume(this.turnId, this.slotId, {
|
|
74
|
+
* speculativeFired: this.speculativeFired,
|
|
75
|
+
* });
|
|
76
|
+
* // ...abort the speculative drafter
|
|
77
|
+
*
|
|
78
|
+
* 4. In the `speech-end` → SPEAKING transition (after the verifier
|
|
79
|
+
* promotes the draft):
|
|
80
|
+
*
|
|
81
|
+
* await policy.onSpeechEndCommit(this.turnId, this.slotId);
|
|
82
|
+
*
|
|
83
|
+
* 5. In the `dispose()` path and any other hard-stop site (mute, app
|
|
84
|
+
* background, error shutdown, barge-in mid-SPEAKING):
|
|
85
|
+
*
|
|
86
|
+
* await policy.onHardStop(this.turnId, this.slotId, () => {
|
|
87
|
+
* this.speculativeAbort?.abort(); // SSE-disconnect callback
|
|
88
|
+
* });
|
|
89
|
+
*
|
|
90
|
+
* 6. Feature flag: pass `useCtxCheckpoints` through to the
|
|
91
|
+
* `GatedCheckpointManager` constructor; when off the policy still
|
|
92
|
+
* runs but every call is a logged no-op.
|
|
93
|
+
*
|
|
94
|
+
* The turn-controller must NOT call `mgr.save/restore/erase/cancel`
|
|
95
|
+
* directly — those names are reserved for the policy so the gated/no-op
|
|
96
|
+
* branching stays in one place. The `policy.events.onError` sink lets the
|
|
97
|
+
* controller forward checkpoint failures into its existing voice-loop
|
|
98
|
+
* telemetry without coupling to the REST error type.
|
|
99
|
+
*/
|
|
100
|
+
import type { CheckpointHandle, GatedCheckpointManager, SseDisconnectFn } from "../checkpoint-manager";
|
|
101
|
+
/**
|
|
102
|
+
* Errors are surfaced through this sink rather than rethrown. The voice
|
|
103
|
+
* loop wires it into its existing telemetry; tests assert on it directly.
|
|
104
|
+
*/
|
|
105
|
+
export interface CheckpointPolicyEvents {
|
|
106
|
+
onError?(op: "save" | "restore" | "erase" | "cancel", error: unknown, turnId: string): void;
|
|
107
|
+
/**
|
|
108
|
+
* Called after a successful save so callers can record the handle in
|
|
109
|
+
* their per-turn state if they want to bypass the name-based lookup on
|
|
110
|
+
* the matching restore.
|
|
111
|
+
*/
|
|
112
|
+
onSaved?(turnId: string, handle: CheckpointHandle): void;
|
|
113
|
+
/** Called after a successful restore. */
|
|
114
|
+
onRestored?(turnId: string, handle: CheckpointHandle): void;
|
|
115
|
+
/** Called when the policy decides to no-op (gate off, registry miss, or no speculative draft fired). */
|
|
116
|
+
onNoop?(op: "save" | "restore" | "erase" | "cancel", turnId: string, reason: "gate-off" | "registry-miss" | "no-speculative"): void;
|
|
117
|
+
}
|
|
118
|
+
export interface CheckpointPolicyOptions {
|
|
119
|
+
/** Gated manager. Owned by the caller; one per session. */
|
|
120
|
+
manager: GatedCheckpointManager;
|
|
121
|
+
/** Events sink (errors + observability). Optional. */
|
|
122
|
+
events?: CheckpointPolicyEvents;
|
|
123
|
+
}
|
|
124
|
+
/** Context argument (third parameter) of `onSpeechResume` so the policy knows whether
|
|
125
|
+
* a speculative draft actually fired. When `false`, the resume is a no-op
|
|
126
|
+
* (no draft means nothing to roll back).
|
|
127
|
+
*/
|
|
128
|
+
export interface SpeechResumeContext {
|
|
129
|
+
speculativeFired: boolean;
|
|
130
|
+
}
|
|
131
|
+
/**
|
|
132
|
+
* Voice checkpoint policy. Stateless w.r.t. checkpoints (the manager owns
|
|
133
|
+
* the registry) — only holds the manager + event sink. One instance per
|
|
134
|
+
* voice session is enough; the `turnId` argument scopes each operation.
|
|
135
|
+
*/
|
|
136
|
+
export declare class CheckpointPolicy {
|
|
137
|
+
private readonly manager;
|
|
138
|
+
private readonly events;
|
|
139
|
+
constructor(opts: CheckpointPolicyOptions);
|
|
140
|
+
/**
|
|
141
|
+
* VAD `speech-pause`. Save C1. Caller kicks the speculative drafter on
|
|
142
|
+
* its own — the policy doesn't care; it just guarantees the rollback
|
|
143
|
+
* target exists.
|
|
144
|
+
*/
|
|
145
|
+
onSpeechPause(turnId: string, slotId: number): Promise<void>;
|
|
146
|
+
/**
|
|
147
|
+
* VAD `speech-active` within the rollback window. Restore C1 ONLY if
|
|
148
|
+
* the caller actually kicked a speculative draft — otherwise the KV
|
|
149
|
+
* state hasn't been mutated and we'd be doing a needless REST round
|
|
150
|
+
* trip.
|
|
151
|
+
*/
|
|
152
|
+
onSpeechResume(turnId: string, slotId: number, ctx: SpeechResumeContext): Promise<void>;
|
|
153
|
+
/**
|
|
154
|
+
* VAD's hangover elapsed → real turn boundary. Speculative draft is
|
|
155
|
+
* being promoted, so C1 is no longer needed. Erase frees the registry
|
|
156
|
+
* slot (the server-side LRU handles its own eviction independently).
|
|
157
|
+
*/
|
|
158
|
+
onSpeechEndCommit(turnId: string, slotId: number): Promise<void>;
|
|
159
|
+
/**
|
|
160
|
+
* Hard-stop: caller-initiated cancellation. Prefer rolling back to C1
|
|
161
|
+
* (clean KV state for the next turn) when available, else cancel any
|
|
162
|
+
* in-flight decode on the slot. `sseDisconnect` is the existing voice-
|
|
163
|
+
* loop abort hook — required because the gated manager falls back to
|
|
164
|
+
* it when the REST endpoints aren't available.
|
|
165
|
+
*/
|
|
166
|
+
onHardStop(turnId: string, slotId: number, sseDisconnect: SseDisconnectFn): Promise<void>;
|
|
167
|
+
}
|
|
168
|
+
/**
|
|
169
|
+
* Per-turn checkpoint name. Keeps the namespace stable so a hard-stop
|
|
170
|
+
* after a normal commit doesn't collide with the next turn's C1.
|
|
171
|
+
*
|
|
172
|
+
* The format is the only thing callers outside the policy ever see —
|
|
173
|
+
* `GatedCheckpointManager.getNamedHandle('pre-speculative-T123')` returns
|
|
174
|
+
* the same handle the policy used. Keep it stable; if the format changes
|
|
175
|
+
* in a later change, audit every consumer of `getNamedHandle`.
|
|
176
|
+
*/
|
|
177
|
+
export declare function checkpointNameFor(turnId: string): string;
|
|
178
|
+
//# sourceMappingURL=checkpoint-policy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-policy.d.ts","sourceRoot":"","sources":["../../../src/services/voice/checkpoint-policy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkGG;AAGH,OAAO,KAAK,EACX,gBAAgB,EAChB,sBAAsB,EACtB,eAAe,EACf,MAAM,uBAAuB,CAAC;AAE/B;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACtC,OAAO,CAAC,CACP,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,QAAQ,EAC3C,KAAK,EAAE,OAAO,EACd,MAAM,EAAE,MAAM,GACZ,IAAI,CAAC;IACR;;;;OAIG;IACH,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACzD,yCAAyC;IACzC,UAAU,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC5D,yEAAyE;IACzE,MAAM,CAAC,CACN,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,QAAQ,EAC3C,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,UAAU,GAAG,eAAe,GAAG,gBAAgB,GACrD,IAAI,CAAC;CACR;AAED,MAAM,WAAW,uBAAuB;IACvC,2DAA2D;IAC3D,OAAO,EAAE,sBAAsB,CAAC;IAChC,sDAAsD;IACtD,MAAM,CAAC,EAAE,sBAAsB,CAAC;CAChC;AAED;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IACnC,gBAAgB,EAAE,OAAO,CAAC;CAC1B;AAED;;;;GAIG;AACH,qBAAa,gBAAgB;IAC5B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAyB;IACjD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;gBAEpC,IAAI,EAAE,uBAAuB;IAKzC;;;;OAIG;IACG,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA0BlE;;;;;OAKG;IACG,cAAc,CACnB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,GAAG,EAAE,mBAAmB,GACtB,OAAO,CAAC,IAAI,CAAC;IAqChB;;;;OAIG;IACG,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAoBtE;;;;;;OAMG;IACG,UAAU,CACf,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,MAAM,EACd,aAAa,EAAE,eAAe,GAC5B,OAAO,CAAC,IAAI,CAAC;CAyChB;AAED;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAOxD"}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice Workbench corpus augmentation — acoustic degradation DSP (#8785).
|
|
3
|
+
*
|
|
4
|
+
* Real rooms are not clean. The corpus generator produces dry speech; this
|
|
5
|
+
* module degrades it the way a microphone actually hears it: additive room
|
|
6
|
+
* noise at a target SNR, reverberation (near vs far), far-field attenuation,
|
|
7
|
+
* a low-quality/telephone line, and competing background talkers. Every
|
|
8
|
+
* function is PURE and DETERMINISTIC (seeded PRNG, no `Math.random`, no I/O), so
|
|
9
|
+
* the same scenario + seed always yields byte-identical audio — a labeled,
|
|
10
|
+
* reproducible corpus the real ASR/diarization/EOT models can be benchmarked
|
|
11
|
+
* against, and the DSP itself is unit-testable in CI with no models.
|
|
12
|
+
*
|
|
13
|
+
* Layering: this module knows nothing about scenarios. It operates on mono
|
|
14
|
+
* `Float32Array` PCM at a given sample rate. `corpus-generator.ts` translates a
|
|
15
|
+
* scenario's declarative {@link AugmentationSpec} into these calls.
|
|
16
|
+
*/
|
|
17
|
+
/** Root-mean-square amplitude over [start, end). */
|
|
18
|
+
export declare function measureRms(pcm: Float32Array, start?: number, end?: number): number;
|
|
19
|
+
/** Peak absolute amplitude over [start, end). */
|
|
20
|
+
export declare function measurePeak(pcm: Float32Array, start?: number, end?: number): number;
|
|
21
|
+
/** Linear amplitude ratio for a dB gain (+6 dB ≈ ×2, −6 dB ≈ ×0.5). */
|
|
22
|
+
export declare function dbToGain(db: number): number;
|
|
23
|
+
/** Estimated SNR (dB) of a signal region against a noise-only region. */
|
|
24
|
+
export declare function estimateSnrDb(signalRms: number, noiseRms: number): number;
|
|
25
|
+
export type NoiseKind = "white" | "pink" | "music";
|
|
26
|
+
/**
|
|
27
|
+
* Add background noise at a target SNR (dB) relative to the signal's voiced RMS.
|
|
28
|
+
* Lower `snrDb` = noisier. `pink` is a one-pole-filtered approximation of 1/f
|
|
29
|
+
* room rumble; `white` is flat; `music` is a seeded harmonic chord (a few
|
|
30
|
+
* detuned partials under a slow tremolo) — tonal and sustained, the kind of
|
|
31
|
+
* steady background that fools an energy-only VAD where flat hiss would not.
|
|
32
|
+
* The noise floor is added across the WHOLE stream (including silent gaps) so
|
|
33
|
+
* silence is no longer pristine — exactly the condition that makes a real
|
|
34
|
+
* VAD/EOT classifier work for its living.
|
|
35
|
+
*/
|
|
36
|
+
export declare function addNoise(pcm: Float32Array, opts: {
|
|
37
|
+
snrDb: number;
|
|
38
|
+
kind?: NoiseKind;
|
|
39
|
+
seed?: number;
|
|
40
|
+
}): Float32Array;
|
|
41
|
+
/**
|
|
42
|
+
* Simulate a low-quality / telephone line: band-limit to ~300–3400 Hz then
|
|
43
|
+
* apply companded 8-bit quantization. Cheap mics and phone codecs strip the highs and
|
|
44
|
+
* add quantization grunge, which is the dominant real-world ASR stressor for
|
|
45
|
+
* "voices near and far" / low-quality input.
|
|
46
|
+
*/
|
|
47
|
+
export declare function applyLowQualityLine(pcm: Float32Array, sampleRate: number): Float32Array;
|
|
48
|
+
/**
|
|
49
|
+
* Freeverb-style Schroeder reverb (4 parallel combs → 2 series allpasses),
|
|
50
|
+
* mixed with the dry signal. `room` (0..1) sets reflection density/decay;
|
|
51
|
+
* `wet` (0..1) the reflected level. Reverb spreads energy in time — speech
|
|
52
|
+
* keeps ringing after the talker stops — which is what makes a far/reverberant
|
|
53
|
+
* voice hard to endpoint and to diarize. The output is `dry.length + tail`
|
|
54
|
+
* samples so the decay is preserved (callers may keep or trim the tail).
|
|
55
|
+
*/
|
|
56
|
+
export declare function applyReverb(pcm: Float32Array, sampleRate: number, opts?: {
|
|
57
|
+
room?: number;
|
|
58
|
+
wet?: number;
|
|
59
|
+
tailSec?: number;
|
|
60
|
+
}): Float32Array;
|
|
61
|
+
/** Multiply the whole stream by a dB gain (far-field attenuation = negative). */
|
|
62
|
+
export declare function applyGainDb(pcm: Float32Array, db: number): Float32Array;
|
|
63
|
+
/**
|
|
64
|
+
* Mix an overlay stream (a competing talker, babble, or the agent's own TTS for
|
|
65
|
+
* an echo test) into a base stream at a given level, starting at `offsetSamples`
|
|
66
|
+
* and optionally looping the overlay to cover the base. The base length is
|
|
67
|
+
* preserved. Returns a new array; neither input is mutated.
|
|
68
|
+
*/
|
|
69
|
+
export declare function mixInto(base: Float32Array, overlay: Float32Array, opts?: {
|
|
70
|
+
gainDb?: number;
|
|
71
|
+
offsetSamples?: number;
|
|
72
|
+
loop?: boolean;
|
|
73
|
+
}): Float32Array;
|
|
74
|
+
/** Declarative degradation for one stream (the scenario's `environment`). */
|
|
75
|
+
export interface AugmentationSpec {
|
|
76
|
+
/** Additive room-noise SNR (dB) relative to voiced speech. Lower = noisier. */
|
|
77
|
+
noiseSnrDb?: number;
|
|
78
|
+
/** Noise character (default white). */
|
|
79
|
+
noiseKind?: NoiseKind;
|
|
80
|
+
/** Reverb room size 0..1 (near→far, small→large room). */
|
|
81
|
+
reverb?: number;
|
|
82
|
+
/** Reverb wet level 0..1 (defaults from `reverb` when omitted). */
|
|
83
|
+
reverbWet?: number;
|
|
84
|
+
/** Far-field attenuation in dB (how many dB QUIETER; positive number). */
|
|
85
|
+
farFieldDb?: number;
|
|
86
|
+
/** Band-limit + 8-bit companding (telephone / cheap-mic line). */
|
|
87
|
+
lowQuality?: boolean;
|
|
88
|
+
/** Competing background talkers, mixed this many dB BELOW the speech. */
|
|
89
|
+
backgroundTalkersDb?: number;
|
|
90
|
+
/** Deterministic seed for noise/babble. */
|
|
91
|
+
seed?: number;
|
|
92
|
+
}
|
|
93
|
+
/** True when the spec is clean, i.e. it asks for no degradation at all. */
|
|
94
|
+
export declare function specIsClean(spec: AugmentationSpec | undefined): boolean;
|
|
95
|
+
export interface AugmentPcmOptions {
|
|
96
|
+
/**
|
|
97
|
+
* A babble source (a competing-talker stream) for `backgroundTalkersDb`.
|
|
98
|
+
* The generator supplies one synthesized from other voices; omitted = no
|
|
99
|
+
* background talkers even if the spec asks (the runner logs the gap).
|
|
100
|
+
*/
|
|
101
|
+
babble?: Float32Array;
|
|
102
|
+
/** Trim reverb tail back to the input length (keeps corpus timing exact). */
|
|
103
|
+
trimReverbTail?: boolean;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Apply a full degradation chain to one stream, in acoustically sensible order:
|
|
107
|
+
* background talkers → reverb (room reflections) → far-field gain → low-quality
|
|
108
|
+
* line → additive noise floor. Pure; returns a new array.
|
|
109
|
+
*/
|
|
110
|
+
export declare function augmentPcm(pcm: Float32Array, sampleRate: number, spec: AugmentationSpec, options?: AugmentPcmOptions): Float32Array;
|
|
111
|
+
//# sourceMappingURL=corpus-augment.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"corpus-augment.d.ts","sourceRoot":"","sources":["../../../src/services/voice/corpus-augment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAuBH,oDAAoD;AACpD,wBAAgB,UAAU,CACzB,GAAG,EAAE,YAAY,EACjB,KAAK,SAAI,EACT,GAAG,SAAa,GACd,MAAM,CAOR;AAED,iDAAiD;AACjD,wBAAgB,WAAW,CAC1B,GAAG,EAAE,YAAY,EACjB,KAAK,SAAI,EACT,GAAG,SAAa,GACd,MAAM,CASR;AAED,uEAAuE;AACvE,wBAAgB,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,CAE3C;AAED,yEAAyE;AACzE,wBAAgB,aAAa,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAIzE;AAED,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,CAAC;AAEnD;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CACvB,GAAG,EAAE,YAAY,EACjB,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,SAAS,CAAC;IAAC,IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,GACtD,YAAY,CAoDd;AAkFD;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAClC,GAAG,EAAE,YAAY,EACjB,UAAU,EAAE,MAAM,GAChB,YAAY,CAKd;AAED;;;;;;;GAOG;AACH,wBAAgB,WAAW,CAC1B,GAAG,EAAE,YAAY,EACjB,UAAU,EAAE,MAAM,EAClB,IAAI,GAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GAC1D,YAAY,CA+Cd;AAED,iFAAiF;AACjF,wBAAgB,WAAW,CAAC,GAAG,EAAE,YAAY,EAAE,EAAE,EAAE,MAAM,GAAG,YAAY,CAKvE;AAED;;;;;GAKG;AACH,wBAAgB,OAAO,CACtB,IAAI,EAAE,YAAY,EAClB,OAAO,EAAE,YAAY,EACrB,IAAI,GAAE;IAAE,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,IAAI,CAAC,EAAE,OAAO,CAAA;CAAO,GACpE,YAAY,CAad;AAED,6EAA6E;AAC7E,MAAM,WAAW,gBAAgB;IAChC,+EAA+E;IAC/E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,uCAAuC;IACvC,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,0DAA0D;IAC1D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mEAAmE;IACnE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0EAA0E;IAC1E,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,kEAAkE;IAClE,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,yEAAyE;IACzE,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,2CAA2C;IAC3C,IAAI,CAAC,EAAE,MAAM,CAAC;CACd;AAED,0DAA0D;AAC1D,wBAAgB,WAAW,CAAC,IAAI,EAAE,gBAAgB,GAAG,SAAS,GAAG,OAAO,CASvE;AAED,MAAM,WAAW,iBAAiB;IACjC;;;;OAIG;IACH,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,6EAA6E;IAC7E,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CACzB,GAAG,EAAE,YAAY,EACjB,UAAU,EAAE,MAAM,EAClB,IAAI,E
AAE,gBAAgB,EACtB,OAAO,GAAE,iBAAsB,GAC7B,YAAY,CA0Dd"}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice Workbench corpus generator (#8785).
|
|
3
|
+
*
|
|
4
|
+
* Turns a declarative {@link VoiceScenario} into one labeled audio stream + a
|
|
5
|
+
* ground-truth JSON the headless runner scores against. Two synthesis paths,
|
|
6
|
+
* one shape:
|
|
7
|
+
*
|
|
8
|
+
* - Synthetic (default, deterministic, NO native model): formant-resonator
|
|
9
|
+
* speech-like PCM (`__test-helpers__/synthetic-speech.ts`) the Silero VAD
|
|
10
|
+
* reads as speech. Reproducible in CI with no artifacts — it exercises the
|
|
11
|
+
* pipeline plumbing + the scorers/labels, not diarization/WER *accuracy*.
|
|
12
|
+
* - Real TTS (gated): an injected {@link CorpusTtsSynthesizer} (Kokoro /
|
|
13
|
+
* OmniVoice via the TTS route) produces natural speech. Real diarization
|
|
14
|
+
* DER and transcription WER benchmarking need this path.
|
|
15
|
+
*
|
|
16
|
+
* `generateVoiceCorpus` is pure (no I/O) so it is unit-testable without disk;
|
|
17
|
+
* `writeVoiceCorpus` / `readVoiceCorpus` handle the versioned on-disk corpus.
|
|
18
|
+
* A turn's labels (speaker, transcript, respond decision, entity) come straight
|
|
19
|
+
* from the scenario, so the ground truth is reproducible regardless of path.
|
|
20
|
+
*/
|
|
21
|
+
import { type VoiceEnvironment, type VoiceScenario } from "./voice-scenario";
|
|
22
|
+
/** Per-turn ground-truth label with sample-accurate timing. */
|
|
23
|
+
export interface CorpusTurnLabel {
|
|
24
|
+
index: number;
|
|
25
|
+
/** Diarization ground-truth label (the participant who spoke). */
|
|
26
|
+
speaker: string;
|
|
27
|
+
/** Resolved elizaOS entity id for the speaker, when the scenario binds one. */
|
|
28
|
+
entityId?: string;
|
|
29
|
+
/** First sample of voiced speech in this turn (after any lead silence). */
|
|
30
|
+
speechStartSample: number;
|
|
31
|
+
/** Sample just past the voiced speech (before trailing pauses). */
|
|
32
|
+
speechEndSample: number;
|
|
33
|
+
/** First sample of this turn's whole segment in the stream. */
|
|
34
|
+
segmentStartSample: number;
|
|
35
|
+
/** Sample just past this turn's whole segment (incl. trailing pauses). */
|
|
36
|
+
segmentEndSample: number;
|
|
37
|
+
/** Reference transcript for WER scoring. */
|
|
38
|
+
referenceTranscript: string;
|
|
39
|
+
/** Ground truth: should the agent respond to this turn? */
|
|
40
|
+
expectRespond: boolean;
|
|
41
|
+
/** Ground truth: is this segment a real end-of-turn boundary? */
|
|
42
|
+
expectEndOfTurn?: boolean;
|
|
43
|
+
/** Expected inferred/recognized entity, when the scenario asserts one. */
|
|
44
|
+
expectedEntity?: string;
|
|
45
|
+
/** TTS voice id used for this turn (real-TTS path), when set. */
|
|
46
|
+
ttsVoiceId?: string;
|
|
47
|
+
/** True when this turn was formant-synthesized rather than real TTS. */
|
|
48
|
+
synthetic: boolean;
|
|
49
|
+
/** Acoustic degradation applied to this turn's audio (when any). */
|
|
50
|
+
environment?: VoiceEnvironment;
|
|
51
|
+
/** True when this "turn" is the agent's own TTS echoed back (not a user turn). */
|
|
52
|
+
isAgentEcho?: boolean;
|
|
53
|
+
/** Ground truth: the speaker is the device owner / primary enrolled voice. */
|
|
54
|
+
isOwner?: boolean;
|
|
55
|
+
/** The agent's spoken reply to this turn (drives the echo gate downstream). */
|
|
56
|
+
agentReplyText?: string;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* On-disk corpus ground-truth schema version. Bump when the labeled-corpus
|
|
60
|
+
* shape changes incompatibly; `readVoiceCorpusGroundTruth` treats a corpus
|
|
61
|
+
* written by a different version as absent (→ `skipped`, never a stale `pass`).
|
|
62
|
+
*/
|
|
63
|
+
export declare const CORPUS_SCHEMA_VERSION = 1;
|
|
64
|
+
export interface CorpusGroundTruth {
|
|
65
|
+
/** Labeled-corpus schema version (see {@link CORPUS_SCHEMA_VERSION}). */
|
|
66
|
+
schemaVersion: number;
|
|
67
|
+
scenarioId: string;
|
|
68
|
+
classes: VoiceScenario["classes"];
|
|
69
|
+
sampleRate: number;
|
|
70
|
+
totalSamples: number;
|
|
71
|
+
durationSec: number;
|
|
72
|
+
participants: Array<{
|
|
73
|
+
label: string;
|
|
74
|
+
entityId?: string;
|
|
75
|
+
isOwner?: boolean;
|
|
76
|
+
ttsVoiceId?: string;
|
|
77
|
+
}>;
|
|
78
|
+
agents?: string[];
|
|
79
|
+
/** Entity ids the agent answers without a wake word (owner + enrolled). */
|
|
80
|
+
knownSpeakerEntityIds?: string[];
|
|
81
|
+
turns: CorpusTurnLabel[];
|
|
82
|
+
/** True when EVERY turn was synthetic (no real TTS used anywhere). */
|
|
83
|
+
synthetic: boolean;
|
|
84
|
+
}
|
|
85
|
+
export interface GeneratedVoiceCorpus {
|
|
86
|
+
pcm: Float32Array;
|
|
87
|
+
sampleRate: number;
|
|
88
|
+
groundTruth: CorpusGroundTruth;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Real-TTS synthesizer. Gated: when omitted, the generator uses deterministic
|
|
92
|
+
* synthetic speech. An implementation wraps the TTS route / Kokoro engine and
|
|
93
|
+
* returns mono PCM at the requested sample rate.
|
|
94
|
+
*/
|
|
95
|
+
export interface CorpusTtsSynthesizer {
|
|
96
|
+
/**
 * Render the given text to audio.
 *
 * @param args Text to speak plus context about the turn being rendered;
 *   only `voiceId` is optional.
 * @returns Promise of the rendered samples — mono PCM at `args.sampleRate`,
 *   per the interface contract above.
 */
synthesize(args: {
|
|
97
|
+
text: string;
|
|
98
|
+
voiceId?: string;
|
|
99
|
+
speakerLabel: string;
|
|
100
|
+
turnIndex: number;
|
|
101
|
+
isAgentEcho: boolean;
|
|
102
|
+
/** Sample rate the returned PCM is expected to use. */
sampleRate: number;
|
|
103
|
+
}): Promise<Float32Array>;
|
|
104
|
+
}
|
|
105
|
+
/**
 * Optional settings for {@link generateVoiceCorpus}. Every field may be
 * omitted; the defaults are chosen by the implementation and are not visible
 * in this declaration.
 */
export interface GenerateVoiceCorpusOptions {
|
|
106
|
+
/** Sample rate for the generated audio — presumably in Hz; TODO confirm. */
sampleRate?: number;
|
|
107
|
+
/** Inject a real-TTS synthesizer to produce natural speech (else synthetic). */
|
|
108
|
+
synthesizer?: CorpusTtsSynthesizer;
|
|
109
|
+
/** Silence (s) spliced after a turn that declares no explicit pauses. */
|
|
110
|
+
interTurnSilenceSec?: number;
|
|
111
|
+
/** Synthetic-speech sizing: characters of text per second of audio. */
|
|
112
|
+
charsPerSecond?: number;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Generate one labeled audio stream for a scenario. Throws on an invalid
|
|
116
|
+
* scenario (fail loud — a malformed corpus must not silently produce garbage).
|
|
117
|
+
* A turn that carries only `audioRef` (no `text`) is not synthesizable here and
|
|
118
|
+
* is rejected; pre-rendered audio is supplied through the corpus on disk.
|
|
119
|
+
*
* @param scenario Scenario to render.
* @param options Optional generation settings (see {@link GenerateVoiceCorpusOptions}).
* @returns Promise of the generated audio plus its ground-truth labels.
* @throws On an invalid scenario. NOTE(review): because the function returns a
*   promise, this presumably surfaces as a rejection — confirm against the implementation.
*/
|
|
120
|
+
export declare function generateVoiceCorpus(scenario: VoiceScenario, options?: GenerateVoiceCorpusOptions): Promise<GeneratedVoiceCorpus>;
|
|
121
|
+
/** Filesystem locations returned by {@link writeVoiceCorpus}. */
export interface VoiceCorpusPaths {
|
|
122
|
+
/** Corpus directory — presumably the `dir` passed to `writeVoiceCorpus`; confirm. */
dir: string;
|
|
123
|
+
/** Path of the audio file — presumably the `audio.wav` written under `dir`; confirm. */
audioPath: string;
|
|
124
|
+
/** Path of the labels file — presumably the `ground-truth.json` written under `dir`; confirm. */
groundTruthPath: string;
|
|
125
|
+
}
|
|
126
|
+
/**
 * Persist a generated corpus as `audio.wav` + `ground-truth.json` under `dir`.
 *
 * @param corpus Corpus to persist.
 * @param dir Target directory.
 * @returns The paths of the written corpus (see {@link VoiceCorpusPaths}).
 */
|
|
127
|
+
export declare function writeVoiceCorpus(corpus: GeneratedVoiceCorpus, dir: string): VoiceCorpusPaths;
|
|
128
|
+
/**
|
|
129
|
+
* Read a previously-written corpus's ground truth. Returns null when the corpus
|
|
130
|
+
* directory or its ground-truth file is absent (the honesty contract — the
|
|
131
|
+
* runner reports `skipped`, never `pass`, when corpus artifacts are missing).
|
|
132
|
+
* Per the {@link CORPUS_SCHEMA_VERSION} contract, a corpus written by a
* different schema version is likewise treated as absent.
*
* @param dir Corpus directory to read from.
* @returns The parsed ground truth, or `null` when it is unavailable.
*/
|
|
133
|
+
export declare function readVoiceCorpusGroundTruth(dir: string): CorpusGroundTruth | null;
|
|
134
|
+
//# sourceMappingURL=corpus-generator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"corpus-generator.d.ts","sourceRoot":"","sources":["../../../src/services/voice/corpus-generator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAeH,OAAO,EAIN,KAAK,gBAAgB,EACrB,KAAK,aAAa,EAElB,MAAM,kBAAkB,CAAC;AAa1B,+DAA+D;AAC/D,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,kEAAkE;IAClE,OAAO,EAAE,MAAM,CAAC;IAChB,+EAA+E;IAC/E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,2EAA2E;IAC3E,iBAAiB,EAAE,MAAM,CAAC;IAC1B,mEAAmE;IACnE,eAAe,EAAE,MAAM,CAAC;IACxB,+DAA+D;IAC/D,kBAAkB,EAAE,MAAM,CAAC;IAC3B,0EAA0E;IAC1E,gBAAgB,EAAE,MAAM,CAAC;IACzB,4CAA4C;IAC5C,mBAAmB,EAAE,MAAM,CAAC;IAC5B,2DAA2D;IAC3D,aAAa,EAAE,OAAO,CAAC;IACvB,iEAAiE;IACjE,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,0EAA0E;IAC1E,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,iEAAiE;IACjE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,SAAS,EAAE,OAAO,CAAC;IACnB,oEAAoE;IACpE,WAAW,CAAC,EAAE,gBAAgB,CAAC;IAC/B,kFAAkF;IAClF,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,8EAA8E;IAC9E,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,+EAA+E;IAC/E,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,IAAI,CAAC;AAEvC,MAAM,WAAW,iBAAiB;IACjC,yEAAyE;IACzE,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,aAAa,CAAC,SAAS,CAAC,CAAC;IAClC,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,KAAK,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,OAAO,CAAC;QAClB,UAAU,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,2EAA2E;IAC3E,qBAAqB,CAAC,EAAE,MAAM,EAAE,CAAC;IACjC,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,sEAAsE;IACtE,SAAS,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,oBAAoB;IACpC,GAAG,EAAE,YAAY,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,iBAAiB,CAAC;CAC/B;AAED;;;;GAIG;AACH,MAAM,WAAW,oBAAoB;IACpC,UAAU,CAAC,IAAI,EAAE;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,YAAY,EAAE,MAAM,CAAC;QACrB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,OAAO,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;KACnB,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;CAC1B;AAED,MAAM,WAAW,0BAA0B;IAC1C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gFAAgF;IAChF,WAAW
,CAAC,EAAE,oBAAoB,CAAC;IACnC,yEAAyE;IACzE,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAoCD;;;;;GAKG;AACH,wBAAsB,mBAAmB,CACxC,QAAQ,EAAE,aAAa,EACvB,OAAO,GAAE,0BAA+B,GACtC,OAAO,CAAC,oBAAoB,CAAC,CA6K/B;AAED,MAAM,WAAW,gBAAgB;IAChC,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;CACxB;AAED,mFAAmF;AACnF,wBAAgB,gBAAgB,CAC/B,MAAM,EAAE,oBAAoB,EAC5B,GAAG,EAAE,MAAM,GACT,gBAAgB,CAUlB;AAED;;;;GAIG;AACH,wBAAgB,0BAA0B,CACzC,GAAG,EAAE,MAAM,GACT,iBAAiB,GAAG,IAAI,CAa1B"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * One time span attributed to a single speaker. Used for both the reference and
 * the hypothesis timelines of {@link computeDiarizationErrorRate}.
 */
export interface DiarizationSegment {
|
|
2
|
+
/** Speaker label (any stable string — ground-truth and hypothesis label
|
|
3
|
+
* spaces are mapped against each other, so the raw strings need not match). */
|
|
4
|
+
speaker: string;
|
|
5
|
+
/** Segment start, milliseconds. */
|
|
6
|
+
startMs: number;
|
|
7
|
+
/** Segment end, milliseconds (must be ≥ startMs). */
|
|
8
|
+
endMs: number;
|
|
9
|
+
}
|
|
10
|
+
/**
 * Result of {@link computeDiarizationErrorRate}: the error rate, its three
 * error components, the denominator, and the speaker mapping used.
 */
export interface DerResult {
|
|
11
|
+
/** Diarization Error Rate: (missedMs + falseAlarmMs + confusionMs) / totalReferenceMs.
|
|
12
|
+
* 0 = perfect; can exceed 1 when false alarms dominate.
* NOTE(review): the value produced when `totalReferenceMs` is 0 is not visible in this declaration. */
|
|
13
|
+
der: number;
|
|
14
|
+
/** Reference speech the system failed to attribute to anyone (ms). */
|
|
15
|
+
missedMs: number;
|
|
16
|
+
/** System speech with no reference speaker present (ms). */
|
|
17
|
+
falseAlarmMs: number;
|
|
18
|
+
/** Reference speech attributed to the wrong (mapped) speaker (ms). */
|
|
19
|
+
confusionMs: number;
|
|
20
|
+
/** Total reference speaker-time (Σ |ref speakers in frame| · frame), the DER denominator. */
|
|
21
|
+
totalReferenceMs: number;
|
|
22
|
+
/** The chosen hypothesis→reference speaker mapping (optimal for matched time). */
|
|
23
|
+
mapping: Record<string, string>;
|
|
24
|
+
}
|
|
25
|
+
/** Optional tuning parameters for {@link computeDiarizationErrorRate}. */
export interface DerOptions {
|
|
26
|
+
/** Frame size in ms (default 10). Smaller = more precise, more work. */
|
|
27
|
+
frameMs?: number;
|
|
28
|
+
/** Above this combined speaker count, fall back to a greedy mapping instead
|
|
29
|
+
* of the exact permutation search (avoids that search's factorial, O(n!), cost). Default 7. */
|
|
30
|
+
maxExactSpeakers?: number;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Frame-based Diarization Error Rate between a reference (ground-truth) speaker
|
|
34
|
+
* timeline and a hypothesis (diarizer output) timeline. Returns DER plus its
|
|
35
|
+
* missed / false-alarm / confusion decomposition and the speaker mapping used.
|
|
36
|
+
*
* @param reference Ground-truth speaker segments.
* @param hypothesis Diarizer-output speaker segments; labels need not match the reference labels.
* @param options Optional frame size and exact-search limit (see {@link DerOptions}).
* @returns The error rate with its components, denominator and mapping (see {@link DerResult}).
*/
|
|
37
|
+
export declare function computeDiarizationErrorRate(reference: readonly DiarizationSegment[], hypothesis: readonly DiarizationSegment[], options?: DerOptions): DerResult;
|
|
38
|
+
/**
 * Whether a diarization hypothesis is within a scenario's `maxDer` budget.
 *
 * @param result Any object carrying a `der` value, e.g. a {@link DerResult}.
 * @param maxDer The budget to compare `result.der` against.
 * @returns `true` when the hypothesis is within budget.
 *   NOTE(review): whether a `der` exactly equal to `maxDer` counts as within
 *   budget is not visible in this declaration — confirm against the implementation.
 */
|
|
39
|
+
export declare function diarizationWithinBudget(result: Pick<DerResult, "der">, maxDer: number): boolean;
|
|
40
|
+
//# sourceMappingURL=diarization-error-rate.d.ts.map
|