@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice Workbench corpus generator (#8785).
|
|
3
|
+
*
|
|
4
|
+
* Turns a declarative {@link VoiceScenario} into one labeled audio stream + a
|
|
5
|
+
* ground-truth JSON the headless runner scores against. Two synthesis paths,
|
|
6
|
+
* one shape:
|
|
7
|
+
*
|
|
8
|
+
* - Synthetic (default, deterministic, NO native model): formant-resonator
|
|
9
|
+
* speech-like PCM (`__test-helpers__/synthetic-speech.ts`) the Silero VAD
|
|
10
|
+
* reads as speech. Reproducible in CI with no artifacts — it exercises the
|
|
11
|
+
* pipeline plumbing + the scorers/labels, not diarization/WER *accuracy*.
|
|
12
|
+
* - Real TTS (gated): an injected {@link CorpusTtsSynthesizer} (Kokoro /
|
|
13
|
+
* OmniVoice via the TTS route) produces natural speech. Real diarization
|
|
14
|
+
* DER and transcription WER benchmarking need this path.
|
|
15
|
+
*
|
|
16
|
+
* `generateVoiceCorpus` is pure (no I/O) so it is unit-testable without disk;
|
|
17
|
+
* `writeVoiceCorpus` / `readVoiceCorpus` handle the versioned on-disk corpus.
|
|
18
|
+
* A turn's labels (speaker, transcript, respond decision, entity) come straight
|
|
19
|
+
* from the scenario, so the ground truth is reproducible regardless of path.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
23
|
+
import path from "node:path";
|
|
24
|
+
import {
|
|
25
|
+
AGENT_VOICE_TIMBRE,
|
|
26
|
+
makeSpeechWithSilenceFixture,
|
|
27
|
+
type SpeakerTimbre,
|
|
28
|
+
speakerTimbreForIndex,
|
|
29
|
+
} from "./__test-helpers__/synthetic-speech";
|
|
30
|
+
import {
|
|
31
|
+
type AugmentationSpec,
|
|
32
|
+
augmentPcm,
|
|
33
|
+
specIsClean,
|
|
34
|
+
} from "./corpus-augment";
|
|
35
|
+
import {
|
|
36
|
+
resolveTurnEnvironment,
|
|
37
|
+
turnReferenceTranscript,
|
|
38
|
+
turnSpeakerLabel,
|
|
39
|
+
type VoiceEnvironment,
|
|
40
|
+
type VoiceScenario,
|
|
41
|
+
validateVoiceScenario,
|
|
42
|
+
} from "./voice-scenario";
|
|
43
|
+
import { encodeMonoPcm16Wav } from "./wav-codec";
|
|
44
|
+
|
|
45
|
+
/** Sample rate (Hz) of the generated stream when `options.sampleRate` is omitted. */
const DEFAULT_SAMPLE_RATE = 16_000;
/**
 * Characters of text per second of audio — a natural speaking rate — used to
 * size a synthetic turn from its text length when `options.charsPerSecond` is
 * omitted.
 */
const DEFAULT_CHARS_PER_SECOND = 13;
/** Gap (s) spliced after a turn with no explicit pauses when `options.interTurnSilenceSec` is omitted. */
const DEFAULT_INTER_TURN_SILENCE_SEC = 0.4;
/** Lower bound (s) on the voiced length of a single synthetic turn. */
const MIN_SPEECH_SEC = 0.4;
/** Upper bound (s) on a single synthetic turn; long-form monologue still lands well under it. */
const MAX_SPEECH_SEC = 45;
/** Lead silence (s) requested from the synthetic-speech fixture for every synthetic turn. */
const SYNTHETIC_LEAD_SILENCE_SEC = 0.15;
/** Tail silence (s) requested from the synthetic-speech fixture for every synthetic turn. */
const SYNTHETIC_TAIL_SILENCE_SEC = 0.15;
|
|
54
|
+
|
|
55
|
+
/**
 * Ground-truth label for one turn of the generated stream. Every position is
 * a sample index, so the timing is sample-accurate.
 */
export interface CorpusTurnLabel {
  index: number;
  /** Participant who spoke this turn — the diarization ground truth. */
  speaker: string;
  /** elizaOS entity id resolved for the speaker; present only when the scenario binds one. */
  entityId?: string;
  /** Where voiced speech begins in this turn, i.e. after any lead silence. */
  speechStartSample: number;
  /** One past the last voiced sample; trailing pauses come after this point. */
  speechEndSample: number;
  /** Where this turn's whole segment begins in the stream. */
  segmentStartSample: number;
  /** One past the end of this turn's whole segment, trailing pauses included. */
  segmentEndSample: number;
  /** Transcript that WER scoring compares against. */
  referenceTranscript: string;
  /** Whether the agent is expected to respond to this turn. */
  expectRespond: boolean;
  /** Whether this segment is a genuine end-of-turn boundary. */
  expectEndOfTurn?: boolean;
  /** Entity expected to be inferred/recognized; present only when the scenario asserts one. */
  expectedEntity?: string;
  /** TTS voice id for this turn on the real-TTS path, when one is set. */
  ttsVoiceId?: string;
  /** `true` for formant-synthesized audio, `false` for real TTS. */
  synthetic: boolean;
  /** Acoustic degradation applied to this turn's audio, if any. */
  environment?: VoiceEnvironment;
  /** `true` when this "turn" is the agent's own TTS echoed back rather than a user turn. */
  isAgentEcho?: boolean;
  /** `true` when the speaker is the device owner / primary enrolled voice. */
  isOwner?: boolean;
  /** What the agent says in reply to this turn (drives the echo gate downstream). */
  agentReplyText?: string;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Schema version stamped into the on-disk corpus ground truth. Increment it
 * whenever the labeled-corpus shape changes incompatibly:
 * `readVoiceCorpusGroundTruth` treats a corpus written under any other version
 * as absent, which yields `skipped` rather than a stale `pass`.
 */
export const CORPUS_SCHEMA_VERSION = 1;
|
|
98
|
+
|
|
99
|
+
/**
 * Ground truth for a whole generated corpus: stream-level metadata plus one
 * {@link CorpusTurnLabel} per turn.
 */
export interface CorpusGroundTruth {
  /** Version of the labeled-corpus schema (see {@link CORPUS_SCHEMA_VERSION}). */
  schemaVersion: number;
  scenarioId: string;
  /** Typed as the scenario's own `classes` field. */
  classes: VoiceScenario["classes"];
  sampleRate: number;
  totalSamples: number;
  durationSec: number;
  participants: Array<{
    label: string;
    entityId?: string;
    isOwner?: boolean;
    ttsVoiceId?: string;
  }>;
  agents?: string[];
  /** Entity ids (owner + enrolled) the agent answers without needing a wake word. */
  knownSpeakerEntityIds?: string[];
  /** Per-turn labels. */
  turns: CorpusTurnLabel[];
  /** `true` only when EVERY turn was synthetic, i.e. real TTS was used nowhere. */
  synthetic: boolean;
}
|
|
120
|
+
|
|
121
|
+
/** What `generateVoiceCorpus` resolves to: the audio stream and its labels. */
export interface GeneratedVoiceCorpus {
  /** PCM samples of the generated stream. */
  pcm: Float32Array;
  /** Sample rate (Hz) that accompanies `pcm`. */
  sampleRate: number;
  /** Labels the generated stream is scored against. */
  groundTruth: CorpusGroundTruth;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Pluggable real-TTS synthesizer. It is gated: when none is supplied the
 * generator falls back to deterministic synthetic speech. An implementation
 * wraps the TTS route / Kokoro engine and must return mono PCM at the sample
 * rate it is asked for.
 */
export interface CorpusTtsSynthesizer {
  synthesize(args: {
    /** Text of the turn to speak (the generator passes it trimmed). */
    text: string;
    /** Voice to use: the turn's `ttsVoiceId`, else the participant's, else unset. */
    voiceId?: string;
    /** Speaker label of the turn being synthesized. */
    speakerLabel: string;
    /** Zero-based position of the turn within the scenario's turns. */
    turnIndex: number;
    /** `true` when the turn is flagged as the agent's own echoed TTS. */
    isAgentEcho: boolean;
    /** Sample rate (Hz) the returned PCM must use. */
    sampleRate: number;
  }): Promise<Float32Array>;
}
|
|
142
|
+
|
|
143
|
+
/** Optional knobs for `generateVoiceCorpus`; every field has a built-in default. */
export interface GenerateVoiceCorpusOptions {
  /** Sample rate (Hz) of the generated stream. */
  sampleRate?: number;
  /** Real-TTS synthesizer producing natural speech; omit it to get synthetic speech. */
  synthesizer?: CorpusTtsSynthesizer;
  /** Seconds of silence spliced after any turn that declares no explicit pauses. */
  interTurnSilenceSec?: number;
  /** Sizing of synthetic speech: characters of text per second of audio. */
  charsPerSecond?: number;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Hash a speaker label into a stable synthesis seed using 32-bit FNV-1a.
 *
 * The hash runs over the label's UTF-16 code units, so the same label always
 * yields the same seed.
 *
 * @param label Speaker label to hash.
 * @returns The hash as an unsigned 32-bit integer.
 */
function labelSeed(label: string): number {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let hash = FNV_OFFSET_BASIS;
  let pos = 0;
  while (pos < label.length) {
    // FNV-1a order: xor the unit in first, then multiply (wrapping at 32 bits).
    hash = Math.imul(hash ^ label.charCodeAt(pos), FNV_PRIME);
    pos += 1;
  }
  // Math.imul yields a signed int32; reinterpret it as unsigned.
  return hash >>> 0;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Convert a duration in milliseconds into a whole number of samples.
 *
 * The product is rounded to the nearest sample and a negative duration clamps
 * to 0. A non-finite result (from a NaN or infinite `ms` / `sampleRate`) is
 * rejected here: `Math.max(0, NaN)` is still NaN, and a NaN sample count used
 * to size a typed array is coerced to a zero-length buffer instead of failing
 * at the point of the bad input.
 *
 * @param ms Duration in milliseconds.
 * @param sampleRate Samples per second.
 * @returns A non-negative integer sample count.
 * @throws RangeError when the computed sample count is not finite.
 */
function silenceSamples(ms: number, sampleRate: number): number {
  const samples = Math.round((ms / 1000) * sampleRate);
  if (!Number.isFinite(samples)) {
    throw new RangeError(
      `[voice-corpus] cannot convert ${ms} ms at ${sampleRate} Hz into a sample count`,
    );
  }
  return Math.max(0, samples);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Build a continuous competing-talker stream for `backgroundTalkersDb`.
 *
 * The audio is formant-synthesized speech (no models), so it is deterministic
 * for a given seed. `augmentPcm` mixes + loops it under the real turn at the
 * requested level.
 *
 * @param sampleRate Sample rate (Hz) handed to the speech fixture.
 * @param lengthSamples Desired length in samples; the requested speech duration
 *   is never shorter than 0.3 s.
 * @param seed Seed handed to the speech fixture.
 * @returns The fixture's PCM, requested with no lead or tail silence.
 */
function synthesizeBabble(
  sampleRate: number,
  lengthSamples: number,
  seed: number,
): Float32Array {
  const requestedSec = lengthSamples / sampleRate;
  const speechSec = Math.max(0.3, requestedSec);
  const { pcm } = makeSpeechWithSilenceFixture({
    sampleRate,
    leadSilenceSec: 0,
    speechSec,
    tailSilenceSec: 0,
    seed,
  });
  return pcm;
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Generate one labeled audio stream for a scenario. Throws on an invalid
|
|
189
|
+
* scenario (fail loud — a malformed corpus must not silently produce garbage).
|
|
190
|
+
* A turn that carries only `audioRef` (no `text`) is not synthesizable here and
|
|
191
|
+
* is rejected; pre-rendered audio is supplied through the corpus on disk.
|
|
192
|
+
*/
|
|
193
|
+
export async function generateVoiceCorpus(
|
|
194
|
+
scenario: VoiceScenario,
|
|
195
|
+
options: GenerateVoiceCorpusOptions = {},
|
|
196
|
+
): Promise<GeneratedVoiceCorpus> {
|
|
197
|
+
const validation = validateVoiceScenario(scenario);
|
|
198
|
+
if (!validation.valid) {
|
|
199
|
+
throw new Error(
|
|
200
|
+
`[voice-corpus] invalid scenario "${scenario.id}": ${validation.errors.join("; ")}`,
|
|
201
|
+
);
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
const sampleRate = options.sampleRate ?? DEFAULT_SAMPLE_RATE;
|
|
205
|
+
const charsPerSecond = options.charsPerSecond ?? DEFAULT_CHARS_PER_SECOND;
|
|
206
|
+
const interTurnSilence = silenceSamples(
|
|
207
|
+
(options.interTurnSilenceSec ?? DEFAULT_INTER_TURN_SILENCE_SEC) * 1000,
|
|
208
|
+
sampleRate,
|
|
209
|
+
);
|
|
210
|
+
const synthesizer = options.synthesizer;
|
|
211
|
+
|
|
212
|
+
const participantByLabel = new Map(
|
|
213
|
+
scenario.participants.map((p) => [p.label, p]),
|
|
214
|
+
);
|
|
215
|
+
// Each participant gets a distinct voice colour, spread evenly across the
|
|
216
|
+
// timbre range so a blind acoustic diarizer can tell co-present speakers apart
|
|
217
|
+
// from the audio alone (#9427).
|
|
218
|
+
const timbreByLabel = new Map<string, SpeakerTimbre>(
|
|
219
|
+
scenario.participants.map((p, i) => [
|
|
220
|
+
p.label,
|
|
221
|
+
speakerTimbreForIndex(i, scenario.participants.length),
|
|
222
|
+
]),
|
|
223
|
+
);
|
|
224
|
+
|
|
225
|
+
const segments: Float32Array[] = [];
|
|
226
|
+
const labels: CorpusTurnLabel[] = [];
|
|
227
|
+
let cursor = 0;
|
|
228
|
+
let anyReal = false;
|
|
229
|
+
|
|
230
|
+
for (let i = 0; i < scenario.turns.length; i++) {
|
|
231
|
+
const turn = scenario.turns[i];
|
|
232
|
+
const text = turn.text?.trim();
|
|
233
|
+
if (!text) {
|
|
234
|
+
throw new Error(
|
|
235
|
+
`[voice-corpus] turn[${i}] of "${scenario.id}" has no text to synthesize (audioRef-only turns are supplied via the on-disk corpus, not generated)`,
|
|
236
|
+
);
|
|
237
|
+
}
|
|
238
|
+
const participant = participantByLabel.get(turn.speaker);
|
|
239
|
+
const ttsVoiceId = turn.ttsVoiceId ?? participant?.ttsVoiceId;
|
|
240
|
+
|
|
241
|
+
const segmentStartSample = cursor;
|
|
242
|
+
let speech: Float32Array;
|
|
243
|
+
let speechStartOffset: number;
|
|
244
|
+
let speechEndOffset: number;
|
|
245
|
+
let synthetic: boolean;
|
|
246
|
+
|
|
247
|
+
if (synthesizer) {
|
|
248
|
+
speech = await synthesizer.synthesize({
|
|
249
|
+
text,
|
|
250
|
+
voiceId: ttsVoiceId,
|
|
251
|
+
speakerLabel: turn.speaker,
|
|
252
|
+
turnIndex: i,
|
|
253
|
+
isAgentEcho: turn.isAgentEcho === true,
|
|
254
|
+
sampleRate,
|
|
255
|
+
});
|
|
256
|
+
speechStartOffset = 0;
|
|
257
|
+
speechEndOffset = speech.length;
|
|
258
|
+
synthetic = false;
|
|
259
|
+
anyReal = true;
|
|
260
|
+
} else {
|
|
261
|
+
const speechSec = Math.min(
|
|
262
|
+
MAX_SPEECH_SEC,
|
|
263
|
+
Math.max(MIN_SPEECH_SEC, text.length / charsPerSecond),
|
|
264
|
+
);
|
|
265
|
+
// An agent-echo turn is the agent's OWN TTS bleeding back through the
|
|
266
|
+
// mic, so it carries the agent's voice — not the labelled speaker's. Real
|
|
267
|
+
// speaker turns get their distinct per-speaker timbre (#9427).
|
|
268
|
+
const timbre = turn.isAgentEcho
|
|
269
|
+
? AGENT_VOICE_TIMBRE
|
|
270
|
+
: (timbreByLabel.get(turn.speaker) ?? AGENT_VOICE_TIMBRE);
|
|
271
|
+
const fixture = makeSpeechWithSilenceFixture({
|
|
272
|
+
sampleRate,
|
|
273
|
+
leadSilenceSec: SYNTHETIC_LEAD_SILENCE_SEC,
|
|
274
|
+
speechSec,
|
|
275
|
+
tailSilenceSec: SYNTHETIC_TAIL_SILENCE_SEC,
|
|
276
|
+
seed: labelSeed(turn.isAgentEcho ? "__agent__" : turn.speaker),
|
|
277
|
+
timbre,
|
|
278
|
+
});
|
|
279
|
+
speech = fixture.pcm;
|
|
280
|
+
speechStartOffset = fixture.speechStartSample;
|
|
281
|
+
speechEndOffset = fixture.speechEndSample;
|
|
282
|
+
synthetic = true;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
// Trailing pauses: explicit per-turn gaps, else the default inter-turn gap
|
|
286
|
+
// (except after the final turn).
|
|
287
|
+
const pauseTotal =
|
|
288
|
+
turn.pausesMs && turn.pausesMs.length > 0
|
|
289
|
+
? turn.pausesMs.reduce((n, ms) => n + silenceSamples(ms, sampleRate), 0)
|
|
290
|
+
: i < scenario.turns.length - 1
|
|
291
|
+
? interTurnSilence
|
|
292
|
+
: 0;
|
|
293
|
+
|
|
294
|
+
// Assemble the turn's full segment (voiced speech + trailing pause) so the
|
|
295
|
+
// per-turn acoustic degradation — reverb ringing into the gap, a noise
|
|
296
|
+
// floor in the "silence" — covers the pause, not just the speech.
|
|
297
|
+
let segment: Float32Array = new Float32Array(speech.length + pauseTotal);
|
|
298
|
+
segment.set(speech, 0);
|
|
299
|
+
|
|
300
|
+
const env = resolveTurnEnvironment(scenario, turn);
|
|
301
|
+
let appliedEnv: VoiceEnvironment | undefined;
|
|
302
|
+
if (env && !specIsClean(env)) {
|
|
303
|
+
const seed =
|
|
304
|
+
env.seed ?? (labelSeed(scenario.id) ^ (i * 0x9e3779b1)) >>> 0;
|
|
305
|
+
const resolvedEnv: AugmentationSpec = { ...env, seed };
|
|
306
|
+
const babble =
|
|
307
|
+
resolvedEnv.backgroundTalkersDb !== undefined
|
|
308
|
+
? synthesizeBabble(sampleRate, segment.length, (seed ^ 0x1234) >>> 0)
|
|
309
|
+
: undefined;
|
|
310
|
+
segment = augmentPcm(segment, sampleRate, resolvedEnv, {
|
|
311
|
+
...(babble ? { babble } : {}),
|
|
312
|
+
});
|
|
313
|
+
appliedEnv = resolvedEnv;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
segments.push(segment);
|
|
317
|
+
cursor += segment.length;
|
|
318
|
+
|
|
319
|
+
labels.push({
|
|
320
|
+
index: i,
|
|
321
|
+
speaker: turnSpeakerLabel(turn),
|
|
322
|
+
...(participant?.entityId ? { entityId: participant.entityId } : {}),
|
|
323
|
+
speechStartSample: segmentStartSample + speechStartOffset,
|
|
324
|
+
speechEndSample: segmentStartSample + speechEndOffset,
|
|
325
|
+
segmentStartSample,
|
|
326
|
+
segmentEndSample: cursor,
|
|
327
|
+
referenceTranscript: turnReferenceTranscript(turn),
|
|
328
|
+
expectRespond: turn.isAgentEcho ? false : turn.expectRespond,
|
|
329
|
+
expectEndOfTurn: turn.expectEndOfTurn ?? true,
|
|
330
|
+
...(turn.expectedEntity ? { expectedEntity: turn.expectedEntity } : {}),
|
|
331
|
+
...(ttsVoiceId ? { ttsVoiceId } : {}),
|
|
332
|
+
synthetic,
|
|
333
|
+
...(appliedEnv ? { environment: appliedEnv } : {}),
|
|
334
|
+
...(turn.isAgentEcho ? { isAgentEcho: true } : {}),
|
|
335
|
+
...(participant?.isOwner ? { isOwner: true } : {}),
|
|
336
|
+
...(turn.agentReplyText ? { agentReplyText: turn.agentReplyText } : {}),
|
|
337
|
+
});
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
const pcm = new Float32Array(cursor);
|
|
341
|
+
let offset = 0;
|
|
342
|
+
for (const segment of segments) {
|
|
343
|
+
pcm.set(segment, offset);
|
|
344
|
+
offset += segment.length;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
const groundTruth: CorpusGroundTruth = {
|
|
348
|
+
schemaVersion: CORPUS_SCHEMA_VERSION,
|
|
349
|
+
scenarioId: scenario.id,
|
|
350
|
+
classes: scenario.classes,
|
|
351
|
+
sampleRate,
|
|
352
|
+
totalSamples: pcm.length,
|
|
353
|
+
durationSec: pcm.length / sampleRate,
|
|
354
|
+
participants: scenario.participants.map((p) => ({
|
|
355
|
+
label: p.label,
|
|
356
|
+
...(p.entityId ? { entityId: p.entityId } : {}),
|
|
357
|
+
...(p.isOwner ? { isOwner: p.isOwner } : {}),
|
|
358
|
+
...(p.ttsVoiceId ? { ttsVoiceId: p.ttsVoiceId } : {}),
|
|
359
|
+
})),
|
|
360
|
+
...(scenario.agents ? { agents: scenario.agents } : {}),
|
|
361
|
+
...(scenario.knownSpeakerEntityIds
|
|
362
|
+
? { knownSpeakerEntityIds: scenario.knownSpeakerEntityIds }
|
|
363
|
+
: {}),
|
|
364
|
+
turns: labels,
|
|
365
|
+
synthetic: !anyReal,
|
|
366
|
+
};
|
|
367
|
+
|
|
368
|
+
return { pcm, sampleRate, groundTruth };
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
/** Filesystem locations returned by `writeVoiceCorpus` after persisting a corpus. */
export interface VoiceCorpusPaths {
  /** Directory the corpus artifacts were written into. */
  dir: string;
  /** Path of the `audio.wav` file inside `dir`. */
  audioPath: string;
  /** Path of the `ground-truth.json` file inside `dir`. */
  groundTruthPath: string;
}
|
|
376
|
+
|
|
377
|
+
/**
 * Persist a generated corpus as `audio.wav` + `ground-truth.json` under `dir`.
 *
 * The directory is created recursively first. The audio file holds the corpus
 * PCM as encoded by `encodeMonoPcm16Wav`; the ground truth is written as
 * 2-space-indented JSON followed by a trailing newline.
 *
 * @param corpus - Generated corpus whose `pcm`, `sampleRate` and `groundTruth` are written.
 * @param dir - Destination directory.
 * @returns The destination directory plus the two file paths that were written.
 */
export function writeVoiceCorpus(
  corpus: GeneratedVoiceCorpus,
  dir: string,
): VoiceCorpusPaths {
  mkdirSync(dir, { recursive: true });

  const written: VoiceCorpusPaths = {
    dir,
    audioPath: path.join(dir, "audio.wav"),
    groundTruthPath: path.join(dir, "ground-truth.json"),
  };

  // Audio goes first, so a failure while serialising the labels still leaves
  // the WAV on disk (same ordering as before).
  const wavBytes = encodeMonoPcm16Wav(corpus.pcm, corpus.sampleRate);
  writeFileSync(written.audioPath, wavBytes);

  const groundTruthJson = JSON.stringify(corpus.groundTruth, null, 2);
  writeFileSync(written.groundTruthPath, `${groundTruthJson}\n`);

  return written;
}
|
|
392
|
+
|
|
393
|
+
/**
 * Load the ground truth of a corpus previously written under `dir`.
 *
 * Honesty contract: anything that is not a usable, current-schema ground truth
 * is reported as absent (`null`) so the runner records `skipped`, never `pass`.
 * That covers a missing directory or `ground-truth.json`, a JSON payload that
 * is not an object, and a payload whose `schemaVersion` differs from
 * `CORPUS_SCHEMA_VERSION` (stale labels from an incompatible writer).
 *
 * Malformed JSON is not caught here: `JSON.parse` throws and the error
 * propagates to the caller.
 *
 * @param dir - Corpus directory to read from.
 * @returns The parsed ground truth, or `null` when it is absent or incompatible.
 */
export function readVoiceCorpusGroundTruth(
  dir: string,
): CorpusGroundTruth | null {
  const file = path.join(dir, "ground-truth.json");
  if (!existsSync(file)) {
    return null;
  }

  const raw = readFileSync(file, "utf8");
  const candidate: unknown = JSON.parse(raw);
  if (candidate === null || typeof candidate !== "object") {
    return null;
  }

  const { schemaVersion } = candidate as { schemaVersion?: unknown };
  return schemaVersion === CORPUS_SCHEMA_VERSION
    ? (candidate as CorpusGroundTruth)
    : null;
}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
computeDiarizationErrorRate,
|
|
4
|
+
type DerResult,
|
|
5
|
+
type DiarizationSegment,
|
|
6
|
+
} from "./diarization-error-rate";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Greedy-branch coverage for the DER scorer (issue #9147).
|
|
10
|
+
*
|
|
11
|
+
* `bestMapping` has two arms: an exact injective permutation search for small
|
|
12
|
+
* speaker counts, and an O(n²) greedy fallback once `refSpeakers.length +
|
|
13
|
+
* hypSpeakers.length > maxExactSpeakers` (default 7) — the existing
|
|
14
|
+
* `diarization-error-rate.test.ts` only ever hits the exact arm. A 7+-speaker
|
|
15
|
+
* conversation (a noisy multi-party room, the very case `maxDer` exists to
|
|
16
|
+
* guard) takes the greedy path, so this pins:
|
|
17
|
+
* - greedy fires both when forced via a low `maxExactSpeakers` AND on a
|
|
18
|
+
* genuine 4-ref/4-hyp (=8 combined) conversation under the default,
|
|
19
|
+
* - the greedy mapping stays injective (no ref or hyp reused — the
|
|
20
|
+
* usedHyp/usedRef guard), and
|
|
21
|
+
* - greedy reproduces the exact arm's DER on separable inputs (where greedy
|
|
22
|
+
* is provably optimal), so the fallback is not silently wrong.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/** Build a `DiarizationSegment` from positional speaker / start / end arguments. */
const seg = (
  speaker: string,
  startMs: number,
  endMs: number,
): DiarizationSegment => {
  return { speaker, startMs, endMs };
};
|
|
30
|
+
|
|
31
|
+
/**
 * Assert that a hyp→ref mapping is injective: no hypothesis label appears twice
 * (free, since they are object keys) and no reference label is the target of
 * more than one hypothesis label (the property the greedy usedRef guard
 * enforces).
 */
function expectInjective(mapping: DerResult["mapping"]): void {
  const pairs = Object.entries(mapping);
  const distinctHyps = new Set(pairs.map(([hyp]) => hyp));
  const distinctRefs = new Set(pairs.map(([, ref]) => ref));
  expect(distinctHyps.size).toBe(pairs.length);
  expect(distinctRefs.size).toBe(pairs.length);
}
|
|
39
|
+
|
|
40
|
+
describe("computeDiarizationErrorRate — greedy mapping fallback", () => {
  /** One back-to-back 1000 ms span per label, the first starting at t = 0. */
  const consecutiveSpans = (labels: string[]): DiarizationSegment[] =>
    labels.map((label, slot) => seg(label, slot * 1000, (slot + 1) * 1000));

  it("forces the greedy arm via a low maxExactSpeakers and still scores a perfect 2-speaker match", () => {
    // Combined speaker count is 2 + 2 = 4, above the cap of 3, so the greedy
    // arm runs here even though the default cap would choose the exact arm.
    const { der, confusionMs, mapping } = computeDiarizationErrorRate(
      consecutiveSpans(["alice", "bob"]),
      consecutiveSpans(["spk0", "spk1"]),
      { maxExactSpeakers: 3 },
    );
    expect(der).toBe(0);
    expect(confusionMs).toBe(0);
    expect(mapping).toEqual({ spk0: "alice", spk1: "bob" });
    expectInjective(mapping);
  });

  it("takes the greedy path on a genuine 4-speaker / 8-combined conversation (default maxExact 7)", () => {
    // No options passed: 4 reference + 4 hypothesis speakers = 8, which is
    // already above the default cap of 7.
    const { der, totalReferenceMs, mapping } = computeDiarizationErrorRate(
      consecutiveSpans(["a", "b", "c", "d"]),
      consecutiveSpans(["w", "x", "y", "z"]),
    );
    expect(der).toBe(0);
    expect(totalReferenceMs).toBeCloseTo(4000, -1);
    expect(Object.keys(mapping)).toHaveLength(4);
    expectInjective(mapping);
  });

  it("greedy attributes a swapped span as confusion, not missed/false-alarm, with a tie-break-invariant DER", () => {
    // Four reference speakers, but the hypothesis uses label "w" for both a's
    // span and d's span. Whichever of the two the greedy tie-break maps w onto,
    // the other span is a confusion — so the score does not depend on the
    // tie-break.
    const reference = consecutiveSpans(["a", "b", "c", "d"]);
    const hypothesis = consecutiveSpans(["w", "x", "y", "w"]);
    // 4 reference + 3 hypothesis labels = 7 combined; a cap of 5 forces greedy.
    const { missedMs, falseAlarmMs, confusionMs, der, mapping } =
      computeDiarizationErrorRate(reference, hypothesis, {
        maxExactSpeakers: 5,
      });
    expect(missedMs).toBe(0);
    expect(falseAlarmMs).toBe(0);
    expect(confusionMs).toBeCloseTo(1000, -1); // exactly one swapped span
    expect(der).toBeCloseTo(0.25, 2); // 1000 confusion / 4000 ref
    expectInjective(mapping); // w mapped once, x and y once each
  });

  it("leaves a zero-overlap hypothesis speaker unmapped (greedy skips s<=0 pairs → false alarm)", () => {
    const reference = consecutiveSpans(["a", "b"]);
    // h2 talks over a stretch with no reference speaker at all (2000-3000).
    const hypothesis = consecutiveSpans(["h0", "h1", "h2"]);
    const scored = computeDiarizationErrorRate(reference, hypothesis, {
      maxExactSpeakers: 4,
    });
    // h2 never co-occurs with any reference speaker, so it gets no mapping entry.
    expect(scored.mapping.h2).toBeUndefined();
    expect(scored.falseAlarmMs).toBeCloseTo(1000, -1);
    expectInjective(scored.mapping);
  });

  it("greedy reproduces the exact arm's DER on a separable 3-speaker case", () => {
    const reference = consecutiveSpans(["a", "b", "c"]);
    // c's span is collapsed onto label p, which is a confusion under either arm.
    const hypothesis = consecutiveSpans(["p", "q", "p"]);
    const exact = computeDiarizationErrorRate(reference, hypothesis, {
      maxExactSpeakers: 16,
    });
    const greedy = computeDiarizationErrorRate(reference, hypothesis, {
      maxExactSpeakers: 0,
    });
    expect(greedy.der).toBeCloseTo(exact.der, 5);
    expect(greedy.confusionMs).toBeCloseTo(exact.confusionMs, -1);
    expectInjective(greedy.mapping);
  });
});
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
computeDiarizationErrorRate,
|
|
4
|
+
type DiarizationSegment,
|
|
5
|
+
diarizationWithinBudget,
|
|
6
|
+
} from "./diarization-error-rate";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Diarization Error Rate scorer (issue #9147). The voice scenarios carry a
|
|
10
|
+
* `maxDer` threshold and an `expectedSpeakerLabel` per turn, but nothing
|
|
11
|
+
* computed DER — so a wrong speaker attribution or a missed overlapping talker
|
|
12
|
+
* passed silently. This pins the four DER components (missed / false-alarm /
|
|
13
|
+
* confusion / correct) and the key property that DER is invariant to how the
|
|
14
|
+
* diarizer NAMES its speakers (it's the partition that matters, not the labels).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/** Positional shorthand for a `DiarizationSegment` literal. */
const seg = (
  speaker: string,
  startMs: number,
  endMs: number,
): DiarizationSegment => ({ speaker, startMs, endMs });
|
|
26
|
+
|
|
27
|
+
describe("computeDiarizationErrorRate", () => {
  it("is 0 for a perfect match (even with different speaker label names)", () => {
    const truth = [seg("alice", 0, 1000), seg("bob", 1000, 2000)];
    // Same partition of the timeline, but the diarizer names them spk0/spk1.
    const guess = [seg("spk0", 0, 1000), seg("spk1", 1000, 2000)];
    const { der, confusionMs, mapping } = computeDiarizationErrorRate(
      truth,
      guess,
    );
    expect(der).toBe(0);
    expect(confusionMs).toBe(0);
    // The optimal mapping pairs up the equivalent speakers.
    expect(mapping).toEqual({ spk0: "alice", spk1: "bob" });
  });

  it("counts missed speech when the system misses a speaker", () => {
    const truth = [seg("alice", 0, 1000), seg("bob", 1000, 2000)];
    // Only alice's span is detected; bob's 1000 ms is missed entirely.
    const guess = [seg("spk0", 0, 1000)];
    const { missedMs, der } = computeDiarizationErrorRate(truth, guess);
    expect(missedMs).toBeCloseTo(1000, -1);
    expect(der).toBeCloseTo(0.5, 1); // 1000 missed / 2000 ref
  });

  it("counts false alarm when the system hallucinates speech", () => {
    const truth = [seg("alice", 0, 1000)];
    const guess = [seg("spk0", 0, 1000), seg("spk1", 1000, 2000)];
    const { falseAlarmMs, totalReferenceMs } = computeDiarizationErrorRate(
      truth,
      guess,
    );
    expect(falseAlarmMs).toBeCloseTo(1000, -1);
    expect(totalReferenceMs).toBeCloseTo(1000, -1);
  });

  it("counts confusion when the same span is attributed to a swapped speaker", () => {
    // Three distinct reference speakers; the hypothesis reuses the first
    // speaker's id for the third span, so that span is a confusion (wrong
    // speaker) rather than missed speech or a false alarm.
    const truth = [seg("a", 0, 1000), seg("b", 1000, 2000), seg("c", 2000, 3000)];
    const guess = [seg("x", 0, 1000), seg("y", 1000, 2000), seg("x", 2000, 3000)];
    const { missedMs, falseAlarmMs, confusionMs, der } =
      computeDiarizationErrorRate(truth, guess);
    expect(missedMs).toBe(0);
    expect(falseAlarmMs).toBe(0);
    expect(confusionMs).toBeCloseTo(1000, -1); // c's span mapped to x≠c
    expect(der).toBeCloseTo(1 / 3, 2);
  });

  it("handles overlapping speech (both speakers active in one span)", () => {
    // alice speaks 0-2000 and bob 1000-2000, giving 1000 ms where both
    // reference speakers are active.
    const truth = [seg("alice", 0, 2000), seg("bob", 1000, 2000)];
    const guess = [seg("spk0", 0, 2000), seg("spk1", 1000, 2000)];
    const { totalReferenceMs, der } = computeDiarizationErrorRate(truth, guess);
    // Reference speaker-time = 2000 (alice) + 1000 (bob's overlap) = 3000 ms.
    expect(totalReferenceMs).toBeCloseTo(3000, -1);
    expect(der).toBe(0); // perfectly diarized overlap
  });

  it("penalizes a missed overlapping talker", () => {
    const truth = [seg("alice", 0, 2000), seg("bob", 1000, 2000)];
    // bob's overlapping 1000 ms never shows up in the hypothesis.
    const guess = [seg("spk0", 0, 2000)];
    const { missedMs, der } = computeDiarizationErrorRate(truth, guess);
    expect(missedMs).toBeCloseTo(1000, -1);
    expect(der).toBeCloseTo(1000 / 3000, 2);
  });
});
|
|
93
|
+
|
|
94
|
+
describe("diarizationWithinBudget", () => {
  it("gates a hypothesis against the scenario maxDer", () => {
    // [der, maxDer, expected verdict] — under budget, over budget, and the
    // boundary where der equals a zero budget.
    const cases: Array<[number, number, boolean]> = [
      [0.1, 0.15, true],
      [0.2, 0.15, false],
      [0, 0, true],
    ];
    for (const [der, maxDer, withinBudget] of cases) {
      expect(diarizationWithinBudget({ der }, maxDer)).toBe(withinBudget);
    }
  });
});
|