@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice turn controller — the turn-taking layer above `VoiceScheduler`.
|
|
3
|
+
*
|
|
4
|
+
* Sits between W1's `VadEvent` stream + W2's `StreamingTranscriber` events
|
|
5
|
+
* and the generation path (the runtime message handler / the local engine's
|
|
6
|
+
* `generate`, which routes through `voiceStreamingArgs` → `VoiceScheduler` →
|
|
7
|
+
* phrase chunker → TTS). Implements the brief's items A4 / A5 / A6:
|
|
8
|
+
*
|
|
9
|
+
* - `speech-start` → fire `prewarm(roomId)` immediately (the
|
|
10
|
+
* response-handler stable prefix / MTP
|
|
11
|
+
* slot KV-prefill) — before STT finishes.
|
|
12
|
+
* - `speech-pause(ms >= thr)` → kick a SPECULATIVE response off W2's
|
|
13
|
+
* current partial transcript. The generate
|
|
14
|
+
* call gets an `AbortSignal`; the in-flight
|
|
15
|
+
* generation is stashed.
|
|
16
|
+
* - `speech-active` / a new `speech-start` / VAD re-trigger
|
|
17
|
+
* → ABORT the speculative generation (the abort
|
|
18
|
+
* propagates into `dispatcher.generate`).
|
|
19
|
+
* - `speech-end` (no new speech)
|
|
20
|
+
* → finalize: flush the transcriber for the
|
|
21
|
+
* final transcript; if the speculative result
|
|
22
|
+
* is still valid against it, PROMOTE it; else
|
|
23
|
+
* discard and run the real turn on the
|
|
24
|
+
* finalized transcript.
|
|
25
|
+
*
|
|
26
|
+
* Barge-in: while the agent is speaking the controller flips
|
|
27
|
+
* `scheduler.bargeIn.setAgentSpeaking(true)` (and binds the VAD into the
|
|
28
|
+
* barge-in controller). A provisional `pause-tts` pauses TTS in the
|
|
29
|
+
* scheduler; a `blip` → `resume-tts`; ASR-confirmed words → `hard-stop` →
|
|
30
|
+
* the scheduler drains the ring buffer + flushes the chunker, and the
|
|
31
|
+
* controller aborts the in-flight turn (the same `AbortSignal` the engine
|
|
32
|
+
* threads into `dispatcher.generate`). The transcriber's `words` event is
|
|
33
|
+
* wired into `bargeIn.onWordsDetected({wordCount})` so a blip alone only
|
|
34
|
+
* pauses, but real recognized words hard-stop.
|
|
35
|
+
*
|
|
36
|
+
* No fallback sludge: `prewarm` failures surface via `onError`; a speculative
|
|
37
|
+
* abort is a real `AbortSignal.abort()`, never a swallowed flag.
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
import type { BargeInController } from "./barge-in";
|
|
41
|
+
import {
|
|
42
|
+
EOT_MID_CLAUSE_THRESHOLD,
|
|
43
|
+
type EotClassifier,
|
|
44
|
+
turnSignalFromProbability,
|
|
45
|
+
type VoiceTurnSignal,
|
|
46
|
+
} from "./eot-classifier";
|
|
47
|
+
import type { VoiceScheduler } from "./scheduler";
|
|
48
|
+
import type {
|
|
49
|
+
StreamingTranscriber,
|
|
50
|
+
TranscriberEvent,
|
|
51
|
+
TranscriptUpdate,
|
|
52
|
+
VadEvent,
|
|
53
|
+
VadEventSource,
|
|
54
|
+
VoiceInputSource,
|
|
55
|
+
VoiceSegment,
|
|
56
|
+
VoiceSpeaker,
|
|
57
|
+
VoiceTurnMetadata,
|
|
58
|
+
} from "./types";
|
|
59
|
+
|
|
60
|
+
/** Outcome of one generation pass (speculative or final). */
|
|
61
|
+
export interface VoiceTurnOutcome {
|
|
62
|
+
/** The transcript the generation ran against (so the controller can
|
|
63
|
+
* decide whether a speculative result is still valid). */
|
|
64
|
+
transcript: string;
|
|
65
|
+
/** Voice attribution metadata for the transcript that produced this outcome. */
|
|
66
|
+
source?: VoiceInputSource;
|
|
67
|
+
speaker?: VoiceSpeaker;
|
|
68
|
+
segments?: VoiceSegment[];
|
|
69
|
+
turn?: VoiceTurnMetadata;
|
|
70
|
+
/** Final reply text the model produced (already streamed into TTS by the
|
|
71
|
+
* generate callee). May be empty for an IGNORE turn. */
|
|
72
|
+
replyText: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export interface VoiceGenerateRequest {
|
|
76
|
+
/** Best transcript available at the time the request is issued. */
|
|
77
|
+
transcript: string;
|
|
78
|
+
/** Optional source/speaker metadata for attribution-only storage. */
|
|
79
|
+
source?: VoiceInputSource;
|
|
80
|
+
speaker?: VoiceSpeaker;
|
|
81
|
+
segments?: VoiceSegment[];
|
|
82
|
+
turn?: VoiceTurnMetadata;
|
|
83
|
+
/** True for the finalized turn (post `speech-end` + `flush()`), false for
|
|
84
|
+
* a speculative pass off a partial. */
|
|
85
|
+
final: boolean;
|
|
86
|
+
/** Aborted when speech resumes (speculative) or on a hard-stop barge-in. */
|
|
87
|
+
signal: AbortSignal;
|
|
88
|
+
/**
|
|
89
|
+
* Semantic turn-taking signal available at request issue time. Response
|
|
90
|
+
* handlers can deterministically suppress/accept without waiting for another
|
|
91
|
+
* model token when this says the next speaker is not the agent.
|
|
92
|
+
*/
|
|
93
|
+
turnSignal?: VoiceTurnSignal;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
export interface VoiceTurnControllerDeps {
|
|
97
|
+
/** W1: the authoritative VAD event stream (a `VadDetector` is structurally one of these). */
|
|
98
|
+
vad: VadEventSource;
|
|
99
|
+
/** W2: the live streaming transcriber. The controller subscribes to its
|
|
100
|
+
* events and calls `flush()` on `speech-end`. */
|
|
101
|
+
transcriber: StreamingTranscriber;
|
|
102
|
+
/** W9: the voice scheduler — used for the barge-in controller + agent-speaking flag. */
|
|
103
|
+
scheduler: VoiceScheduler;
|
|
104
|
+
/**
|
|
105
|
+
* KV-prefill / response-handler-prefix prewarm. Called on `speech-start`.
|
|
106
|
+
* Fire-and-forget; a rejection is surfaced via `onError`, not swallowed.
|
|
107
|
+
* (In the engine this wraps `engine.prewarmConversation(roomId, ...)` /
|
|
108
|
+
* `runtime.prewarmResponseHandler(roomId)`.)
|
|
109
|
+
*/
|
|
110
|
+
prewarm?: (roomId: string) => void | Promise<void>;
|
|
111
|
+
/** Optional cached first-audio filler played immediately on speech-start. */
|
|
112
|
+
playFirstAudioFiller?: () => string | null;
|
|
113
|
+
/**
|
|
114
|
+
* Semantic turn detector layered with VAD/STT. It runs continuously on
|
|
115
|
+
* partial transcripts so `speech-pause` can decide whether to speculate or
|
|
116
|
+
* wait for the user to continue.
|
|
117
|
+
*/
|
|
118
|
+
turnDetector?: EotClassifier;
|
|
119
|
+
/**
|
|
120
|
+
* Run a generation pass. The callee builds the message, calls the runtime
|
|
121
|
+
* message handler / `useModel`, and streams `replyText` into TTS via the
|
|
122
|
+
* scheduler. Must honour `request.signal` (abort = stop the LLM/drafter at
|
|
123
|
+
* the next kernel boundary). Resolves with the produced reply + the
|
|
124
|
+
* transcript it ran against. Rejecting with the request's `AbortError` is
|
|
125
|
+
* fine — the controller treats that as "aborted".
|
|
126
|
+
*/
|
|
127
|
+
generate: (request: VoiceGenerateRequest) => Promise<VoiceTurnOutcome>;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
export interface VoiceTurnControllerConfig {
|
|
131
|
+
/** Conversation / room id passed to `prewarm` and (implicitly) `generate`. */
|
|
132
|
+
roomId: string;
|
|
133
|
+
/**
|
|
134
|
+
* Minimum `speech-pause` duration before a speculative response is kicked.
|
|
135
|
+
* Default 300 ms — long enough that mid-sentence breath pauses don't
|
|
136
|
+
* trigger one, short enough to win latency on a real end-of-utterance.
|
|
137
|
+
*/
|
|
138
|
+
speculatePauseMs?: number;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
export interface VoiceTurnControllerEvents {
|
|
142
|
+
/** A speculative generation was started off a partial transcript. */
|
|
143
|
+
onSpeculativeStart?(transcript: string): void;
|
|
144
|
+
/** The in-flight speculative generation was aborted (speech resumed). */
|
|
145
|
+
onSpeculativeAbort?(): void;
|
|
146
|
+
/** The speculative result was promoted as the turn's answer (it matched the final transcript). */
|
|
147
|
+
onSpeculativePromoted?(outcome: VoiceTurnOutcome): void;
|
|
148
|
+
/** A turn finished (promoted speculative OR a fresh final run). */
|
|
149
|
+
onTurnComplete?(outcome: VoiceTurnOutcome): void;
|
|
150
|
+
/** `prewarm` rejected, or a `generate` pass rejected with a non-abort error. */
|
|
151
|
+
onError?(error: Error): void;
|
|
152
|
+
/** A VAD pause/end was suppressed because semantic turn-taking says user continues. */
|
|
153
|
+
onTurnSuppressed?(transcript: string, signal: VoiceTurnSignal): void;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
const DEFAULT_SPECULATE_PAUSE_MS = 300;
|
|
157
|
+
|
|
158
|
+
interface InFlightGeneration {
|
|
159
|
+
/** Transcript the generation ran against. */
|
|
160
|
+
transcript: string;
|
|
161
|
+
controller: AbortController;
|
|
162
|
+
promise: Promise<VoiceTurnOutcome | null>;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
 * Turn-taking controller layered over the VAD event stream, the streaming
 * transcriber and the voice scheduler's barge-in controller.
 *
 * Lifecycle: construct, `start()` to subscribe, `stop()` to detach and abort
 * whatever generation is in flight. All failures (prewarm, filler, flush,
 * turn detector, generate) are reported through `events.onError`.
 */
export class VoiceTurnController {
	private readonly deps: VoiceTurnControllerDeps;
	private readonly events: VoiceTurnControllerEvents;
	private readonly roomId: string;
	private readonly speculatePauseMs: number;
	private readonly bargeIn: BargeInController;

	/** The speculative generation currently in flight, if any. */
	private speculative: InFlightGeneration | null = null;
	/** A finalize() in progress (awaiting `transcriber.flush()` + generate). */
	private finalizing: Promise<void> | null = null;
	/** Text of the most recent partial/final transcriber update for this segment. */
	private latestPartial = "";
	/** Most recent turn-detector result, keyed by the transcript it was computed for. */
	private latestTurnSignal: {
		transcript: string;
		signal: VoiceTurnSignal;
		sequence: number;
	} | null = null;
	/** Monotonic counter used to discard turn-signal results that resolve out of order. */
	private turnSignalSequence = 0;
	private started = false;
	private vadUnsub: (() => void) | null = null;
	private transcriberUnsub: (() => void) | null = null;
	private bargeSignalUnsub: (() => void) | null = null;
	/**
	 * Abort handle of the generation that is producing the turn's answer: a
	 * fresh final run, or a speculative run that has been promoted. Aborted on
	 * a hard-stop barge-in and on `stop()`.
	 */
	private activeFinalController: AbortController | null = null;
	/** True once `speech-end` ran and finalize is pending/done for this segment. */
	private segmentEnded = false;
	/** Most recent transcriber update (source of the attribution metadata). */
	private latestUpdate: TranscriptUpdate | null = null;

	/**
	 * @param deps Collaborators (VAD, transcriber, scheduler, generate, …).
	 * @param config Room id and the speculation pause threshold; a negative
	 *   `speculatePauseMs` is clamped to 0.
	 * @param events Optional observer callbacks.
	 */
	constructor(
		deps: VoiceTurnControllerDeps,
		config: VoiceTurnControllerConfig,
		events: VoiceTurnControllerEvents = {},
	) {
		this.deps = deps;
		this.events = events;
		this.roomId = config.roomId;
		this.speculatePauseMs = Math.max(
			0,
			config.speculatePauseMs ?? DEFAULT_SPECULATE_PAUSE_MS,
		);
		this.bargeIn = deps.scheduler.bargeIn;
	}

	/** Subscribe to the VAD + transcriber streams and start turn-taking. Idempotent. */
	start(): void {
		if (this.started) return;
		this.started = true;
		// Barge-in controller takes the VAD directly so it can pause/resume TTS
		// while the agent is speaking; the scheduler already listens to its
		// `onSignal` stream.
		this.bargeIn.bindVad(this.deps.vad);
		this.bargeSignalUnsub = this.bargeIn.onSignal((signal) => {
			if (signal.type !== "hard-stop") return;
			// A hard-stop cancels every generation we own: the speculative one
			// and the one currently answering the turn.
			this.abortSpeculative();
			if (
				this.activeFinalController &&
				!this.activeFinalController.signal.aborted
			) {
				this.activeFinalController.abort();
			}
		});
		this.vadUnsub = this.deps.vad.onVadEvent((e) => this.onVadEvent(e));
		this.transcriberUnsub = this.deps.transcriber.on((e) =>
			this.onTranscriberEvent(e),
		);
	}

	/**
	 * Detach from the streams and abort any in-flight generation (speculative
	 * or final). A finalize that is still awaiting will notice `started` is
	 * false and will not launch a new generation. No-op when not started.
	 */
	stop(): void {
		if (!this.started) return;
		this.started = false;
		this.vadUnsub?.();
		this.vadUnsub = null;
		this.transcriberUnsub?.();
		this.transcriberUnsub = null;
		this.bargeIn.unbindVad();
		this.bargeSignalUnsub?.();
		this.bargeSignalUnsub = null;
		this.abortSpeculative();
		if (
			this.activeFinalController &&
			!this.activeFinalController.signal.aborted
		) {
			this.activeFinalController.abort();
		}
		this.activeFinalController = null;
	}

	// --- VAD ---------------------------------------------------------------

	/** Dispatch one VAD event to the matching turn-taking reaction. */
	private onVadEvent(event: VadEvent): void {
		switch (event.type) {
			case "speech-start": {
				// New utterance onset. If we were mid-finalize from a previous
				// segment, that segment got *more* speech — abort the speculative
				// run for it (the finalize promise still resolves; its abort is
				// honoured). Reset segment state + the barge-in episode so the next
				// hard-stop gets a fresh `BargeInCancelToken`.
				this.segmentEnded = false;
				this.latestUpdate = null;
				this.latestPartial = "";
				this.abortSpeculative();
				this.bargeIn.reset();
				this.playFirstAudioFiller();
				void this.firePrewarm();
				break;
			}
			case "speech-active": {
				// Speech is ongoing again — any speculative response we kicked on a
				// pause is stale. Abort it.
				if (this.speculative) this.abortSpeculative();
				break;
			}
			case "speech-pause": {
				// Speculate only on a long-enough pause, once per pause, and never
				// after the segment has already ended.
				if (
					event.pauseDurationMs >= this.speculatePauseMs &&
					!this.speculative &&
					!this.segmentEnded
				) {
					this.maybeStartSpeculative(this.latestPartial, this.latestUpdate);
				}
				break;
			}
			case "speech-end": {
				this.segmentEnded = true;
				this.beginFinalize();
				break;
			}
			case "blip":
				// Handled entirely by the barge-in controller (resume-tts when the
				// agent is speaking; nothing otherwise). No turn-taking effect.
				break;
		}
	}

	/** Track the latest transcript and forward recognized words to barge-in. */
	private onTranscriberEvent(event: TranscriberEvent): void {
		switch (event.kind) {
			case "partial":
				this.latestPartial = event.update.partial;
				this.latestUpdate = event.update;
				this.queueTurnSignalRefresh(event.update.partial);
				break;
			case "final":
				this.latestPartial = event.update.partial;
				this.latestUpdate = event.update;
				this.queueTurnSignalRefresh(event.update.partial);
				break;
			case "words":
				// ASR confirmed real words during a barge-in window — promote a
				// provisional `pause-tts` into a `hard-stop` (TTS cancelled + LLM
				// aborted). A blip alone would never reach here.
				this.bargeIn.onWordsDetected({
					wordCount: event.words.length,
					partialText: event.words.join(" "),
					timestampMs: Date.now(),
				});
				break;
		}
	}

	// --- prewarm -----------------------------------------------------------

	/**
	 * C2 — public idle prewarm entry point. Callers (e.g. the UI when a
	 * conversation opens) invoke this to materialize the KV cache for the
	 * response-handler stable prefix BEFORE the user starts speaking, so the
	 * first speech-start has nothing left to do. Fire-and-forget: the method
	 * returns `void` (not a promise) because we don't want callers blocking on
	 * prewarm; failures surface via `onError` exactly like the speech-start
	 * path. Repeated calls simply invoke `prewarm` again.
	 */
	prewarmOnIdle(): void {
		void this.firePrewarm();
	}

	/** Invoke the optional `prewarm` dep; a throw/rejection goes to `onError`. */
	private async firePrewarm(): Promise<void> {
		if (!this.deps.prewarm) return;
		try {
			await this.deps.prewarm(this.roomId);
		} catch (err) {
			this.events.onError?.(toError(err));
		}
	}

	/** Invoke the optional filler dep; its return value is ignored. */
	private playFirstAudioFiller(): void {
		if (!this.deps.playFirstAudioFiller) return;
		try {
			this.deps.playFirstAudioFiller();
		} catch (err) {
			this.events.onError?.(toError(err));
		}
	}

	// --- speculative generation -------------------------------------------

	/**
	 * Start a speculative run off `transcript` unless it is blank. Without a
	 * turn detector the run starts synchronously; with one, it is gated on the
	 * detector's verdict.
	 */
	private maybeStartSpeculative(
		transcript: string,
		update: TranscriptUpdate | null,
	): void {
		const text = transcript.trim();
		if (text.length === 0) return;
		if (!this.deps.turnDetector) {
			this.startSpeculative(text, update, null);
			return;
		}
		void this.startSpeculativeAfterTurnSignal(text, update);
	}

	/**
	 * Await the turn signal for `text`, then start the speculative run if the
	 * world has not moved on in the meantime and the signal does not suppress
	 * agent speech.
	 */
	private async startSpeculativeAfterTurnSignal(
		text: string,
		update: TranscriptUpdate | null,
	): Promise<void> {
		const turnSignal = await this.ensureTurnSignal(text);
		// Re-validate after the await: stopped, segment ended, another
		// speculative run already started, or the partial changed.
		if (
			!this.started ||
			this.segmentEnded ||
			this.speculative ||
			this.latestPartial.trim() !== text
		) {
			return;
		}
		if (turnSignal && shouldSuppressAgentSpeech(turnSignal)) {
			this.events.onTurnSuppressed?.(text, turnSignal);
			return;
		}
		this.startSpeculative(text, update, turnSignal);
	}

	/** Launch a non-final generate pass and stash it with its abort handle. */
	private startSpeculative(
		text: string,
		update: TranscriptUpdate | null,
		turnSignal: VoiceTurnSignal | null,
	): void {
		const controller = new AbortController();
		this.events.onSpeculativeStart?.(text);
		const promise = this.runGenerate({
			transcript: text,
			...voiceRequestMetadata(update),
			final: false,
			signal: controller.signal,
			...(turnSignal ? { turnSignal } : {}),
		});
		this.speculative = { transcript: text, controller, promise };
	}

	/** Abort and forget the stashed speculative run, if there is one. */
	private abortSpeculative(): void {
		const spec = this.speculative;
		if (!spec) return;
		this.speculative = null;
		if (!spec.controller.signal.aborted) spec.controller.abort();
		this.events.onSpeculativeAbort?.();
		// Drop the partial TTS the speculative run may have already streamed —
		// it was generated against a stale partial transcript. This is NOT a
		// user barge-in, so use the dedicated drop path (no `onCancel`).
		this.deps.scheduler.cancelPendingTts();
	}

	// --- finalize ----------------------------------------------------------

	/** Kick `finalize()` unless one is already running. */
	private beginFinalize(): void {
		// Serialize finalize calls — `speech-end` should only fire once per
		// segment, but be defensive against a VAD that repeats it.
		// NOTE(review): a `speech-end` for a NEW segment that arrives while a
		// previous finalize is still generating is also dropped here — confirm
		// that is intended.
		if (this.finalizing) return;
		this.finalizing = this.finalize().finally(() => {
			this.finalizing = null;
		});
	}

	/**
	 * Resolve the turn after `speech-end`: flush the transcriber, promote the
	 * speculative run if it matches the final transcript, otherwise run a
	 * fresh final generation.
	 */
	private async finalize(): Promise<void> {
		let finalUpdate: TranscriptUpdate;
		try {
			finalUpdate = await this.deps.transcriber.flush();
		} catch (err) {
			// Flush failure aborts any speculative run and bubbles up — no silent
			// empty-transcript turn.
			this.abortSpeculative();
			this.events.onError?.(toError(err));
			return;
		}
		const finalTranscript = finalUpdate.partial.trim();
		// If the controller was stopped, or a new `speech-start` arrived while
		// we were flushing (that segment got more speech) — drop this finalize.
		if (!this.started || !this.segmentEnded) {
			this.abortSpeculative();
			return;
		}

		const spec = this.speculative;
		if (spec && spec.transcript === finalTranscript) {
			// The speculative run is valid — promote it (its TTS has already been
			// streaming). From here on it IS the turn's answer, so register its
			// abort handle as the active final one: otherwise a hard-stop
			// barge-in or `stop()` could no longer reach it once
			// `this.speculative` is cleared.
			this.speculative = null;
			this.activeFinalController = spec.controller;
			let outcome: VoiceTurnOutcome | null;
			try {
				outcome = await spec.promise;
			} catch (err) {
				outcome = null;
				this.events.onError?.(toError(err));
			} finally {
				if (this.activeFinalController === spec.controller) {
					this.activeFinalController = null;
				}
			}
			if (outcome) {
				this.events.onSpeculativePromoted?.(outcome);
				this.events.onTurnComplete?.(outcome);
				return;
			}
			// Cancelled on purpose (hard-stop barge-in or stop()): do NOT restart
			// the generation the user just interrupted.
			if (!this.started || spec.controller.signal.aborted) return;
			// Speculative failed after all — fall through to a fresh final run
			// below.
		} else if (spec) {
			// The partial we speculated off didn't survive — discard it (its TTS
			// is stale).
			this.abortSpeculative();
		}

		if (finalTranscript.length === 0) {
			// Nothing was said (a blip the VAD let through). No turn.
			return;
		}
		const finalTurnSignal = await this.ensureTurnSignal(finalTranscript);
		// `stop()` may have run while the turn detector was scoring; a
		// generation started now could never be aborted by the controller.
		if (!this.started) return;
		if (finalTurnSignal && shouldSuppressAgentSpeech(finalTurnSignal)) {
			this.abortSpeculative();
			this.events.onTurnSuppressed?.(finalTranscript, finalTurnSignal);
			return;
		}
		const controller = new AbortController();
		this.activeFinalController = controller;
		let outcome: VoiceTurnOutcome | null;
		try {
			outcome = await this.runGenerate({
				transcript: finalTranscript,
				...voiceRequestMetadata(finalUpdate),
				final: true,
				signal: controller.signal,
				...(finalTurnSignal ? { turnSignal: finalTurnSignal } : {}),
			});
		} catch (err) {
			outcome = null;
			this.events.onError?.(toError(err));
		} finally {
			// Only clear the slot if it still refers to this run.
			if (this.activeFinalController === controller) {
				this.activeFinalController = null;
			}
		}
		if (outcome) this.events.onTurnComplete?.(outcome);
	}

	// --- generate adapter --------------------------------------------------

	/**
	 * Call `deps.generate`, mapping failure to `null`: abort errors (or any
	 * error once the request's signal is aborted) are silent; every other
	 * error is reported via `onError`.
	 */
	private async runGenerate(
		request: VoiceGenerateRequest,
	): Promise<VoiceTurnOutcome | null> {
		try {
			return await this.deps.generate(request);
		} catch (err) {
			if (isAbortError(err) || request.signal.aborted) return null;
			this.events.onError?.(toError(err));
			return null;
		}
	}

	// --- semantic turn detector ------------------------------------------

	/** Fire-and-forget turn-signal computation for a non-blank transcript. */
	private queueTurnSignalRefresh(transcript: string): void {
		if (!this.deps.turnDetector || transcript.trim().length === 0) return;
		void this.computeTurnSignal(transcript);
	}

	/**
	 * Return the cached turn signal when it was computed for exactly this
	 * (trimmed) transcript; otherwise compute a new one. `null` when there is
	 * no detector, the transcript is blank, or the detector failed.
	 */
	private async ensureTurnSignal(
		transcript: string,
	): Promise<VoiceTurnSignal | null> {
		const text = transcript.trim();
		if (!this.deps.turnDetector || text.length === 0) return null;
		const cached = this.latestTurnSignal;
		if (cached && cached.transcript === text) return cached.signal;
		return this.computeTurnSignal(text);
	}

	/**
	 * Run the turn detector on `transcript` and cache the result unless a
	 * later-issued computation has already been cached. Detector failures go
	 * to `onError` and yield `null`.
	 */
	private async computeTurnSignal(
		transcript: string,
	): Promise<VoiceTurnSignal | null> {
		const detector = this.deps.turnDetector;
		if (!detector) return null;
		const text = transcript.trim();
		if (text.length === 0) return null;
		const sequence = ++this.turnSignalSequence;
		try {
			// Prefer the detector's own `signal()`; otherwise derive a signal from
			// its raw `score()` probability.
			const signal = detector.signal
				? await detector.signal(text)
				: turnSignalFromProbability({
						probability: await detector.score(text),
						transcript: text,
						source: "custom",
						model: detector.constructor.name,
					});
			const current = this.latestTurnSignal;
			if (!current || sequence >= current.sequence) {
				this.latestTurnSignal = { transcript: text, signal, sequence };
			}
			return signal;
		} catch (err) {
			this.events.onError?.(toError(err));
			return null;
		}
	}
}
|
|
566
|
+
|
|
567
|
+
/**
 * Decide whether a turn signal says the agent should stay quiet.
 *
 * Suppresses when the signal explicitly says the agent should not speak, when
 * it names the user as the next speaker, or when the end-of-turn probability
 * is below `EOT_MID_CLAUSE_THRESHOLD`.
 */
function shouldSuppressAgentSpeech(signal: VoiceTurnSignal): boolean {
	if (signal.agentShouldSpeak === false) return true;
	if (signal.nextSpeaker === "user") return true;
	return signal.endOfTurnProbability < EOT_MID_CLAUSE_THRESHOLD;
}
|
|
574
|
+
|
|
575
|
+
/**
 * Heuristic abort detection: true for an `Error` whose name is `AbortError`
 * or whose message contains "abort" in any letter case. Non-`Error` values
 * are never treated as aborts.
 */
function isAbortError(err: unknown): boolean {
	if (!(err instanceof Error)) return false;
	if (err.name === "AbortError") return true;
	const lowered = err.message.toLowerCase();
	return lowered.includes("abort");
}
|
|
581
|
+
|
|
582
|
+
/**
 * Normalize any thrown value into an `Error`.
 *
 * - An `Error` is returned as-is (same instance).
 * - An object uses its non-empty string `message` if it has one, otherwise
 *   its JSON form, so the report is not the useless "[object Object]".
 * - Anything else is converted with `String(...)`.
 *
 * Never throws, including for cyclic or null-prototype objects.
 */
function toError(err: unknown): Error {
	if (err instanceof Error) return err;
	if (typeof err === "object" && err !== null) {
		try {
			const message = (err as { message?: unknown }).message;
			if (typeof message === "string" && message.length > 0) {
				return new Error(message);
			}
			// `JSON.stringify` yields undefined for e.g. a `toJSON` returning undefined.
			const serialized = JSON.stringify(err);
			if (typeof serialized === "string") return new Error(serialized);
		} catch {
			// Throwing getter or cyclic structure — use the tag-based fallback below.
		}
		// Safe even for null-prototype objects, where `String(err)` would throw.
		return new Error(Object.prototype.toString.call(err));
	}
	return new Error(String(err));
}
|
|
585
|
+
|
|
586
|
+
/**
 * Copy the attribution fields (`source`, `speaker`, `segments`, `turn`) from
 * a transcript update onto a fresh object, skipping any that are falsy.
 * Returns an empty object when there is no update.
 */
function voiceRequestMetadata(
	update: TranscriptUpdate | null,
): Pick<VoiceGenerateRequest, "source" | "speaker" | "segments" | "turn"> {
	const metadata: Pick<
		VoiceGenerateRequest,
		"source" | "speaker" | "segments" | "turn"
	> = {};
	if (!update) return metadata;
	if (update.source) metadata.source = update.source;
	if (update.speaker) metadata.speaker = update.speaker;
	if (update.segments) metadata.segments = update.segments;
	if (update.turn) metadata.turn = update.turn;
	return metadata;
}
|