@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BargeInController tests — the blip-vs-words distinction.
|
|
3
|
+
*
|
|
4
|
+
* - legacy `attach`/`onMicActive`/`cancelSignal`/`reset` still work
|
|
5
|
+
* (`VoiceScheduler` depends on them).
|
|
6
|
+
* - VAD-driven flow while the agent is speaking:
|
|
7
|
+
* speech-active → pause-tts
|
|
8
|
+
* blip → resume-tts
|
|
9
|
+
* ASR word → hard-stop (cancel token tripped, AbortSignal aborted)
|
|
10
|
+
* no ASR word → resume-tts after the grace window
|
|
11
|
+
* - barge-in is inert while the agent is NOT speaking.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, expect, it, vi } from "vitest";
|
|
15
|
+
import { BargeInController } from "./barge-in";
|
|
16
|
+
import type { BargeInSignal, VadEvent, VadEventListener } from "./types";
|
|
17
|
+
|
|
18
|
+
/**
 * In-memory stand-in for a VAD detector: tests push events in through
 * `emit`, and whatever subscribed through `onVadEvent` receives them
 * synchronously.
 */
class FakeVad {
  private readonly listeners = new Set<VadEventListener>();

  /** Registers `l`; the returned function removes it again. */
  onVadEvent(l: VadEventListener): () => void {
    this.listeners.add(l);
    return () => this.listeners.delete(l);
  }

  /** Delivers `e` to every listener that is currently registered. */
  emit(e: VadEvent): void {
    this.listeners.forEach((listener) => listener(e));
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Fixture: a `speech-active` VAD event stamped at `ts`, with a fixed
 * probability (0.9) and speech duration (200 ms).
 */
function speechActive(ts: number): VadEvent {
  const event: VadEvent = {
    type: "speech-active",
    timestampMs: ts,
    probability: 0.9,
    speechDurationMs: 200,
  };
  return event;
}
|
|
38
|
+
/**
 * Fixture: a `blip` VAD event stamped at `ts`, with a fixed duration (80 ms)
 * and peak RMS (0.2).
 */
function blip(ts: number): VadEvent {
  const event: VadEvent = {
    type: "blip",
    timestampMs: ts,
    durationMs: 80,
    peakRms: 0.2,
  };
  return event;
}
|
|
41
|
+
|
|
42
|
+
// Legacy surface: attach / onMicActive / cancelSignal / reset.
describe("BargeInController — legacy API", () => {
  it("flips the cancel signal and notifies listeners on onMicActive", () => {
    const controller = new BargeInController();
    let cancelCalls = 0;
    controller.attach({
      onCancel: () => {
        cancelCalls += 1;
      },
    });

    // Nothing is cancelled until the mic goes active.
    expect(controller.cancelSignal().cancelled).toBe(false);

    controller.onMicActive();

    expect(controller.cancelSignal().cancelled).toBe(true);
    expect(cancelCalls).toBe(1);
  });

  it("reset issues a fresh cancel signal", () => {
    const controller = new BargeInController();

    controller.onMicActive();
    expect(controller.cancelSignal().cancelled).toBe(true);

    controller.reset();
    expect(controller.cancelSignal().cancelled).toBe(false);
    expect(controller.currentCancelToken()).toBeNull();
  });
});
|
|
62
|
+
|
|
63
|
+
// VAD-driven flow: pause on speech, resume on blip / grace timeout,
// hard-stop once ASR confirms at least one word.
describe("BargeInController — VAD-driven barge-in", () => {
  /**
   * Builds a controller bound to a `FakeVad` and records every signal it
   * emits. `speaking` (default true) marks the agent as speaking before the
   * signal recorder is attached; `wordsGraceMs` is forwarded to the
   * constructor only when given.
   */
  function setup(options: { speaking?: boolean; wordsGraceMs?: number } = {}) {
    const { speaking = true, wordsGraceMs } = options;
    const controller =
      wordsGraceMs === undefined
        ? new BargeInController()
        : new BargeInController({ wordsGraceMs });
    const vad = new FakeVad();
    const unbind = controller.bindVad(vad);
    if (speaking) controller.setAgentSpeaking(true);
    const signals: BargeInSignal[] = [];
    controller.onSignal((s) => signals.push(s));
    const types = () => signals.map((s) => s.type);
    return { controller, vad, unbind, signals, types };
  }

  /** Runs `body` under vitest fake timers and always restores real timers. */
  function withFakeTimers(body: () => void): void {
    vi.useFakeTimers();
    try {
      body();
    } finally {
      vi.useRealTimers();
    }
  }

  it("does nothing while the agent is not speaking", () => {
    const { vad, signals } = setup({ speaking: false });

    vad.emit(speechActive(100));
    vad.emit(blip(200));

    expect(signals).toHaveLength(0);
  });

  it("pauses TTS on speech-active and resumes on a blip", () => {
    const { vad, types } = setup();

    vad.emit(speechActive(100));
    expect(types()).toEqual(["pause-tts"]);

    // Already paused: a further speech-active must not emit a second pause.
    vad.emit(speechActive(132));
    expect(types()).toEqual(["pause-tts"]);

    // The blip verdict resumes playback.
    vad.emit(blip(300));
    expect(types()).toEqual(["pause-tts", "resume-tts"]);
  });

  it("hard-stops (cancel token + AbortSignal) when ASR confirms a word", () => {
    const { controller, vad, signals, types } = setup();
    let onCancelCalls = 0;
    controller.attach({
      onCancel: () => {
        onCancelCalls += 1;
      },
    });

    vad.emit(speechActive(100)); // pause-tts
    expect(types()).toEqual(["pause-tts"]);

    controller.onWordsDetected({
      wordCount: 1,
      partialText: "hey wait",
      timestampMs: 250,
    });

    expect(types()).toEqual(["pause-tts", "hard-stop"]);
    const hard = signals.find((s) => s.type === "hard-stop");
    expect(hard && hard.type === "hard-stop").toBe(true);
    if (hard && hard.type === "hard-stop") {
      const { token } = hard;
      expect(token.cancelled).toBe(true);
      expect(token.reason).toBe("barge-in-words");
      expect(token.signal.aborted).toBe(true);
    }
    expect(controller.currentCancelToken()?.cancelled).toBe(true);
    expect(controller.cancelSignal().cancelled).toBe(true);
    expect(onCancelCalls).toBe(1);
  });

  it("ignores onWordsDetected with zero words", () => {
    const { controller, vad, types } = setup();

    vad.emit(speechActive(100));
    controller.onWordsDetected({
      wordCount: 0,
      partialText: "",
      timestampMs: 200,
    });

    expect(types()).toEqual(["pause-tts"]);
  });

  it("resumes TTS after the grace window when ASR never confirms a word", () => {
    withFakeTimers(() => {
      const { vad, types } = setup({ wordsGraceMs: 500 });

      vad.emit(speechActive(100)); // pause-tts, arms the 500 ms deadline
      expect(types()).toEqual(["pause-tts"]);

      vad.emit({
        type: "speech-pause",
        timestampMs: 300,
        pauseDurationMs: 200,
      });
      vad.emit({
        type: "speech-end",
        timestampMs: 500,
        speechDurationMs: 350,
      });

      // 400 ms in: the grace window is still open.
      vi.advanceTimersByTime(400);
      expect(types()).toEqual(["pause-tts"]);

      // 600 ms in: the deadline has passed, so playback resumes.
      vi.advanceTimersByTime(200);
      expect(types()).toEqual(["pause-tts", "resume-tts"]);
    });
  });

  it("a word arriving before the grace deadline cancels the resume", () => {
    withFakeTimers(() => {
      const { controller, vad, types } = setup({ wordsGraceMs: 500 });

      vad.emit(speechActive(100));
      vi.advanceTimersByTime(300);
      controller.onWordsDetected({
        wordCount: 2,
        partialText: "stop please",
        timestampMs: 400,
      });
      vi.advanceTimersByTime(500);

      expect(types()).toEqual(["pause-tts", "hard-stop"]);
    });
  });

  it("ignores stale ASR words after the interruption window resumes TTS", () => {
    withFakeTimers(() => {
      const { controller, vad, types } = setup({ wordsGraceMs: 500 });

      vad.emit(speechActive(100));
      vi.advanceTimersByTime(600);
      expect(types()).toEqual(["pause-tts", "resume-tts"]);

      controller.onWordsDetected({
        wordCount: 2,
        partialText: "late stale",
        timestampMs: 800,
      });

      expect(types()).toEqual(["pause-tts", "resume-tts"]);
      expect(controller.currentCancelToken()).toBeNull();
    });
  });

  it("setAgentSpeaking(false) clears a pending word-confirm without resuming", () => {
    withFakeTimers(() => {
      const { controller, vad, types } = setup({ wordsGraceMs: 500 });

      vad.emit(speechActive(100)); // pause-tts
      controller.setAgentSpeaking(false); // agent finished its turn
      vi.advanceTimersByTime(1000);

      expect(types()).toEqual(["pause-tts"]);
    });
  });

  it("unbindVad detaches the controller from VAD events", () => {
    const { vad, unbind, signals } = setup();

    unbind();
    vad.emit(speechActive(100));

    expect(signals).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Barge-in controller — distinguishes a blip from real speech while the
|
|
3
|
+
* agent is talking, and turns that into TTS pause/resume/hard-stop plus an
|
|
4
|
+
* LLM-generation abort.
|
|
5
|
+
*
|
|
6
|
+
* Inputs:
|
|
7
|
+
* - the `VadEvent` stream from `VadDetector` (subscribe via `bindVad()`),
|
|
8
|
+
* - W2's ASR word-confirm callback (`onWordsDetected()` — the
|
|
9
|
+
* `WordsDetectedSink` contract).
|
|
10
|
+
*
|
|
11
|
+
* Behaviour while the agent is speaking (`agentSpeaking === true`):
|
|
12
|
+
* - `speech-active` → emit `pause-tts`. (Provisional — could still be a
|
|
13
|
+
* blip; the energy-duration heuristic guesses, ASR
|
|
14
|
+
* confirms.)
|
|
15
|
+
* - `blip` (or a short `speech-end` before any words)
|
|
16
|
+
* → emit `resume-tts`. The agent keeps talking.
|
|
17
|
+
* - `onWordsDetected({wordCount ≥ 1})` → emit `hard-stop` with a fresh
|
|
18
|
+
* `BargeInCancelToken`. Hard-stop means: cancel TTS
|
|
19
|
+
* *and* abort the in-flight LLM / MTP drafter
|
|
20
|
+
* generation. The engine layer (W9) threads
|
|
21
|
+
* `token.signal` into `dispatcher.generate` and polls
|
|
22
|
+
* `token.cancelled` at kernel boundaries.
|
|
23
|
+
* - `speech-end` with a long-enough segment but no ASR words yet →
|
|
24
|
+
* treated as words-pending: emit `hard-stop` only
|
|
25
|
+
* once ASR confirms; if ASR never confirms within
|
|
26
|
+
* `wordsGraceMs`, resume TTS (it was non-speech the
|
|
27
|
+
* Silero VAD let through).
|
|
28
|
+
*
|
|
29
|
+
* Legacy API (still used by `VoiceScheduler` and `EngineVoiceBridge`):
|
|
30
|
+
* `attach({onCancel})`, `onMicActive()`, `cancelSignal()`, `reset()` — a
|
|
31
|
+
* thin "everything cancelled" path. `onMicActive()` is now equivalent to
|
|
32
|
+
* `hardStop("manual")`.
|
|
33
|
+
*
|
|
34
|
+
* No fallback sludge: a `hard-stop` always carries a real `AbortSignal`; the
|
|
35
|
+
* controller never swallows a VAD event.
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
import type {
|
|
39
|
+
BargeInCancelToken,
|
|
40
|
+
BargeInSignal,
|
|
41
|
+
BargeInSignalListener,
|
|
42
|
+
VadEvent,
|
|
43
|
+
VadEventListener,
|
|
44
|
+
WordsDetectedSink,
|
|
45
|
+
} from "./types";
|
|
46
|
+
|
|
47
|
+
/**
 * The single `VadDetector` method this module relies on. Declared
 * structurally here so this file takes no module dependency on `vad.ts`
 * (which pulls in the fused `libelizainference` VAD FFI surface).
 */
interface VadEventSource {
  /** Subscribes `listener`; the returned function is used to unsubscribe. */
  onVadEvent(listener: VadEventListener): () => void;
}
|
|
52
|
+
|
|
53
|
+
// --- Legacy interfaces (kept; `VoiceScheduler` depends on them) ------------
|
|
54
|
+
|
|
55
|
+
/** Legacy listener registered through `BargeInController.attach()`. */
export interface BargeInListener {
  /** Invoked by the controller each time a hard stop is issued. */
  onCancel(): void;
}
|
|
58
|
+
|
|
59
|
+
/** Legacy cancel flag handed out by `BargeInController.cancelSignal()`. */
export interface CancelSignal {
  /** Set to `true` by a hard stop; `reset()` swaps in a fresh, unset flag. */
  cancelled: boolean;
}
|
|
62
|
+
|
|
63
|
+
// --- New: cancel token --------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
/**
 * Trip closures for tokens minted by `makeCancelToken`, keyed by the token
 * object. Kept module-private so that only this file can trip a token: the
 * closure is never attached to the token itself, and no type assertion is
 * needed to reach it. A `WeakMap` lets a discarded token (and its
 * `AbortController`) be garbage-collected.
 */
const tokenTrippers = new WeakMap<
  BargeInCancelToken,
  (reason: BargeInCancelToken["reason"]) => void
>();

/**
 * Creates a cancel token backed by its own `AbortController`.
 *
 * @param reason When truthy the token is tripped immediately with that
 *   reason; pass `null` for an armed-but-untripped token.
 * @returns The token; `signal` is the controller's `AbortSignal`.
 */
function makeCancelToken(
  reason: BargeInCancelToken["reason"],
): BargeInCancelToken {
  const controller = new AbortController();
  const token: BargeInCancelToken = {
    cancelled: false,
    reason: null,
    signal: controller.signal,
  };
  const trip = (r: BargeInCancelToken["reason"]): void => {
    // First trip wins: later calls must not overwrite the recorded reason.
    if (token.cancelled) return;
    token.cancelled = true;
    token.reason = r;
    controller.abort();
  };
  tokenTrippers.set(token, trip);
  if (reason) trip(reason);
  return token;
}

/**
 * Trips `token` with `reason`: marks it cancelled, records the reason and
 * aborts its signal. A no-op when the token is already cancelled or was not
 * created by `makeCancelToken`.
 */
function tripToken(
  token: BargeInCancelToken,
  reason: BargeInCancelToken["reason"],
): void {
  tokenTrippers.get(token)?.(reason);
}
|
|
95
|
+
|
|
96
|
+
/** Construction options for `BargeInController`. */
export interface BargeInControllerConfig {
  /**
   * Grace window, in milliseconds, that starts when TTS is paused on a
   * `speech-active`. If ASR has not reported at least one word by the time
   * it elapses, TTS is resumed. Defaults to 600 ms: long enough for a
   * streaming ASR partial to arrive, short enough that a cough does not
   * keep the agent muted.
   */
  wordsGraceMs?: number;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Turns VAD events and ASR word confirmations into `pause-tts` /
 * `resume-tts` / `hard-stop` signals, and keeps the legacy
 * attach / onMicActive / cancelSignal / reset surface working.
 */
export class BargeInController implements WordsDetectedSink {
  /** Legacy `onCancel` listeners registered through `attach()`. */
  private readonly listeners = new Set<BargeInListener>();
  /** Listeners for the pause / resume / hard-stop signal stream. */
  private readonly signalListeners = new Set<BargeInSignalListener>();
  private readonly wordsGraceMs: number;

  /** Legacy cancel flag; `reset()` replaces it with a fresh object. */
  private signal: CancelSignal = { cancelled: false };
  /** Token of the current barge-in episode; null until a hard stop. */
  private activeToken: BargeInCancelToken | null = null;

  /** Set through `setAgentSpeaking()`. VAD events and ASR words are only
   * acted on while this is true. */
  private agentSpeaking = false;
  /** True between emitting `pause-tts` and settling blip-vs-words. */
  private awaitingWordConfirm = false;
  private wordConfirmDeadlineTimer: ReturnType<typeof setTimeout> | null = null;
  /** Timestamp (VAD clock) after which ASR words no longer count. */
  private wordConfirmExpiresAtMs: number | null = null;
  /** Timestamp of the most recent VAD event; 0 before the first one. */
  private lastEventTimestampMs = 0;
  private vadUnsub: (() => void) | null = null;

  constructor(config: BargeInControllerConfig = {}) {
    this.wordsGraceMs = config.wordsGraceMs ?? 600;
  }

  // --- Signal subscription / VAD wiring ---------------------------------------

  /**
   * Subscribes to `pause-tts` / `resume-tts` / `hard-stop`.
   * @returns A function that removes the listener.
   */
  onSignal(listener: BargeInSignalListener): () => void {
    this.signalListeners.add(listener);
    return () => this.signalListeners.delete(listener);
  }

  /**
   * Binds this controller to a VAD event source, dropping any previous
   * binding first.
   * @returns A function that calls `unbindVad()`.
   */
  bindVad(detector: VadEventSource): () => void {
    this.unbindVad();
    this.vadUnsub = detector.onVadEvent((event: VadEvent) => {
      this.onVadEvent(event);
    });
    return () => {
      this.unbindVad();
    };
  }

  /** Drops the current VAD subscription, if there is one. */
  unbindVad(): void {
    if (!this.vadUnsub) return;
    this.vadUnsub();
    this.vadUnsub = null;
  }

  /**
   * Records whether the agent's TTS is playing. Switching to `false` drops
   * any pending word-confirm (timer and window) without emitting a signal.
   */
  setAgentSpeaking(speaking: boolean): void {
    if (speaking === this.agentSpeaking) return;
    this.agentSpeaking = speaking;
    if (speaking) return;
    // The agent stopped on its own, so nothing is left to resume or stop.
    this.clearWordConfirm();
    this.awaitingWordConfirm = false;
  }

  get isAgentSpeaking(): boolean {
    return this.agentSpeaking;
  }

  // --- VAD event handling -----------------------------------------------------

  /** Routes a single VAD event; only acts while the agent is speaking. */
  private onVadEvent(event: VadEvent): void {
    // The timestamp is recorded even when the event is otherwise ignored.
    this.lastEventTimestampMs = event.timestampMs;
    if (!this.agentSpeaking) return;

    const kind = event.type;

    if (kind === "speech-start" || kind === "speech-active") {
      // Provisional pause; repeated speech events while paused are ignored.
      if (this.awaitingWordConfirm) return;
      this.awaitingWordConfirm = true;
      this.emitSignal({ type: "pause-tts", timestampMs: event.timestampMs });
      this.armWordConfirmDeadline(event.timestampMs);
      return;
    }

    if (kind === "blip") {
      if (!this.awaitingWordConfirm) return;
      this.awaitingWordConfirm = false;
      // Cancel the auto-resume timer but leave the confirm window open: an
      // ASR partial for the same audio may arrive after the blip verdict,
      // and words inside the original window must still hard-stop.
      this.clearWordConfirm({ keepWindow: true });
      this.emitSignal({ type: "resume-tts", timestampMs: event.timestampMs });
      return;
    }

    // `speech-pause` / `speech-end`: no action here. TTS stays paused until
    // `onWordsDetected` hard-stops or the grace deadline resumes it.
  }

  // --- ASR word-confirm sink (WordsDetectedSink) ------------------------------

  /**
   * ASR callback. Hard-stops with reason "barge-in-words" when at least one
   * word is reported while the agent is speaking and either a pause is
   * pending or `timestampMs` falls inside the still-open confirm window.
   * Otherwise does nothing.
   */
  onWordsDetected(args: {
    wordCount: number;
    partialText: string;
    timestampMs: number;
  }): void {
    if (args.wordCount < 1) return;
    if (!this.agentSpeaking) return;

    if (!this.awaitingWordConfirm) {
      const expiresAtMs = this.wordConfirmExpiresAtMs;
      const withinConfirmWindow =
        expiresAtMs != null && args.timestampMs <= expiresAtMs;
      if (!withinConfirmWindow) return;
    }

    this.hardStop("barge-in-words", args.timestampMs);
  }

  // --- Hard stop --------------------------------------------------------------

  /**
   * Issues a hard stop: trips the episode's cancel token (creating it on
   * first use), sets the legacy cancel flag, calls every attached
   * `onCancel`, and emits a `hard-stop` signal carrying the token. The same
   * token object is reused until `reset()`; listeners and the signal fire
   * on every call.
   *
   * @param reason Recorded on the token the first time it is tripped.
   * @param timestampMs Timestamp for the emitted signal; defaults to the
   *   last VAD event timestamp, or `Date.now()` when that is still 0.
   * @returns The current episode's cancel token.
   */
  hardStop(
    reason: NonNullable<BargeInCancelToken["reason"]> = "manual",
    timestampMs: number = this.lastEventTimestampMs || Date.now(),
  ): BargeInCancelToken {
    this.clearWordConfirm();
    this.awaitingWordConfirm = false;

    if (this.activeToken === null) {
      this.activeToken = makeCancelToken(null);
    }
    tripToken(this.activeToken, reason);

    // Legacy path first, then the signal stream.
    this.signal.cancelled = true;
    this.listeners.forEach((listener) => listener.onCancel());
    this.emitSignal({
      type: "hard-stop",
      timestampMs,
      token: this.activeToken,
    });
    return this.activeToken;
  }

  /** The cancel token of the current barge-in episode, or null when no
   * hard stop has happened since construction / the last `reset()`. */
  currentCancelToken(): BargeInCancelToken | null {
    return this.activeToken;
  }

  // --- Legacy API (VoiceScheduler / EngineVoiceBridge) ------------------------

  /** @deprecated Use `currentCancelToken()`; kept for `VoiceScheduler`. */
  cancelSignal(): CancelSignal {
    return this.signal;
  }

  /**
   * Registers a legacy `onCancel` listener.
   * @returns A function that removes the listener.
   */
  attach(listener: BargeInListener): () => void {
    this.listeners.add(listener);
    return () => this.listeners.delete(listener);
  }

  /** @deprecated Equivalent to `hardStop("manual")`; kept for the bridge. */
  onMicActive(): void {
    this.hardStop("manual");
  }

  /** Ends the current episode: clears any pending word-confirm, forgets
   * the active token and swaps in a fresh, unset legacy cancel flag. */
  reset(): void {
    this.clearWordConfirm();
    this.awaitingWordConfirm = false;
    this.activeToken = null;
    this.signal = { cancelled: false };
  }

  // --- internals --------------------------------------------------------------

  /** Delivers `signal` to every signal listener. */
  private emitSignal(signal: BargeInSignal): void {
    this.signalListeners.forEach((listener) => listener(signal));
  }

  /**
   * Opens the word-confirm window at `timestampMs` and starts the timer
   * that resumes TTS if the window elapses while a pause is still pending.
   */
  private armWordConfirmDeadline(timestampMs: number): void {
    this.clearWordConfirm();
    const expiresAtMs = timestampMs + this.wordsGraceMs;
    this.wordConfirmExpiresAtMs = expiresAtMs;

    const timer = setTimeout(() => {
      this.wordConfirmDeadlineTimer = null;
      if (this.awaitingWordConfirm && this.agentSpeaking) {
        // No word was confirmed in time: treat the audio as non-speech.
        this.awaitingWordConfirm = false;
        this.emitSignal({ type: "resume-tts", timestampMs: expiresAtMs });
      }
      this.wordConfirmExpiresAtMs = null;
    }, this.wordsGraceMs);
    this.wordConfirmDeadlineTimer = timer;

    // Where the timer handle exposes `unref`, call it so this timer alone
    // does not keep the event loop alive.
    const unrefable = timer as { unref?: () => void };
    if (timer && typeof unrefable.unref === "function") {
      unrefable.unref();
    }
  }

  /**
   * Cancels the pending deadline timer. Unless `keepWindow` is set, also
   * closes the confirm window so later ASR words are treated as stale.
   */
  private clearWordConfirm(options: { keepWindow?: boolean } = {}): void {
    const pendingTimer = this.wordConfirmDeadlineTimer;
    if (pendingTimer) {
      clearTimeout(pendingTimer);
      this.wordConfirmDeadlineTimer = null;
    }
    if (options.keepWindow) return;
    this.wordConfirmExpiresAtMs = null;
  }
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice cancellation coordinator — Wave 3 W3-9.
|
|
3
|
+
*
|
|
4
|
+
* Single brain that owns one `VoiceCancellationToken` per active voice turn
|
|
5
|
+
* and binds every cancellation source into it:
|
|
6
|
+
*
|
|
7
|
+
* 1. VAD start-of-speech while the agent is speaking (barge-in).
|
|
8
|
+
* 2. `BargeInController.hardStop` (ASR-confirmed barge-in words).
|
|
9
|
+
* 3. Turn-detector EOT revocation (user resumed mid-tentative-pause).
|
|
10
|
+
* 4. Runtime turn abort (`TurnControllerRegistry` "aborted" event).
|
|
11
|
+
*
|
|
12
|
+
* On any cancel, it fans out to:
|
|
13
|
+
*
|
|
14
|
+
* 1. The voice token's `AbortSignal` — every fetch / model call wired to
|
|
15
|
+
* `signal` aborts at the next yield point.
|
|
16
|
+
* 2. `runtime.turnControllers.abortTurn(roomId, reason)` — the runtime's
|
|
17
|
+
* planner-loop / action handlers see the abort within one tick
|
|
18
|
+
* (between model calls / between actions / between provider calls).
|
|
19
|
+
* 3. Optional `slotAbort(slotId)` — invokes the registered LM
|
|
20
|
+
* slot-abort callback (typically `MtpLlamaServer.abortSlot` which
|
|
21
|
+
* either aborts in-flight HTTP fetches against that slot or, on a
|
|
22
|
+
* capable fork, calls the slot-cancel REST route).
|
|
23
|
+
* 4. Optional `ttsStop()` — invokes the registered TTS-stop callback
|
|
24
|
+
* (typically `EngineVoiceBridge.triggerBargeIn` which drains the
|
|
25
|
+
* audio sink + cancels the FFI/HTTP synthesis path).
|
|
26
|
+
*
|
|
27
|
+
* The coordinator is intentionally a plain class — no engine coupling. The
|
|
28
|
+
* engine bridge (and tests) construct one with the structural runtime + the
|
|
29
|
+
* appropriate callbacks.
|
|
30
|
+
*/
|
|
31
|
+
import { type VoiceCancellationReason, VoiceCancellationRegistry, type VoiceCancellationToken } from "@elizaos/shared";
|
|
32
|
+
/**
|
|
33
|
+
* Minimum runtime surface this coordinator needs. Matches a subset of
|
|
34
|
+
* `AgentRuntime.turnControllers`. Structural so unit tests can pass a fake.
|
|
35
|
+
*/
|
|
36
|
+
export interface CoordinatorRuntime {
|
|
37
|
+
/** Turn-control surface of the runtime; the only runtime member this interface requires. */
turnControllers: {
|
|
38
|
+
/**
 * Request an abort of the turn in `roomId`, passing `reason` along.
 * NOTE(review): the boolean presumably reports whether a turn was actually
 * aborted — confirm against the runtime implementation.
 */
abortTurn(roomId: string, reason: string): boolean;
|
|
39
|
+
/**
 * Register a listener for turn lifecycle events.
 * NOTE(review): the returned `() => void` is presumably an unsubscribe
 * handle — confirm against the runtime implementation.
 */
onEvent(listener: (event: {
|
|
40
|
+
/** Kind of lifecycle event being reported. */
type: "started" | "completed" | "errored" | "aborted" | "aborted-cleanup";
|
|
41
|
+
/** Room the event belongs to. */
roomId: string;
|
|
42
|
+
/** Optional reason string; may be absent. */
reason?: string;
|
|
43
|
+
}) => void): () => void;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/**
 * Construction options accepted by the `VoiceCancellationCoordinator`
 * constructor. Only `runtime` is required; the two callbacks and the
 * registry are optional.
 */
export interface VoiceCancellationCoordinatorOptions {
|
|
47
|
+
/** The runtime to bind to. */
|
|
48
|
+
runtime: CoordinatorRuntime;
|
|
49
|
+
/**
|
|
50
|
+
* Abort the inference server slot. Wired to `MtpLlamaServer.abortSlot`
|
|
51
|
+
* in production. Async — the coordinator does NOT await it (the slot
|
|
52
|
+
* abort path is best-effort; the AbortSignal closure on the fetch is the
|
|
53
|
+
* authoritative cancel).
 *
 * NOTE(review): the declared return type is `void`, so a promise returned
 * by the callback is not visible through this type — confirm that the
 * implementation handles rejections from an async callback.
|
|
54
|
+
*/
|
|
55
|
+
slotAbort?: (slotId: number, reason: VoiceCancellationReason) => void;
|
|
56
|
+
/**
|
|
57
|
+
* Hard-stop the TTS pipeline (audio sink drain + FFI/HTTP synthesis
|
|
58
|
+
* cancel). Wired to `EngineVoiceBridge.triggerBargeIn`. Synchronous —
|
|
59
|
+
* the audio sink drain MUST happen within one tick of `abort()`.
|
|
60
|
+
*/
|
|
61
|
+
ttsStop?: (reason: VoiceCancellationReason) => void;
|
|
62
|
+
/**
|
|
63
|
+
* Optional pre-existing registry. Tests inject one to inspect token
|
|
64
|
+
* lifecycle directly. Production creates a fresh registry per session.
|
|
65
|
+
*/
|
|
66
|
+
registry?: VoiceCancellationRegistry;
|
|
67
|
+
}
|
|
68
|
+
/**
 * Ambient declaration of the voice cancellation coordinator. It hands out
 * one `VoiceCancellationToken` per room via `armTurn` and exposes the entry
 * points that trip that token (`abort`, `bargeIn`, `revokeEot`,
 * `bindBargeInController`). Only signatures are declared here; the
 * implementation lives in the corresponding source module.
 */
export declare class VoiceCancellationCoordinator {
|
|
69
|
+
// Private members below carry no type in this declaration. Their names
// mirror the `VoiceCancellationCoordinatorOptions` fields — presumably they
// hold the values passed to the constructor; confirm in the implementation.
private readonly runtime;
|
|
70
|
+
private readonly slotAbort;
|
|
71
|
+
private readonly ttsStop;
|
|
72
|
+
private readonly registry;
|
|
73
|
+
/** Active turns keyed by roomId. One per room. */
|
|
74
|
+
private readonly armed;
|
|
75
|
+
/** Build a coordinator from `opts` (required runtime plus optional callbacks and registry). */
constructor(opts: VoiceCancellationCoordinatorOptions);
|
|
76
|
+
/**
|
|
77
|
+
* Begin a new voice turn for `roomId`. If a previous turn was active,
|
|
78
|
+
* it is aborted with `"external"` (the regular replace-on-arm semantics
|
|
79
|
+
* inherited from `VoiceCancellationRegistry`).
 *
 * `args.roomId` selects the room and `args.runId` identifies the run.
 * NOTE(review): the optional `args.slot` is presumably the inference slot
 * id later handed to the `slotAbort` callback — confirm in the implementation.
|
|
80
|
+
*/
|
|
81
|
+
armTurn(args: {
|
|
82
|
+
roomId: string;
|
|
83
|
+
runId: string;
|
|
84
|
+
slot?: number;
|
|
85
|
+
}): VoiceCancellationToken;
|
|
86
|
+
/** Fetch the current voice token for `roomId`, or null. */
|
|
87
|
+
current(roomId: string): VoiceCancellationToken | null;
|
|
88
|
+
/** Snapshot of armed room ids. */
|
|
89
|
+
armedRoomIds(): string[];
|
|
90
|
+
/**
|
|
91
|
+
* Abort the active turn for `roomId` with the given reason. Idempotent.
|
|
92
|
+
* Returns true when a live token was aborted.
|
|
93
|
+
*/
|
|
94
|
+
abort(roomId: string, reason: VoiceCancellationReason): boolean;
|
|
95
|
+
/**
|
|
96
|
+
* Trip the active token because VAD reported start-of-speech while the
|
|
97
|
+
* agent was speaking. Equivalent to `abort(roomId, "barge-in")` but
|
|
98
|
+
* keeps the call-site grep-able as the canonical barge-in entry point.
|
|
99
|
+
*/
|
|
100
|
+
bargeIn(roomId: string): boolean;
|
|
101
|
+
/**
|
|
102
|
+
* Trip the active token because the turn detector revoked the previous
|
|
103
|
+
* EOT decision (user resumed within the rollback window).
 *
 * NOTE(review): the boolean result presumably has the same meaning as
 * the one returned by `abort` — confirm in the implementation.
|
|
104
|
+
*/
|
|
105
|
+
revokeEot(roomId: string): boolean;
|
|
106
|
+
/**
|
|
107
|
+
* Wire a `BargeInController.onSignal` listener into this coordinator.
|
|
108
|
+
* The controller emits `hard-stop` when ASR confirms barge-in words;
|
|
109
|
+
* this glue translates it into `coordinator.bargeIn(roomId)` so the
|
|
110
|
+
* canonical token (and every downstream consumer) sees the abort.
|
|
111
|
+
*
|
|
112
|
+
* Returns the unsubscribe function from `onSignal`. Production callers
|
|
113
|
+
* (the engine bridge) call this once per `BargeInController` per
|
|
114
|
+
* room and keep the handle until session teardown.
 *
 * The `controller` parameter is typed structurally: any object with a
 * compatible `onSignal` method is accepted, and `signal.type` is a plain
 * string rather than a literal union.
|
|
115
|
+
*/
|
|
116
|
+
bindBargeInController(roomId: string, controller: {
|
|
117
|
+
onSignal(listener: (signal: {
|
|
118
|
+
type: string;
|
|
119
|
+
}) => void): () => void;
|
|
120
|
+
}): () => void;
|
|
121
|
+
/**
|
|
122
|
+
* Tear down. Cancels every armed turn and unsubscribes from the
|
|
123
|
+
* runtime. Safe to call multiple times.
|
|
124
|
+
*/
|
|
125
|
+
dispose(): void;
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=cancellation-coordinator.d.ts.map
|