@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,727 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice state machine — explicit state-machine wrapper that drives the
|
|
3
|
+
* optimistic-rollback path the C1 checkpoint enables.
|
|
4
|
+
*
|
|
5
|
+
* This is the thin layer the brief calls for: the existing
|
|
6
|
+
* `OptimisticRollbackController` already covers the pre-draft / draft-response
|
|
7
|
+
* path; this module adds the SPEAKING state and the barge-in restore path on
|
|
8
|
+
* top, exposed as a single state-machine surface (`getState`, `dispatch`)
|
|
9
|
+
* that the voice loop drives from VAD events + scheduler events.
|
|
10
|
+
*
|
|
11
|
+
* IDLE
|
|
12
|
+
* │ speech-start
|
|
13
|
+
* ▼
|
|
14
|
+
* LISTENING ────────────── speech-pause ────────────▶ PAUSE_TENTATIVE
|
|
15
|
+
* ▲ │
|
|
16
|
+
* │ speech-active (within 2× hangover) │
|
|
17
|
+
* │ ──── discard C1 ──────────────────────────────────────┤
|
|
18
|
+
* │ │
|
|
19
|
+
* │ │ speech-end
|
|
20
|
+
* │ ▼
|
|
21
|
+
* │ SPEAKING
|
|
22
|
+
* │ │
|
|
23
|
+
* │ ◀──── restore C1, re-enter LISTENING ───── barge-in ──┘
|
|
24
|
+
*
|
|
25
|
+
* Key transitions (all wired to a `CheckpointManagerLike`):
|
|
26
|
+
*
|
|
27
|
+
* - `speech-pause` (LISTENING → PAUSE_TENTATIVE)
|
|
28
|
+
* Save checkpoint named "pre-draft" (C1). Kick the drafter on the
|
|
29
|
+
* current partial transcript via the caller-supplied `startDrafter`.
|
|
30
|
+
*
|
|
31
|
+
* - `speech-active` within 2× hangover (PAUSE_TENTATIVE → LISTENING)
|
|
32
|
+
* Discard C1. Abort the speculative drafter. No rollback is required
|
|
33
|
+
* because the drafter's KV writes were speculative against a snapshot
|
|
34
|
+
* we never committed.
|
|
35
|
+
*
|
|
36
|
+
* - `speech-end` (PAUSE_TENTATIVE → SPEAKING)
|
|
37
|
+
* Commit the ASR final. The drafter's output is promoted: callers
|
|
38
|
+
* wire the verifier on top via `onCommit`. **Retain** C1 — a barge-in
|
|
39
|
+
* while the agent is speaking must roll the KV cache back to the
|
|
40
|
+
* pre-draft point so the next user turn doesn't see the agent's own
|
|
41
|
+
* half-spoken response in the prompt.
|
|
42
|
+
*
|
|
43
|
+
* - `barge-in` (SPEAKING → LISTENING)
|
|
44
|
+
* Restore C1. Hand the new user speech to the next LISTENING turn.
|
|
45
|
+
* The same C1 may be restored multiple times — useful when two
|
|
46
|
+
* consecutive barge-ins land before the next checkpoint is taken
|
|
47
|
+
* (the test suite covers this).
|
|
48
|
+
*
|
|
49
|
+
* No fallback sludge: a checkpoint failure surfaces via `onError`. The
|
|
50
|
+
* state machine never silently downgrades to a non-checkpointed path —
|
|
51
|
+
* callers turn the feature off via the constructor option.
|
|
52
|
+
*/
|
|
53
|
+
|
|
54
|
+
import type {
|
|
55
|
+
CheckpointHandle,
|
|
56
|
+
CheckpointManagerLike,
|
|
57
|
+
} from "./checkpoint-manager";
|
|
58
|
+
import type { ContextPartial } from "./eager-context-builder";
|
|
59
|
+
import {
|
|
60
|
+
EOT_COMMIT_SILENCE_MS,
|
|
61
|
+
EOT_COMMIT_THRESHOLD,
|
|
62
|
+
EOT_HANGOVER_EXTENSION_MS,
|
|
63
|
+
EOT_MID_CLAUSE_THRESHOLD,
|
|
64
|
+
EOT_TENTATIVE_SILENCE_MS,
|
|
65
|
+
EOT_TENTATIVE_THRESHOLD,
|
|
66
|
+
type EotClassifier,
|
|
67
|
+
} from "./eot-classifier";
|
|
68
|
+
import type { OptimisticGenerationPolicy } from "./optimistic-policy";
|
|
69
|
+
import {
|
|
70
|
+
type PrefillOptimisticOptions,
|
|
71
|
+
type PrefillOptimisticResult,
|
|
72
|
+
prefillOptimistic,
|
|
73
|
+
} from "./prefill-client";
|
|
74
|
+
|
|
75
|
+
/** Public state. Closed union — exhaustive switches catch new variants. */
|
|
76
|
+
export type VoiceState = "IDLE" | "LISTENING" | "PAUSE_TENTATIVE" | "SPEAKING";
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Events that drive the state machine. Wall-clock timestamps are caller-
|
|
80
|
+
* supplied so the machine is testable without a fake clock.
|
|
81
|
+
*/
|
|
82
|
+
export type VoiceStateEvent =
|
|
83
|
+
| { type: "speech-start"; timestampMs: number }
|
|
84
|
+
| { type: "speech-pause"; timestampMs: number; partialTranscript: string }
|
|
85
|
+
| { type: "speech-active"; timestampMs: number }
|
|
86
|
+
| { type: "speech-end"; timestampMs: number; finalTranscript: string }
|
|
87
|
+
| { type: "barge-in"; timestampMs: number }
|
|
88
|
+
/**
|
|
89
|
+
* Tier-3 — streamed partial transcript chunk from the ASR. When an
|
|
90
|
+
* `eotClassifier` is configured the machine will run `checkEot()` and may
|
|
91
|
+
* transition to PAUSE_TENTATIVE early or commit immediately depending on
|
|
92
|
+
* the returned probability and the elapsed silence since the last speech
|
|
93
|
+
* audio frame (provided by the caller via `silenceSinceMs`).
|
|
94
|
+
*/
|
|
95
|
+
| {
|
|
96
|
+
type: "partial-transcript";
|
|
97
|
+
timestampMs: number;
|
|
98
|
+
text: string;
|
|
99
|
+
/** Milliseconds of silence elapsed since the last speech audio frame. */
|
|
100
|
+
silenceSinceMs: number;
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Reason a speculative drafter handle was aborted by the state machine.
|
|
105
|
+
*
|
|
106
|
+
* - `resumed` — `speech-active` re-entered LISTENING; the draft was
|
|
107
|
+
* speculative against a transcript that turned out to be
|
|
108
|
+
* still provisional.
|
|
109
|
+
* - `barge-in` — the user interrupted while the agent was speaking; the
|
|
110
|
+
* draft's downstream TTS has already been hard-stopped.
|
|
111
|
+
* - `shutdown` — `dispose()` was called.
|
|
112
|
+
*/
|
|
113
|
+
export type DrafterAbortReason = "resumed" | "barge-in" | "shutdown";
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Handle returned by `startDrafter`. The state machine calls `abort()`
|
|
117
|
+
* (idempotent) when the draft must be cancelled.
|
|
118
|
+
*/
|
|
119
|
+
export interface DrafterHandle {
|
|
120
|
+
abort(reason: DrafterAbortReason): void;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Caller-supplied drafter starter. Receives the partial transcript at the
|
|
125
|
+
* `speech-pause` instant and a turn id. Must return synchronously; the
|
|
126
|
+
* draft itself runs in the background until the state machine calls
|
|
127
|
+
* `abort()` or the draft completes (which is observed via `onCommit`).
|
|
128
|
+
*/
|
|
129
|
+
export type StartDrafterFn = (args: {
|
|
130
|
+
partialTranscript: string;
|
|
131
|
+
turnId: string;
|
|
132
|
+
/** Aborted when the drafter must be cancelled. */
|
|
133
|
+
signal: AbortSignal;
|
|
134
|
+
}) => DrafterHandle;
|
|
135
|
+
|
|
136
|
+
export interface VoiceStateMachineEvents {
|
|
137
|
+
/** State transition occurred. Called AFTER the new state is set. */
|
|
138
|
+
onStateChange?(prev: VoiceState, next: VoiceState, turnId: string): void;
|
|
139
|
+
/** Speculative drafter was started on `speech-pause`. */
|
|
140
|
+
onDrafterStart?(turnId: string, partialTranscript: string): void;
|
|
141
|
+
/** Speculative drafter was aborted (resumed / barge-in / shutdown). */
|
|
142
|
+
onDrafterAbort?(turnId: string, reason: DrafterAbortReason): void;
|
|
143
|
+
/**
|
|
144
|
+
* `speech-end` reached SPEAKING. The verifier should now run on top of
|
|
145
|
+
* the speculative drafter output against the final transcript.
|
|
146
|
+
*
|
|
147
|
+
* `prefillResult` is present when the C7 optimistic prefill completed
|
|
148
|
+
* before `speech-end` arrived. The verifier can resume generation from
|
|
149
|
+
* `prefillResult.checkpointHandle` to skip one full prefill RTT.
|
|
150
|
+
*/
|
|
151
|
+
onCommit?(
|
|
152
|
+
turnId: string,
|
|
153
|
+
finalTranscript: string,
|
|
154
|
+
prefillResult?: PrefillOptimisticResult,
|
|
155
|
+
): void;
|
|
156
|
+
/**
|
|
157
|
+
* A barge-in restored C1. The voice loop should drop any in-flight TTS
|
|
158
|
+
* (separate concern owned by the barge-in controller) and begin a new
|
|
159
|
+
* LISTENING turn with the new user audio.
|
|
160
|
+
*/
|
|
161
|
+
onRollback?(turnId: string, restoredFrom: CheckpointHandle): void;
|
|
162
|
+
/**
|
|
163
|
+
* Surfaced when `CheckpointManager.{save,restore,discard}` rejects.
|
|
164
|
+
* The state machine continues — checkpoint failures must not break the
|
|
165
|
+
* voice loop — but the operator can flip the feature flag off in
|
|
166
|
+
* response.
|
|
167
|
+
*/
|
|
168
|
+
onError?(
|
|
169
|
+
op: "save" | "restore" | "discard",
|
|
170
|
+
error: unknown,
|
|
171
|
+
turnId: string,
|
|
172
|
+
): void;
|
|
173
|
+
/**
|
|
174
|
+
* Fired when the Tier-3 EOT classifier scores a partial transcript.
|
|
175
|
+
* Useful for telemetry and debugging — P values are emitted before the
|
|
176
|
+
* state machine decides whether to act on them.
|
|
177
|
+
*/
|
|
178
|
+
onEotScore?(turnId: string, text: string, pDone: number): void;
|
|
179
|
+
/**
|
|
180
|
+
* Fired when the C7 optimistic prefill completes (either successfully or
|
|
181
|
+
* with an error). On success `result` is set; on error `error` is set.
|
|
182
|
+
* The state machine never blocks on the prefill result — it resolves or
|
|
183
|
+
* rejects in the background while PAUSE_TENTATIVE is active.
|
|
184
|
+
*/
|
|
185
|
+
onPrefill?(
|
|
186
|
+
turnId: string,
|
|
187
|
+
result: PrefillOptimisticResult | null,
|
|
188
|
+
error: unknown | null,
|
|
189
|
+
): void;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
export interface VoiceStateMachineOptions {
|
|
193
|
+
/** Slot identifier for the conversation pinning. */
|
|
194
|
+
slotId: string;
|
|
195
|
+
/**
|
|
196
|
+
* Whether to actually call into the `CheckpointManager`. When `false`,
|
|
197
|
+
* the state machine still transitions through the same states but
|
|
198
|
+
* never saves/restores. Default `true` — callers turn the feature off
|
|
199
|
+
* here when upstream `--ctx-checkpoints` is unavailable.
|
|
200
|
+
*/
|
|
201
|
+
enableCheckpoints?: boolean;
|
|
202
|
+
/**
|
|
203
|
+
* VAD pause hangover (ms). The rollback window is `2 × hangoverMs`. If
|
|
204
|
+
* a `speech-active` arrives later than this after a `speech-pause`, the
|
|
205
|
+
* machine treats the pause as a real speech-end equivalent (it moves to SPEAKING, without firing `onCommit`,
|
|
206
|
+
* instead of discarding).
|
|
207
|
+
*/
|
|
208
|
+
pauseHangoverMs?: number;
|
|
209
|
+
checkpointManager: CheckpointManagerLike;
|
|
210
|
+
/** Drafter starter — see `StartDrafterFn`. */
|
|
211
|
+
startDrafter: StartDrafterFn;
|
|
212
|
+
/** Events sink. */
|
|
213
|
+
events?: VoiceStateMachineEvents;
|
|
214
|
+
/**
|
|
215
|
+
* Tier-3 semantic EOT classifier. When provided, partial transcripts are
|
|
216
|
+
* scored on each `partial-transcript` dispatch:
|
|
217
|
+
*
|
|
218
|
+
* P ≥ 0.9 AND silence ≥ 50 ms → commit immediately (skip remaining hangover)
|
|
219
|
+
* P ≥ 0.6 AND silence ≥ 20 ms → enter PAUSE_TENTATIVE early (start drafter)
|
|
220
|
+
* P < 0.4 → extend hangover by 50 ms (user is mid-clause)
|
|
221
|
+
*
|
|
222
|
+
* When absent the machine behaves as before (tiers 1 + 2 only).
|
|
223
|
+
*/
|
|
224
|
+
eotClassifier?: EotClassifier;
|
|
225
|
+
/**
|
|
226
|
+
* C7 — optimistic prefill configuration. When provided the machine fires
|
|
227
|
+
* `prefillOptimistic` on `PAUSE_TENTATIVE` entry (fire-and-forget) so the
|
|
228
|
+
* KV cache is pre-warmed with the partial transcript by the time ASR
|
|
229
|
+
* finalizes. The prefill result is passed to `onCommit` via `prefillResult`.
|
|
230
|
+
*
|
|
231
|
+
* Omit to disable the prefill path entirely.
|
|
232
|
+
*/
|
|
233
|
+
prefillConfig?: {
|
|
234
|
+
/** Base URL of the llama-server (`http://host:port`). */
|
|
235
|
+
baseUrl: string;
|
|
236
|
+
/** `CheckpointManager` options forwarded to `prefillOptimistic`. */
|
|
237
|
+
checkpointOptions?: Omit<PrefillOptimisticOptions, "checkpointManager">;
|
|
238
|
+
/**
|
|
239
|
+
* Optional deterministic context from `EagerContextBuilder` (C3).
|
|
240
|
+
* When supplied, the prefill `/completion` call includes the system
|
|
241
|
+
* prompt + conversation history so the KV cache is maximally warm.
|
|
242
|
+
*/
|
|
243
|
+
getContext?: () => ContextPartial | null;
|
|
244
|
+
};
|
|
245
|
+
/**
|
|
246
|
+
* W3-9 / F1 — optional optimistic-generation policy. When provided, the
|
|
247
|
+
* machine consults `policy.shouldStartOptimisticLm(eotProb)` at the
|
|
248
|
+
* `firePrefill` site before kicking off the speculative prefill. When
|
|
249
|
+
* the policy says no (e.g. on battery, or below the configured EOT
|
|
250
|
+
* threshold) `firePrefill` is a no-op and `handleSpeechEnd` falls back
|
|
251
|
+
* to a regular (non-prefilled) verifier pass. Omit to keep the prior
|
|
252
|
+
* behaviour (fire on every PAUSE_TENTATIVE entry regardless of EOT
|
|
253
|
+
* probability).
|
|
254
|
+
*/
|
|
255
|
+
optimisticPolicy?: OptimisticGenerationPolicy;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// Lowered from 220ms; further reduction gated on semantic EOT classifier (V2).
|
|
259
|
+
const DEFAULT_PAUSE_HANGOVER_MS = 100;
|
|
260
|
+
const ROLLBACK_WINDOW_MULTIPLIER = 2;
|
|
261
|
+
const C1_NAME = "pre-draft";
|
|
262
|
+
|
|
263
|
+
interface ActiveDraft {
|
|
264
|
+
handle: DrafterHandle;
|
|
265
|
+
controller: AbortController;
|
|
266
|
+
turnId: string;
|
|
267
|
+
/** Partial transcript captured at speech-pause. */
|
|
268
|
+
partial: string;
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
/**
|
|
272
|
+
* Explicit state-machine implementation. Stateful (state + active
|
|
273
|
+
* checkpoint + drafter handle); methods are NOT thread-safe — call them
|
|
274
|
+
* from a single event loop.
|
|
275
|
+
*/
|
|
276
|
+
export class VoiceStateMachine {
|
|
277
|
+
private readonly slotId: string;
|
|
278
|
+
private readonly enabled: boolean;
|
|
279
|
+
private readonly pauseHangoverMs: number;
|
|
280
|
+
private readonly mgr: CheckpointManagerLike;
|
|
281
|
+
private readonly startDrafterFn: StartDrafterFn;
|
|
282
|
+
private readonly events: VoiceStateMachineEvents;
|
|
283
|
+
/** Tier-3 semantic EOT classifier. Optional — omit for tiers 1+2 only. */
|
|
284
|
+
private readonly eotClassifier: EotClassifier | undefined;
|
|
285
|
+
|
|
286
|
+
private state: VoiceState = "IDLE";
|
|
287
|
+
private turnCounter = 0;
|
|
288
|
+
/** Most recent C1 handle. Retained across `speech-end` until barge-in or next IDLE. */
|
|
289
|
+
private checkpoint: CheckpointHandle | null = null;
|
|
290
|
+
private activeDraft: ActiveDraft | null = null;
|
|
291
|
+
private pauseTimestampMs: number | null = null;
|
|
292
|
+
private disposed = false;
|
|
293
|
+
/**
|
|
294
|
+
* Accumulated hangover extension from EOT mid-clause detections (ms).
|
|
295
|
+
* Reset on each new turn (speech-start). Added to the effective hangover
|
|
296
|
+
* so that consecutive mid-clause detections stack.
|
|
297
|
+
*/
|
|
298
|
+
private eotHangoverExtensionMs = 0;
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* C7 — in-flight prefill promise. Set on PAUSE_TENTATIVE entry; awaited
|
|
302
|
+
* (or discarded) on SPEECH_END / SPEECH_ACTIVE_REBOUND. Fire-and-forget
|
|
303
|
+
* from the perspective of the state machine — the result is surfaced via
|
|
304
|
+
* `onPrefill` and `onCommit(prefillResult)`.
|
|
305
|
+
*/
|
|
306
|
+
private prefillPromise: Promise<PrefillOptimisticResult> | null = null;
|
|
307
|
+
private readonly prefillConfig: VoiceStateMachineOptions["prefillConfig"];
|
|
308
|
+
/** W3-9 / F1 — optimistic-generation policy gate for `firePrefill`. */
|
|
309
|
+
private readonly optimisticPolicy: OptimisticGenerationPolicy | undefined;
|
|
310
|
+
/**
|
|
311
|
+
* Most recently observed EOT probability from the Tier-3 classifier.
|
|
312
|
+
* Used as the `eotProb` argument to `prefillOptimistic` when PAUSE_TENTATIVE
|
|
313
|
+
* is entered. Starts at 0.5 (uncertain). Updated on each `partial-transcript`
|
|
314
|
+
* event when an EOT classifier is wired.
|
|
315
|
+
*/
|
|
316
|
+
private latestEotProb = 0.5;
|
|
317
|
+
|
|
318
|
+
/**
 * Wire up the machine from caller-supplied options.
 *
 * @param opts Collaborators (checkpoint manager, drafter starter), the slot
 *   id, and optional tuning/feature settings. `enableCheckpoints` defaults to
 *   `true`, `pauseHangoverMs` to `DEFAULT_PAUSE_HANGOVER_MS`, and `events` to
 *   an empty sink.
 */
constructor(opts: VoiceStateMachineOptions) {
  const {
    slotId,
    checkpointManager,
    startDrafter,
    eotClassifier,
    prefillConfig,
    optimisticPolicy,
  } = opts;

  // Required collaborators are stored as given.
  this.slotId = slotId;
  this.mgr = checkpointManager;
  this.startDrafterFn = startDrafter;

  // Optional knobs fall back to their defaults when null/undefined.
  this.enabled = opts.enableCheckpoints ?? true;
  this.pauseHangoverMs = opts.pauseHangoverMs ?? DEFAULT_PAUSE_HANGOVER_MS;
  this.events = opts.events ?? {};

  // Optional features are copied through unchanged (possibly `undefined`).
  this.eotClassifier = eotClassifier;
  this.prefillConfig = prefillConfig;
  this.optimisticPolicy = optimisticPolicy;
}
|
|
329
|
+
|
|
330
|
+
/** Snapshot of the machine's current state, intended for tests and telemetry. */
getState(): VoiceState {
  const current: VoiceState = this.state;
  return current;
}
|
|
334
|
+
|
|
335
|
+
/**
 * Identifier of the current turn, derived from the internal turn counter via
 * `turnIdString`. Pause/active handling does not touch the counter, so the id
 * stays the same across those events.
 */
getTurnId(): string {
  const counter = this.turnCounter;
  return turnIdString(counter);
}
|
|
339
|
+
|
|
340
|
+
/**
 * Returns the checkpoint handle the machine currently holds, or `null` when
 * there is none. This is a test hook — production code should observe the
 * `onCommit` / `onRollback` events instead.
 */
getActiveCheckpoint(): CheckpointHandle | null {
  const held = this.checkpoint;
  return held;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Total EOT hangover extension accumulated so far, in milliseconds. Intended
 * to be added by the Tier-2 `VadDetector` to its effective pause hangover so
 * that mid-clause pauses are not committed too early. The value is reset to
 * 0 whenever a `speech-start` is handled.
 */
getEotHangoverExtensionMs(): number {
  const extensionMs = this.eotHangoverExtensionMs;
  return extensionMs;
}
|
|
356
|
+
|
|
357
|
+
/**
 * Feed one event into the machine and route it to the matching handler.
 *
 * The returned promise settles once the handler — including any checkpoint
 * or prefill work it awaits — has finished; await it in tests for
 * deterministic assertions. Production callers may drop the promise, but
 * callbacks placed after an awaited step (for example `onCommit` while a
 * prefill is still in flight) fire later rather than synchronously.
 *
 * Events dispatched after `dispose()` has been called are ignored.
 */
async dispatch(event: VoiceStateEvent): Promise<void> {
  if (this.disposed) {
    return;
  }
  switch (event.type) {
    case "speech-start": {
      return this.handleSpeechStart();
    }
    case "speech-pause": {
      const { timestampMs, partialTranscript } = event;
      return this.handleSpeechPause(timestampMs, partialTranscript);
    }
    case "speech-active": {
      return this.handleSpeechActive(event.timestampMs);
    }
    case "speech-end": {
      return this.handleSpeechEnd(event.finalTranscript);
    }
    case "barge-in": {
      return this.handleBargeIn();
    }
    case "partial-transcript": {
      const { timestampMs, text, silenceSinceMs } = event;
      return this.handlePartialTranscript(timestampMs, text, silenceSinceMs);
    }
    default: {
      // Compile-time exhaustiveness guard: a new event variant that is not
      // handled above makes this assignment fail to type-check.
      const unhandled: never = event;
      void unhandled;
      return;
    }
  }
}
|
|
391
|
+
|
|
392
|
+
/**
 * Shut the machine down. Aborts the in-flight drafter (reason `shutdown`),
 * discards the held checkpoint when checkpoints are enabled, and moves to
 * IDLE. Idempotent: repeat calls return immediately, and `dispatch` ignores
 * events once disposal has begun.
 *
 * A failing discard is reported through `onError("discard", …)` and does not
 * prevent the transition to IDLE.
 */
async dispose(): Promise<void> {
  if (this.disposed) {
    return;
  }
  this.disposed = true;

  const draft = this.activeDraft;
  if (draft) {
    // Cancel through both channels stored with the draft: its
    // AbortController and the handle's own abort hook.
    draft.controller.abort();
    draft.handle.abort("shutdown");
    this.events.onDrafterAbort?.(draft.turnId, "shutdown");
    this.activeDraft = null;
  }

  // Drop our reference before awaiting the discard; the field ends up null
  // whether or not checkpoints are enabled.
  const held = this.checkpoint;
  this.checkpoint = null;
  if (held && this.enabled) {
    try {
      await this.mgr.discardCheckpoint(held);
    } catch (error) {
      this.events.onError?.("discard", error, this.getTurnId());
    }
  }

  this.setState("IDLE");
}
|
|
419
|
+
|
|
420
|
+
// --- handlers --------------------------------------------------------
|
|
421
|
+
|
|
422
|
+
/**
 * `speech-start`: begin (or continue) listening. The turn counter advances
 * only when coming from IDLE. Per-turn scratch state — pause timestamp, EOT
 * hangover extension, last EOT probability, pending prefill — is reset
 * before the LISTENING transition is made.
 */
private handleSpeechStart(): void {
  const startsNewTurn = this.state === "IDLE";
  if (startsNewTurn) {
    this.turnCounter += 1;
  }

  // Reset everything that is scoped to a single listening stretch.
  this.prefillPromise = null;
  this.latestEotProb = 0.5;
  this.eotHangoverExtensionMs = 0;
  this.pauseTimestampMs = null;

  this.setState("LISTENING");
}
|
|
432
|
+
|
|
433
|
+
/**
 * `speech-pause` (LISTENING → PAUSE_TENTATIVE): record the pause time, save
 * checkpoint C1 ("pre-draft") when checkpoints are enabled, then fire the
 * optimistic prefill and start the speculative drafter on the partial
 * transcript.
 *
 * The save is awaited, so other events can be handled in the meantime. A
 * handle that comes back after the machine has already left the turn
 * (rebound to LISTENING, or `dispose()`) is discarded instead of being
 * stored: the cleanup that would normally discard it has already run, so
 * storing it would orphan the checkpoint.
 *
 * @param timestampMs Caller-supplied wall-clock time of the pause.
 * @param partialTranscript Transcript text available at the pause instant.
 */
private async handleSpeechPause(
  timestampMs: number,
  partialTranscript: string,
): Promise<void> {
  if (this.state !== "LISTENING") return;
  this.pauseTimestampMs = timestampMs;
  this.setState("PAUSE_TENTATIVE");
  const turnId = this.getTurnId();

  if (this.enabled) {
    let saved: CheckpointHandle | null = null;
    try {
      saved = await this.mgr.saveCheckpoint(this.slotId, C1_NAME);
    } catch (error) {
      // Continue without a checkpoint — the drafter still runs, but a
      // barge-in won't have anything to restore from. Surface for the
      // operator to act on.
      this.events.onError?.("save", error, turnId);
    }

    if (saved !== null) {
      // Read through `currentState()`: TS still believes `this.state` is
      // the narrowed entry-guard value and cannot see that `setState` or
      // the `await` may have changed it.
      const stateAfterSave = this.currentState();
      const stillWanted =
        !this.disposed &&
        (stateAfterSave === "PAUSE_TENTATIVE" || stateAfterSave === "SPEAKING");
      if (stillWanted) {
        // PAUSE_TENTATIVE: normal path. SPEAKING: the turn was committed
        // while the save was in flight; keep C1 so a barge-in can restore.
        this.checkpoint = saved;
      } else {
        // The machine rebounded to LISTENING or was disposed during the
        // save. Nothing will ever discard this handle for us, so do it now.
        try {
          await this.mgr.discardCheckpoint(saved);
        } catch (error) {
          this.events.onError?.("discard", error, turnId);
        }
        return;
      }
    }
  }

  // State may have changed while we were awaiting the save (a fast
  // `speech-active` rebound, for instance). Only kick the drafter if we are
  // still in PAUSE_TENTATIVE on a live machine.
  if (this.disposed || this.currentState() !== "PAUSE_TENTATIVE") return;

  // C7 — fire optimistic prefill in the background (fire-and-forget). The
  // drafter and the prefill run concurrently; if the prefill finishes before
  // `speech-end` the verifier can start from the prefilled KV state.
  this.firePrefill(partialTranscript, this.latestEotProb, turnId);

  this.startSpeculativeDrafter(partialTranscript, turnId);
}
|
|
468
|
+
|
|
469
|
+
/**
 * `speech-active` while PAUSE_TENTATIVE.
 *
 * - Later than `ROLLBACK_WINDOW_MULTIPLIER × pauseHangoverMs` after the
 *   recorded pause: the pause is treated as a speech-end equivalent. The
 *   drafter is left running and the machine moves to SPEAKING; no
 *   `onCommit` is fired because there is no final transcript.
 * - Otherwise (including when no pause timestamp is recorded): the user
 *   resumed. The drafter is aborted with reason `resumed`, the pending
 *   prefill reference is dropped, C1 is discarded, and the machine returns
 *   to LISTENING.
 *
 * @param timestampMs Caller-supplied wall-clock time of the event.
 */
private async handleSpeechActive(timestampMs: number): Promise<void> {
  if (this.state !== "PAUSE_TENTATIVE") {
    return;
  }

  const windowMs = ROLLBACK_WINDOW_MULTIPLIER * this.pauseHangoverMs;
  const pausedAt = this.pauseTimestampMs;
  const pastWindow = pausedAt !== null && timestampMs - pausedAt > windowMs;
  if (pastWindow) {
    // Unusual path: callers only learn about it through the state change.
    this.setState("SPEAKING");
    return;
  }

  // Inside the rollback window — cancel the speculative work.
  this.abortActiveDraft("resumed");
  // C7 — drop the in-flight prefill. Its checkpoint is left to the server's
  // slot-reuse eviction (no explicit discard REST call is available on the
  // emulated path).
  this.prefillPromise = null;

  const stale = this.enabled ? this.checkpoint : null;
  if (stale) {
    this.checkpoint = null;
    try {
      await this.mgr.discardCheckpoint(stale);
    } catch (error) {
      this.events.onError?.("discard", error, this.getTurnId());
    }
  }

  this.pauseTimestampMs = null;
  this.setState("LISTENING");
}
|
|
501
|
+
|
|
502
|
+
/**
 * `speech-end`: commit the final transcript and move to SPEAKING.
 *
 * - From LISTENING (no prior `speech-pause`, so no checkpoint): transition
 *   and fire `onCommit` without a prefill result.
 * - From PAUSE_TENTATIVE: C1 is retained so a barge-in can restore it, the
 *   drafter is left running, and any in-flight optimistic prefill is awaited
 *   so its result can be handed to `onCommit`. A failed prefill is not an
 *   error here — `onCommit` simply receives no `prefillResult`.
 * - From any other state the event is ignored.
 *
 * If `dispose()` runs while the prefill is being awaited, the handler stops
 * without transitioning or committing, so a torn-down machine stays IDLE and
 * emits nothing further.
 *
 * @param finalTranscript The ASR final transcript for the turn.
 */
private async handleSpeechEnd(finalTranscript: string): Promise<void> {
  if (this.state !== "PAUSE_TENTATIVE") {
    // `speech-end` without a prior `speech-pause` — a single short utterance
    // with no mid-clause pause. No checkpoint exists; just transition.
    if (this.state === "LISTENING") {
      this.setState("SPEAKING");
      this.events.onCommit?.(this.getTurnId(), finalTranscript);
    }
    return;
  }

  // C1 was saved on `speech-pause`; keep it through SPEAKING so a barge-in
  // can restore. The drafter stays alive — its output is what the verifier
  // and TTS will stream from.
  this.pauseTimestampMs = null;

  // C7 — take ownership of the in-flight prefill (if any) and wait for it so
  // the verifier can start from the prefilled KV state.
  let prefillResult: PrefillOptimisticResult | undefined;
  const inflight = this.prefillPromise;
  this.prefillPromise = null;
  if (inflight) {
    try {
      prefillResult = await inflight;
    } catch {
      // Prefill failed — the verifier runs a regular (non-prefilled) pass.
      prefillResult = undefined;
    }
    // `dispose()` may have run during the await; do not revive the machine
    // or emit a commit after teardown.
    if (this.disposed) return;
  }

  this.setState("SPEAKING");
  this.events.onCommit?.(this.getTurnId(), finalTranscript, prefillResult);
}
|
|
536
|
+
|
|
537
|
+
/**
 * Handle the user interrupting while the machine is in `SPEAKING`.
 *
 * No-op in any other state. Otherwise: aborts the active draft with reason
 * `"barge-in"`, restores the held checkpoint (when checkpointing is enabled
 * and a handle exists) and emits `onRollback`, then advances the turn
 * counter and returns to `LISTENING`.
 *
 * Anything thrown inside the restore step — including by the `onRollback`
 * listener — is reported through `events.onError` with the `"restore"` tag
 * and the interrupted turn's id.
 */
private async handleBargeIn(): Promise<void> {
  if (this.state !== "SPEAKING") {
    return;
  }

  // Capture the id before the counter is bumped so that rollback and error
  // events are attributed to the turn that was interrupted.
  const interruptedTurnId = this.getTurnId();
  this.abortActiveDraft("barge-in");

  if (this.enabled && this.checkpoint) {
    const savedCheckpoint = this.checkpoint;
    try {
      await this.mgr.restoreCheckpoint(savedCheckpoint);
      this.events.onRollback?.(interruptedTurnId, savedCheckpoint);
    } catch (restoreError) {
      this.events.onError?.("restore", restoreError, interruptedTurnId);
    }
    // The handle is deliberately kept on `this.checkpoint`: a second
    // consecutive barge-in should be able to restore from the same C1. It
    // is released later, when a new turn replaces it or on `dispose()`.
  }

  this.turnCounter += 1;
  this.setState("LISTENING");
}
|
|
557
|
+
|
|
558
|
+
/**
 * Handle a partial transcript chunk from streaming ASR.
 *
 * When an `eotClassifier` is configured, scores the text and applies:
 *
 *   P ≥ EOT_COMMIT_THRESHOLD AND silence ≥ EOT_COMMIT_SILENCE_MS
 *     → behave as `speech-end` (commit immediately, skip remaining hangover)
 *
 *   P ≥ EOT_TENTATIVE_THRESHOLD AND silence ≥ EOT_TENTATIVE_SILENCE_MS
 *     AND state is LISTENING
 *     → behave as `speech-pause` (enter PAUSE_TENTATIVE, start drafter)
 *
 *   P < EOT_MID_CLAUSE_THRESHOLD
 *     → accumulate EOT_HANGOVER_EXTENSION_MS into the hangover extension
 *       (the VadDetector reads this via `getEotHangoverExtensionMs()`)
 *
 * No-ops when `eotClassifier` is not set, or when the machine is not in
 * LISTENING or PAUSE_TENTATIVE. The state is checked both before and after
 * the asynchronous scoring step; the score is still stored in
 * `latestEotProb` and reported via `onEotScore` even if the state changed
 * while scoring.
 *
 * The returned promise settles only after the delegated `speech-end` /
 * `speech-pause` handling has finished, so failures in either path reach
 * the caller instead of becoming unhandled rejections.
 *
 * @param timestampMs Event timestamp, forwarded to the `speech-pause` path.
 * @param text Partial transcript to score (and to commit/draft from).
 * @param silenceSinceMs Silence duration compared against the
 *   `EOT_*_SILENCE_MS` thresholds.
 */
private async handlePartialTranscript(
  timestampMs: number,
  text: string,
  silenceSinceMs: number,
): Promise<void> {
  if (!this.eotClassifier) return;
  const validStates: VoiceState[] = ["LISTENING", "PAUSE_TENTATIVE"];
  if (!validStates.includes(this.currentState())) return;

  const pDone = await this.checkEot(text);
  this.latestEotProb = pDone;
  this.events.onEotScore?.(this.getTurnId(), text, pDone);

  // Re-check state after async classifier — it may have changed.
  const stateNow = this.currentState();
  if (!validStates.includes(stateNow)) return;

  if (
    pDone >= EOT_COMMIT_THRESHOLD &&
    silenceSinceMs >= EOT_COMMIT_SILENCE_MS
  ) {
    // Treat as speech-end: commit immediately.
    // Use the partial as the final transcript (streaming ASR may not have
    // finalized yet; callers that have the final transcript should prefer
    // dispatching `speech-end` directly).
    // Awaited (like the speech-pause branch below) so the commit completes
    // before this handler resolves and any rejection propagates to the
    // caller rather than being dropped as a floating promise.
    await this.handleSpeechEnd(text);
    return;
  }

  if (
    pDone >= EOT_TENTATIVE_THRESHOLD &&
    silenceSinceMs >= EOT_TENTATIVE_SILENCE_MS &&
    stateNow === "LISTENING"
  ) {
    // Enter PAUSE_TENTATIVE early — start the speculative drafter now.
    await this.handleSpeechPause(timestampMs, text);
    return;
  }

  if (pDone < EOT_MID_CLAUSE_THRESHOLD) {
    // User is mid-clause — accumulate extra patience into the hangover.
    this.eotHangoverExtensionMs += EOT_HANGOVER_EXTENSION_MS;
  }
}
|
|
621
|
+
|
|
622
|
+
/**
 * Ask the Tier-3 EOT classifier how likely it is that `partial` ends the
 * user's turn.
 *
 * @param partial Partial transcript text handed to the classifier.
 * @returns The classifier's score, or the neutral value 0.5 when no
 *   classifier is configured (uncertain — tiers 1+2 decide instead).
 */
private async checkEot(partial: string): Promise<number> {
  const classifier = this.eotClassifier;
  return classifier ? classifier.score(partial) : 0.5;
}
|
|
630
|
+
|
|
631
|
+
// --- internal helpers ----------------------------------------------
|
|
632
|
+
|
|
633
|
+
/**
 * C7 — kick off the optimistic prefill in the background.
 *
 * The resulting promise is stored on `this.prefillPromise` so that
 * `handleSpeechEnd` can await it later; this method itself never awaits,
 * so the machine stays in PAUSE_TENTATIVE regardless of prefill progress.
 * On `SPEECH_ACTIVE_REBOUND` the stored promise is simply dropped; on
 * `SPEECH_END` it is awaited and its result forwarded through
 * `onCommit(prefillResult)`.
 *
 * W3-9 / F1 — if an `optimisticPolicy` is configured, the prefill only
 * starts when `policy.shouldStartOptimisticLm(eotProb)` returns true. The
 * policy combines the device's power source (plugged-in / battery /
 * unknown), the user's explicit override, and the EOT threshold into one
 * decision; when it declines, no prefill is started and `handleSpeechEnd`
 * falls back to a regular (non-prefilled) verifier pass.
 *
 * Nothing happens at all when no `prefillConfig` is set.
 *
 * @param partialText Partial transcript to prefill with.
 * @param eotProb End-of-turn probability, passed to the policy and to the
 *   prefill request.
 * @param turnId Turn id attached to the `onPrefill` notification.
 */
private firePrefill(
  partialText: string,
  eotProb: number,
  turnId: string,
): void {
  if (!this.prefillConfig) {
    return;
  }

  const policy = this.optimisticPolicy;
  if (policy && !policy.shouldStartOptimisticLm(eotProb)) {
    return;
  }

  const { baseUrl, checkpointOptions, getContext } = this.prefillConfig;
  // Normalise a missing provider or a nullish return value to `undefined`
  // so the `context` key is only sent when there is a value.
  const context = getContext?.() ?? undefined;

  const pending = prefillOptimistic(
    {
      baseUrl,
      slotId: this.slotId,
      partialText,
      eotProb,
      ...(context === undefined ? {} : { context }),
    },
    {
      checkpointManager: this.mgr,
      ...checkpointOptions,
    },
  );
  this.prefillPromise = pending;

  // Report the outcome through `onPrefill` without blocking the machine.
  // Supplying a rejection handler here also means a dropped promise never
  // surfaces as an unhandled rejection from the prefill itself.
  const reportSuccess = (result: Awaited<typeof pending>): void => {
    this.events.onPrefill?.(turnId, result, null);
  };
  const reportFailure = (error: unknown): void => {
    this.events.onPrefill?.(turnId, null, error);
  };
  pending.then(reportSuccess, reportFailure);
}
|
|
686
|
+
|
|
687
|
+
/**
 * Launch the speculative drafter for the current partial transcript.
 *
 * Creates a fresh `AbortController`, starts the drafter via
 * `startDrafterFn` with that controller's signal, records the draft on
 * `this.activeDraft`, and emits `onDrafterStart`.
 *
 * @param partial Partial transcript the drafter should work from.
 * @param turnId Id of the turn the draft belongs to.
 */
private startSpeculativeDrafter(partial: string, turnId: string): void {
  const abortController = new AbortController();
  const { signal } = abortController;

  const drafterHandle = this.startDrafterFn({
    partialTranscript: partial,
    turnId,
    signal,
  });

  // Keep both the handle and the controller so the draft can be cancelled
  // through either channel later.
  this.activeDraft = {
    handle: drafterHandle,
    controller: abortController,
    turnId,
    partial,
  };
  this.events.onDrafterStart?.(turnId, partial);
}
|
|
697
|
+
|
|
698
|
+
/**
 * Cancel the currently active speculative draft, if there is one.
 *
 * Clears `this.activeDraft` first, then aborts the draft's controller
 * (unless its signal is already aborted), calls the drafter handle's own
 * `abort(reason)`, and finally emits `onDrafterAbort` with the draft's
 * turn id.
 *
 * @param reason Why the draft is being cancelled; forwarded to the handle
 *   and to the `onDrafterAbort` listener.
 */
private abortActiveDraft(reason: DrafterAbortReason): void {
  const current = this.activeDraft;
  if (!current) {
    return;
  }

  // Detach before signalling so the draft is no longer visible as active
  // while the abort callbacks run.
  this.activeDraft = null;

  const { controller, handle, turnId } = current;
  if (!controller.signal.aborted) {
    controller.abort();
  }
  handle.abort(reason);
  this.events.onDrafterAbort?.(turnId, reason);
}
|
|
706
|
+
|
|
707
|
+
/**
 * Move the machine to `next` and notify `onStateChange`.
 *
 * A transition to the state the machine is already in is ignored: the
 * field is not rewritten and no event is emitted.
 *
 * @param next Target state.
 */
private setState(next: VoiceState): void {
  const previous = this.state;
  if (previous !== next) {
    this.state = next;
    this.events.onStateChange?.(previous, next, this.getTurnId());
  }
}
|
|
713
|
+
|
|
714
|
+
/**
 * Read `this.state` typed as the full `VoiceState` union.
 *
 * Intended for re-checks after an `await`: an earlier guard narrows
 * `this.state` for the type checker, which does not track mutations made
 * through `setState`, so a direct read would be treated as unable to have
 * changed. Going through this accessor yields the un-narrowed type.
 *
 * @returns The machine's current state.
 */
private currentState(): VoiceState {
  const snapshot: VoiceState = this.state;
  return snapshot;
}
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
/**
 * Format a numeric turn counter as a turn id.
 *
 * @param n Turn counter value.
 * @returns The string `turn-` followed by `n` written in base 36.
 */
function turnIdString(n: number): string {
  const base36 = n.toString(36);
  return "turn-" + base36;
}
|