@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Optimistic prefill client (C7) — implements `/v1/prefill` against the
|
|
3
|
+
* llama.cpp REST API in three phases:
|
|
4
|
+
*
|
|
5
|
+
* Phase 1 — `slot/save`: snapshot the pre-user-message KV state so a
|
|
6
|
+
* rollback can restore it if speech continues (SPEECH_ACTIVE_REBOUND).
|
|
7
|
+
*
|
|
8
|
+
* Phase 2 — `POST /completion` with stream=false + cache_prompt=true:
|
|
9
|
+
* run the model's prefill over `partialText` without sampling any
|
|
10
|
+
* output tokens. This warms the KV cache so the subsequent real
|
|
11
|
+
* generation can skip one full prefill RTT.
|
|
12
|
+
*
|
|
13
|
+
* Phase 3 — `slot/save` again: snapshot the post-prefill KV state under a
|
|
14
|
+
* separate name. The voice state machine passes this handle to the
|
|
15
|
+
* verifier so generation resumes from the prefilled position.
|
|
16
|
+
*
|
|
17
|
+
* The upstream `/v1/prefill` endpoint is absent — the fork PR that
|
|
18
|
+
* adds it is tracked in `docs/eliza-1-optimistic-rollback.md`. Until it
|
|
19
|
+
* lands, phases 1–3 are emulated via the existing slot-save REST path. When
|
|
20
|
+
* the upstream endpoint ships the body of `prefillOptimistic` switches to a
|
|
21
|
+
* single REST call — callers see no signature change.
|
|
22
|
+
*
|
|
23
|
+
* Upstream endpoint contract: replace phases 2+3 with a single
|
|
24
|
+
* `POST /v1/prefill { slotId, partialText, eotProb }` once llama.cpp exposes
|
|
25
|
+
* it. That call must run the model prefill against `slotId`, save the resulting
|
|
26
|
+
* KV checkpoint, and return `{ handle, eotProb }`.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { logger } from "@elizaos/core";
|
|
30
|
+
import type {
|
|
31
|
+
CheckpointHandle,
|
|
32
|
+
CheckpointManagerLike,
|
|
33
|
+
} from "./checkpoint-manager";
|
|
34
|
+
import type { ContextPartial } from "./eager-context-builder";
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Public types — match the task spec so existing callers are unaffected
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/**
 * Arguments for a single optimistic prefill attempt.
 *
 * `partialText` carries the partial transcript heard so far; `eotProb` is the
 * caller's own estimate (VAD hangover progress or the EOT classifier) that the
 * user has finished speaking.
 */
export interface PrefillOptimisticArgs {
  /** llama-server base URL, in the form `http://host:port`. */
  baseUrl: string;
  /** Id of the slot pinned to this conversation. */
  slotId: string;
  /** Partial transcript to prefill against. Must be non-empty. */
  partialText: string;
  /**
   * End-of-turn probability for the partial, in the range 0..1. For now it is
   * only recorded as telemetry; after `/v1/prefill` lands the server uses it
   * to decide whether to kick the drafter inline as well.
   */
  eotProb: number;
  /**
   * Deterministic context produced by `EagerContextBuilder` (C3). When given,
   * it supplies the system prompt for the prefill `/completion` call, so the
   * KV cache covers the system prompt as well as the partial transcript.
   * When omitted, only the partial transcript is prefilled.
   */
  context?: ContextPartial;
}
|
|
66
|
+
|
|
67
|
+
/** Outcome of one optimistic prefill round-trip. */
export interface PrefillOptimisticResult {
  /**
   * Handle to the POST-prefill KV snapshot. On SPEECH_END, hand it to
   * `CheckpointManager.restoreCheckpoint` so the verifier resumes from the
   * prefilled position.
   */
  checkpointHandle: CheckpointHandle;
  /**
   * Approximate number of tokens in the prefilled text. The REST emulation
   * path returns no token count, so this comes from a rough whitespace
   * tokenizer; the upstream endpoint will report the real count.
   */
  tokenCount: number;
  /** Wall-clock milliseconds spent on the whole round-trip (phases 1–3). */
  prefillMs: number;
  /**
   * Which backend served the call: `slot-save-emulation` is the pre-upstream
   * emulation path, `prefill-v1` is the native `/v1/prefill` endpoint.
   */
  backend: "slot-save-emulation" | "prefill-v1";
  /**
   * End-of-turn probability echoed back. The emulation path has nothing to
   * refine it with, so today it equals the caller's `eotProb`; the upstream
   * endpoint will return the server's own model estimate.
   */
  eotProb: number;
}
|
|
96
|
+
|
|
97
|
+
/** Collaborators and tunables for `prefillOptimistic`. */
export interface PrefillOptimisticOptions {
  /** Checkpoint manager used to take the pre- and post-prefill snapshots. */
  checkpointManager: CheckpointManagerLike;
  /**
   * Snapshot name for the PRE-prefill state (C1), which is the rollback
   * target on SPEECH_ACTIVE_REBOUND. Defaults to `pre-prefill`.
   */
  preCheckpointName?: string;
  /**
   * Snapshot name for the POST-prefill state, which the verifier starts from
   * on SPEECH_END. Defaults to `post-prefill`.
   */
  postCheckpointName?: string;
  /** Fetch implementation override for tests. Defaults to the global `fetch`. */
  fetchImpl?: typeof fetch;
  /**
   * Timeout in milliseconds for the `/completion` prefill request. Defaults
   * to 5 000 ms. The request is a prefill-only pass with no sampling, so it
   * should finish in O(transcript_tokens / throughput) — typically well under
   * 1 s for short partials.
   */
  prefillTimeoutMs?: number;
}
|
|
121
|
+
|
|
122
|
+
/** Snapshot name used for the pre-prefill (rollback) checkpoint when none is given. */
const DEFAULT_PRE_CHECKPOINT_NAME = "pre-prefill";
/** Snapshot name used for the post-prefill (resume) checkpoint when none is given. */
const DEFAULT_POST_CHECKPOINT_NAME = "post-prefill";
/** Timeout for the `/completion` prefill request when none is given: five seconds, in ms. */
const DEFAULT_PREFILL_TIMEOUT_MS = 5 * 1_000;
|
|
125
|
+
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Main function
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
|
|
130
|
+
/**
 * Run the three-phase optimistic prefill and return a checkpoint handle for
 * the post-prefill KV state.
 *
 * Voice state machine wiring:
 *   - Call on `PAUSE_TENTATIVE` entry with `eotProb` from the EOT classifier.
 *   - On `SPEECH_ACTIVE_REBOUND` (within rollback window): restore to the
 *     PRE-prefill checkpoint (C1 saved in phase 1) via the checkpoint manager.
 *     The post-prefill handle returned here is no longer needed.
 *   - On `SPEECH_END`: pass `result.checkpointHandle` to the verifier so it
 *     can resume generation from the prefilled KV state, saving one full
 *     prefill RTT.
 *
 * @param args - Server URL, slot id, partial transcript, end-of-turn
 *   probability and optional deterministic context.
 * @param opts - Checkpoint manager plus optional snapshot names, fetch
 *   override and prefill timeout.
 * @returns The post-prefill checkpoint handle together with an estimated
 *   token count, the elapsed wall-clock time, the backend label and the
 *   caller's `eotProb` echoed back.
 * @throws TypeError when `partialText`, `eotProb` or `baseUrl` fail
 *   validation. Rejections from `checkpointManager.saveCheckpoint` propagate;
 *   failures of the phase-2 HTTP call do not (they are logged and swallowed).
 */
export async function prefillOptimistic(
  args: PrefillOptimisticArgs,
  opts: PrefillOptimisticOptions,
): Promise<PrefillOptimisticResult> {
  assertPartialText(args.partialText);
  assertEotProb(args.eotProb);
  assertBaseUrl(args.baseUrl);

  const startMs = Date.now();
  const fetchImpl = opts.fetchImpl ?? fetch;
  const preName = opts.preCheckpointName ?? DEFAULT_PRE_CHECKPOINT_NAME;
  const postName = opts.postCheckpointName ?? DEFAULT_POST_CHECKPOINT_NAME;
  const timeoutMs = opts.prefillTimeoutMs ?? DEFAULT_PREFILL_TIMEOUT_MS;

  // ------------------------------------------------------------------
  // Phase 1: snapshot pre-user-message KV state (rollback target for
  // SPEECH_ACTIVE_REBOUND).
  // ------------------------------------------------------------------
  await opts.checkpointManager.saveCheckpoint(args.slotId, preName);

  // ------------------------------------------------------------------
  // Phase 2: POST to /completion with the partial text to warm the KV
  // cache. We request n_predict=0 / stream=false so the server only runs
  // the prefill pass without sampling any tokens. The slot id is forwarded
  // so the warm-up targets the same slot that phases 1 and 3 snapshot.
  //
  // Upstream replacement: use a single POST /v1/prefill once
  // llama.cpp exposes that endpoint.
  // ------------------------------------------------------------------
  await runPrefillCompletion({
    baseUrl: args.baseUrl,
    slotId: args.slotId,
    partialText: args.partialText,
    context: args.context,
    timeoutMs,
    fetchImpl,
  });

  // ------------------------------------------------------------------
  // Phase 3: snapshot post-prefill KV state (the handle the verifier
  // resumes from on SPEECH_END).
  // ------------------------------------------------------------------
  const postHandle = await opts.checkpointManager.saveCheckpoint(
    args.slotId,
    postName,
  );

  const prefillMs = Date.now() - startMs;
  const tokenCount = estimateTokenCount(args.partialText);

  return {
    checkpointHandle: postHandle,
    tokenCount,
    prefillMs,
    backend: "slot-save-emulation",
    eotProb: args.eotProb,
  };
}

// ---------------------------------------------------------------------------
// Phase 2 helper — no-sample /completion call
// ---------------------------------------------------------------------------

interface RunPrefillCompletionOpts {
  baseUrl: string;
  /**
   * Slot the prefill should run in. Forwarded as `id_slot` only when it is a
   * plain non-negative integer string; otherwise the server picks the slot.
   */
  slotId?: string;
  partialText: string;
  context?: ContextPartial;
  timeoutMs: number;
  fetchImpl: typeof fetch;
}

/**
 * Convert a slot id to the integer index sent as `id_slot`, or `undefined`
 * when the id is absent or not a plain non-negative integer string.
 *
 * NOTE(review): assumes numeric slot ids are llama.cpp slot indices — confirm
 * against the checkpoint manager's slot-save calls.
 */
function parseSlotIndex(slotId: string | undefined): number | undefined {
  if (slotId === undefined || !/^\d+$/.test(slotId)) return undefined;
  const index = Number(slotId);
  return Number.isSafeInteger(index) ? index : undefined;
}

/**
 * Assemble the prefill prompt: deterministic system blocks (if any), then the
 * conversation history as `User:` / `Assistant:` lines, then the partial
 * transcript as the final `User:` line. Sections are separated by a blank line.
 */
function buildPrefillPrompt(
  partialText: string,
  context?: ContextPartial,
): string {
  const systemText = context?.systemBlocks.filter(Boolean).join("\n\n") ?? "";
  const historyLines = (context?.historyBlocks ?? [])
    .map((h) => `${h.role === "user" ? "User" : "Assistant"}: ${h.content}`)
    .join("\n");

  const promptParts: string[] = [];
  if (systemText) promptParts.push(systemText);
  if (historyLines) promptParts.push(historyLines);
  promptParts.push(`User: ${partialText}`);
  return promptParts.join("\n\n");
}

/**
 * Read and discard a response body so the underlying connection is released
 * instead of staying pinned until garbage collection. Errors are ignored: the
 * body content is irrelevant for a prefill-only request.
 */
async function discardResponseBody(resp: Response): Promise<void> {
  try {
    await resp.arrayBuffer();
  } catch {
    // Nothing useful to do — the prefill outcome is already known from the
    // status line, and test doubles may not implement a body at all.
  }
}

/**
 * POST to `/completion` with `n_predict: 0` to prefill the KV cache without
 * decoding any output tokens. The system prompt is prepended from the
 * deterministic context half (C3) when available.
 *
 * On HTTP error or timeout we swallow and log a warning — a prefill failure
 * means the verifier will run a regular (non-prefilled) generation, not a
 * crash. The checkpoint state is still valid (phase 1 snapshot is intact).
 */
async function runPrefillCompletion(
  opts: RunPrefillCompletionOpts,
): Promise<void> {
  const { baseUrl, slotId, partialText, context, timeoutMs, fetchImpl } = opts;

  // Strip every trailing slash so "http://host/" and "http://host//" both
  // resolve to a single "/completion" path segment.
  const url = `${baseUrl.replace(/\/+$/, "")}/completion`;
  const slotIndex = parseSlotIndex(slotId);
  const body = {
    prompt: buildPrefillPrompt(partialText, context),
    // Zero tokens — prefill only, no decode.
    n_predict: 0,
    // Prefill into the cached slot.
    cache_prompt: true,
    // No sampling needed.
    stream: false,
    // Pin the request to the snapshotted slot when its index is known.
    ...(slotIndex === undefined ? {} : { id_slot: slotIndex }),
  };

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const resp = await fetchImpl(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    if (!resp.ok) {
      // Non-200 — prefill attempt failed, but we continue (phase 3 still runs).
      // In the real `/v1/prefill` path the server would surface a clear error;
      // for the emulation path we tolerate it.
      logger.warn(
        { status: resp.status },
        "[prefill-client] /completion returned non-200 — continuing without prefill warm",
      );
    }
    // Drain while the abort timer is still armed so a stalled body cannot
    // hang this call.
    await discardResponseBody(resp);
  } catch (err) {
    // Timeout or network failure — swallow.
    const reason =
      err instanceof Error && err.name === "AbortError"
        ? "timeout"
        : String(err);
    logger.warn(
      { reason },
      "[prefill-client] /completion prefill failed — continuing without prefill warm",
    );
  } finally {
    clearTimeout(timer);
  }
}
|
|
281
|
+
|
|
282
|
+
// ---------------------------------------------------------------------------
|
|
283
|
+
// Helpers
|
|
284
|
+
// ---------------------------------------------------------------------------
|
|
285
|
+
|
|
286
|
+
/**
 * Approximate a token count by counting whitespace-separated words. This is a
 * stand-in until the upstream `/v1/prefill` endpoint lands and the server
 * reports the real count.
 *
 * @param text - Text to measure.
 * @returns Number of maximal runs of non-whitespace characters; 0 for empty
 *   or whitespace-only input.
 */
function estimateTokenCount(text: string): number {
  const words = text.match(/\S+/g);
  if (words === null) {
    return 0;
  }
  return words.length;
}
|
|
293
|
+
|
|
294
|
+
/**
 * Guard for the partial transcript.
 *
 * @param s - Candidate transcript.
 * @throws TypeError when `s` is not a string or contains only whitespace.
 */
function assertPartialText(s: string): void {
  const hasContent = typeof s === "string" && s.trim().length > 0;
  if (hasContent) {
    return;
  }
  throw new TypeError(
    `[prefill-client] partialText must be a non-empty string (got ${JSON.stringify(s)})`,
  );
}
|
|
301
|
+
|
|
302
|
+
/**
 * Guard for the end-of-turn probability.
 *
 * @param p - Candidate probability.
 * @throws TypeError when `p` is not a finite number within [0, 1].
 */
function assertEotProb(p: number): void {
  const isProbability =
    typeof p === "number" && Number.isFinite(p) && p >= 0 && p <= 1;
  if (!isProbability) {
    throw new TypeError(
      `[prefill-client] eotProb must be a finite number in [0, 1] (got ${p})`,
    );
  }
}
|
|
309
|
+
|
|
310
|
+
/**
 * Guard for the server base URL. Only emptiness is checked; the value is not
 * parsed as a URL.
 *
 * @param url - Candidate base URL.
 * @throws TypeError when `url` is not a string or contains only whitespace.
 */
function assertBaseUrl(url: string): void {
  const isPresent = typeof url === "string" && url.trim().length > 0;
  if (isPresent) {
    return;
  }
  throw new TypeError(
    `[prefill-client] baseUrl must be a non-empty string (got ${JSON.stringify(url)})`,
  );
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prefix-preserving TTS rollback queue for barge-in handling.
|
|
3
|
+
*
|
|
4
|
+
* When the user barges in mid-response, the naive approach drops ALL
|
|
5
|
+
* in-flight audio chunks. This queue does better: it tags each audio
|
|
6
|
+
* chunk with the token range it covers, and on barge-in retains chunks
|
|
7
|
+
* whose token range ends at or before the divergence point (the last
|
|
8
|
+
* committed token index when the barge-in fires).
|
|
9
|
+
*
|
|
10
|
+
* If the new user utterance continues the topic, audio up to the
|
|
11
|
+
* divergence point plays smoothly. Chunks for tokens past the divergence
|
|
12
|
+
* are dropped.
|
|
13
|
+
*
|
|
14
|
+
* Data model:
|
|
15
|
+
*
|
|
16
|
+
* TaggedAudioChunk — a PCM buffer paired with [start, end] token indices
|
|
17
|
+
* (inclusive) and its duration in milliseconds.
|
|
18
|
+
*
|
|
19
|
+
* PrefixPreservingQueue — ordered queue of TaggedAudioChunk. On barge-in
|
|
20
|
+
* with a given divergencePoint:
|
|
21
|
+
* keep when chunk.tokenRange[1] <= divergencePoint
|
|
22
|
+
* drop when chunk.tokenRange[0] > divergencePoint
|
|
23
|
+
* keep when chunk straddles the point (tokenRange[0] <= point
|
|
24
|
+
* but tokenRange[1] > point) — kept whole; the scheduler
|
|
25
|
+
* treats sub-phrase granularity as a best-effort approximation.
|
|
26
|
+
*
|
|
27
|
+
* The old `handleBargeIn` path (ring-buffer drain + full stop) remains
|
|
28
|
+
* active as a fallback when the queue is not wired (e.g. the backend
|
|
29
|
+
* emits chunks without token-range tags). When the queue IS wired, the
|
|
30
|
+
* scheduler calls `rollbackAt(divergencePoint)` instead of a plain drain,
|
|
31
|
+
* and replays the retained prefix into the sink before resuming.
|
|
32
|
+
*/
|
|
33
|
+
/** A PCM audio buffer tagged with the token range it covers and its duration. */
export interface TaggedAudioChunk {
  /** PCM samples for this chunk. */
  pcm: Float32Array;
  /**
   * Token-index range covered by the audio, as `[start, end]` with both ends
   * inclusive and `start <= end`. Indices live in the scheduler's token-index
   * space (the same one used by `Phrase.fromIndex` / `Phrase.toIndex`).
   */
  tokenRange: [number, number];
  /**
   * Chunk duration in wall-clock milliseconds, i.e.
   * `pcm.length / sampleRate * 1000`. Kept on the chunk so the queue can
   * report total retained duration to telemetry without knowing the sample
   * rate.
   */
  durationMs: number;
}
|
|
50
|
+
/** Partition of the queue produced by a rollback at a divergence point. */
export interface RollbackResult {
  /**
   * Chunks kept for replay: those whose token range ends at or before the
   * divergence point, plus the straddling chunks listed in `straddled`.
   */
  retained: TaggedAudioChunk[];
  /** Chunks discarded because their token range starts after the divergence point. */
  dropped: TaggedAudioChunk[];
  /**
   * Chunks that straddled the divergence point (started at or before it,
   * ended after it). They are kept in `retained` at phrase granularity; this
   * list exists so callers can inspect them for telemetry.
   */
  straddled: TaggedAudioChunk[];
  /** Total duration of the retained chunks, in milliseconds. */
  retainedDurationMs: number;
  /** Total duration of the dropped chunks, in milliseconds. */
  droppedDurationMs: number;
}
|
|
66
|
+
/**
 * Queue of tagged TTS audio chunks that can be split at a token index.
 *
 * Typical flow:
 *   1. Call `enqueue` for every audio chunk the TTS backend delivers.
 *   2. When the user barges in, call `rollbackAt(divergencePoint)` to get the
 *      retained/dropped partition; replay the retained prefix into the audio
 *      sink and discard the rest.
 *   3. Call `clear()` to reset, e.g. at the start of a new turn.
 *
 * Thread-safety: single-threaded JS — no locking needed.
 */
export declare class PrefixPreservingQueue {
  private readonly chunks;
  /** How many chunks the queue currently holds. */
  get size(): number;
  /**
   * Append a tagged audio chunk at the tail. The queue never sorts, so chunks
   * MUST arrive in token-range order (ascending `tokenRange[0]`); rollback
   * behaviour is unspecified otherwise.
   */
  enqueue(chunk: TaggedAudioChunk): void;
  /**
   * Split the queue at `divergencePoint` (the last committed token index),
   * empty it, and return the three-way split.
   *
   * Per-chunk decision:
   *   chunk.tokenRange[1] <= divergencePoint  → retained (prefix)
   *   chunk.tokenRange[0] >  divergencePoint  → dropped (post-divergence)
   *   otherwise (straddle)                    → retained (best-effort)
   *
   * The queue is empty once this returns. Callers should replay `retained`
   * into the audio sink.
   */
  rollbackAt(divergencePoint: number): RollbackResult;
  /**
   * Discard every queued chunk without replaying any. This is the fallback
   * for the hard-stop / full-cancel path, used when the new utterance does
   * not continue the topic.
   *
   * NOTE(review): the returned array is presumably the chunks that were
   * removed — confirm against the implementation.
   */
  clear(): TaggedAudioChunk[];
  /**
   * Read-only view of the current queue contents for telemetry and tests;
   * the queue itself is left unchanged.
   */
  snapshot(): ReadonlyArray<TaggedAudioChunk>;
}
|
|
113
|
+
//# sourceMappingURL=prefix-preserving-queue.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prefix-preserving-queue.d.ts","sourceRoot":"","sources":["prefix-preserving-queue.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,MAAM,WAAW,gBAAgB;IAChC,GAAG,EAAE,YAAY,CAAC;IAClB;;;;;OAKG;IACH,UAAU,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B;;;;;OAKG;IACH,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC9B,uEAAuE;IACvE,QAAQ,EAAE,gBAAgB,EAAE,CAAC;IAC7B,iEAAiE;IACjE,OAAO,EAAE,gBAAgB,EAAE,CAAC;IAC5B;;;;OAIG;IACH,SAAS,EAAE,gBAAgB,EAAE,CAAC;IAC9B,uDAAuD;IACvD,kBAAkB,EAAE,MAAM,CAAC;IAC3B,sDAAsD;IACtD,iBAAiB,EAAE,MAAM,CAAC;CAC1B;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,qBAAqB;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA0B;IAEjD,+CAA+C;IAC/C,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED;;;;OAIG;IACH,OAAO,CAAC,KAAK,EAAE,gBAAgB,GAAG,IAAI;IAItC;;;;;;;;;;;OAWG;IACH,UAAU,CAAC,eAAe,EAAE,MAAM,GAAG,cAAc;IAmCnD;;;;OAIG;IACH,KAAK,IAAI,gBAAgB,EAAE;IAK3B;;;OAGG;IACH,QAAQ,IAAI,aAAa,CAAC,gBAAgB,CAAC;CAG3C"}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prefix-preserving TTS rollback queue for barge-in handling.
|
|
3
|
+
*
|
|
4
|
+
* When the user barges in mid-response, the naive approach drops ALL
|
|
5
|
+
* in-flight audio chunks. This queue does better: it tags each audio
|
|
6
|
+
* chunk with the token range it covers, and on barge-in retains chunks
|
|
7
|
+
* whose token range ends at or before the divergence point (the last
|
|
8
|
+
* committed token index when the barge-in fires).
|
|
9
|
+
*
|
|
10
|
+
* If the new user utterance continues the topic, audio up to the
|
|
11
|
+
* divergence point plays smoothly. Chunks for tokens past the divergence
|
|
12
|
+
* are dropped.
|
|
13
|
+
*
|
|
14
|
+
* Data model:
|
|
15
|
+
*
|
|
16
|
+
* TaggedAudioChunk — a PCM buffer paired with [start, end] token indices
|
|
17
|
+
* (inclusive) and its duration in milliseconds.
|
|
18
|
+
*
|
|
19
|
+
* PrefixPreservingQueue — ordered queue of TaggedAudioChunk. On barge-in
|
|
20
|
+
* with a given divergencePoint:
|
|
21
|
+
* keep when chunk.tokenRange[1] <= divergencePoint
|
|
22
|
+
* drop when chunk.tokenRange[0] > divergencePoint
|
|
23
|
+
* keep when chunk straddles the point (tokenRange[0] <= point
|
|
24
|
+
* but tokenRange[1] > point) — kept whole; the scheduler
|
|
25
|
+
* treats sub-phrase granularity as a best-effort approximation.
|
|
26
|
+
*
|
|
27
|
+
* The old `handleBargeIn` path (ring-buffer drain + full stop) remains
|
|
28
|
+
* active as a fallback when the queue is not wired (e.g. the backend
|
|
29
|
+
* emits chunks without token-range tags). When the queue IS wired, the
|
|
30
|
+
* scheduler calls `rollbackAt(divergencePoint)` instead of a plain drain,
|
|
31
|
+
* and replays the retained prefix into the sink before resuming.
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
/** A PCM audio buffer tagged with the token range it covers and its duration. */
export interface TaggedAudioChunk {
  /** PCM samples for this chunk. */
  pcm: Float32Array;
  /**
   * Token-index range covered by the audio, as `[start, end]` with both ends
   * inclusive and `start <= end`. Indices live in the scheduler's token-index
   * space (the same one used by `Phrase.fromIndex` / `Phrase.toIndex`).
   */
  tokenRange: [number, number];
  /**
   * Chunk duration in wall-clock milliseconds, i.e.
   * `pcm.length / sampleRate * 1000`. Kept on the chunk so the queue can
   * report total retained duration to telemetry without knowing the sample
   * rate.
   */
  durationMs: number;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Outcome of partitioning the queue at a divergence point.
 */
export interface RollbackResult {
  /**
   * Chunks to keep, in queue order: those whose token range ends at or
   * before the divergence point, plus any chunk that straddles it.
   */
  retained: TaggedAudioChunk[];
  /** Chunks discarded because their token range starts after the divergence point. */
  dropped: TaggedAudioChunk[];
  /**
   * The subset of `retained` that straddled the divergence point (began
   * at or before it and ended after it). These are kept whole, at phrase
   * granularity; the list is exposed so callers can report it in telemetry.
   */
  straddled: TaggedAudioChunk[];
  /** Total `durationMs` across all retained chunks, in milliseconds. */
  retainedDurationMs: number;
  /** Total `durationMs` across all dropped chunks, in milliseconds. */
  droppedDurationMs: number;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Ordered queue of token-tagged audio chunks that can be split at a
 * divergence point so the already-valid prefix survives a barge-in.
 *
 * Typical flow:
 *   1. `enqueue` every audio chunk as it arrives from the TTS backend.
 *   2. When a barge-in happens, call `rollbackAt(divergencePoint)`; it
 *      empties the queue and reports which chunks to keep and which to
 *      discard. The caller replays the kept prefix into the audio sink.
 *   3. `clear()` resets the queue, e.g. at the start of a new turn.
 *
 * The class performs no locking; it relies on JavaScript's
 * single-threaded execution model.
 */
export class PrefixPreservingQueue {
  private readonly chunks: TaggedAudioChunk[] = [];

  /** How many chunks are queued right now. */
  get size(): number {
    return this.chunks.length;
  }

  /**
   * Append a chunk to the end of the queue.
   *
   * The queue never sorts: callers are responsible for enqueueing in
   * ascending `tokenRange[0]` order. Rollback behaviour is unspecified
   * if that ordering is violated.
   */
  enqueue(chunk: TaggedAudioChunk): void {
    this.chunks.push(chunk);
  }

  /**
   * Empty the queue and split its contents around `divergencePoint`
   * (the last committed token index).
   *
   * Classification of each chunk:
   *   - range ends at or before the point   → `retained`
   *   - range starts after the point        → `dropped`
   *   - anything else (straddles the point) → `retained` and `straddled`
   *
   * @param divergencePoint Last committed token index.
   * @returns The partitioned chunks together with the summed durations
   *   of the retained and dropped groups. The queue is empty afterwards;
   *   callers are expected to replay `retained` into the audio sink.
   */
  rollbackAt(divergencePoint: number): RollbackResult {
    const retained: TaggedAudioChunk[] = [];
    const dropped: TaggedAudioChunk[] = [];
    const straddled: TaggedAudioChunk[] = [];
    let retainedDurationMs = 0;
    let droppedDurationMs = 0;

    // Detach everything up front; the queue is empty from here on.
    const pending = this.chunks.splice(0, this.chunks.length);

    for (const item of pending) {
      const [first, last] = item.tokenRange;
      const endsInPrefix = last <= divergencePoint;

      // Entirely past the divergence point — discard.
      if (!endsInPrefix && first > divergencePoint) {
        dropped.push(item);
        droppedDurationMs += item.durationMs;
        continue;
      }

      // Not fully inside the prefix, yet not fully past it: a straddler.
      // It is kept whole (phrase granularity) and additionally recorded.
      if (!endsInPrefix) {
        straddled.push(item);
      }

      retained.push(item);
      retainedDurationMs += item.durationMs;
    }

    return {
      retained,
      dropped,
      straddled,
      retainedDurationMs,
      droppedDurationMs,
    };
  }

  /**
   * Remove every queued chunk without replaying any of them. This is the
   * hard-stop / full-cancel fallback for when the new utterance does not
   * continue the topic.
   *
   * @returns The chunks that were removed, in queue order.
   */
  clear(): TaggedAudioChunk[] {
    return this.chunks.splice(0, this.chunks.length);
  }

  /**
   * Return a copy of the current queue contents, leaving the queue
   * itself untouched. Intended for telemetry and tests.
   */
  snapshot(): ReadonlyArray<TaggedAudioChunk> {
    return [...this.chunks];
  }
}
|