@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipelined parallel-generation scheduler — the fused mic→speech graph
|
|
3
|
+
* from `packages/inference/AGENTS.md` §4:
|
|
4
|
+
*
|
|
5
|
+
* mic / file → ASR → text tokens
|
|
6
|
+
* ↓
|
|
7
|
+
* scheduler ──→ MTP drafter (proposes N tokens)
|
|
8
|
+
* ∥ (overlap, not sequential)
|
|
9
|
+
* target verifier (text model)
|
|
10
|
+
* ↓
|
|
11
|
+
* accepted tokens → phrase chunker
|
|
12
|
+
* ↓ ↘
|
|
13
|
+
* speaker preset (cached) rollback queue
|
|
14
|
+
* ↓ ↙
|
|
15
|
+
* OmniVoice TTS ←── on-reject: cancel chunk
|
|
16
|
+
* ↓
|
|
17
|
+
* PCM ring buffer → audio out
|
|
18
|
+
*
|
|
19
|
+
* The headline contract: **the moment ASR emits its last token, the
|
|
20
|
+
* MTP drafter starts drafting AND the target starts verifying — they
|
|
21
|
+
* overlap.** Drafter speculation N tokens ahead happens concurrently
|
|
22
|
+
* with the target verifying the previous window; accepted tokens are
|
|
23
|
+
* handed to the phrase chunker within the same scheduler tick.
|
|
24
|
+
*
|
|
25
|
+
* GPU command buffers stay N=1 (no command-buffer batching for voice)
|
|
26
|
+
* so a barge-in cancel lands at the next kernel boundary, not after a
|
|
27
|
+
* batch flush.
|
|
28
|
+
*
|
|
29
|
+
* Why this lives next to `VoiceScheduler` and not inside it: the
|
|
30
|
+
* scheduler owns the *audio* side (chunker → TTS → ring buffer →
|
|
31
|
+
* rollback → barge-in). This module owns the *text-generation* side
|
|
32
|
+
* (audio source → ASR → drafter∥verifier loop) and feeds accepted /
|
|
33
|
+
* rejected ranges into the scheduler. Keeping them separate keeps the
|
|
34
|
+
* scheduler usable from text-only callers (which reach the same nodes
|
|
35
|
+
* via the same scheduler — AGENTS.md §4) without an ASR/drafter
|
|
36
|
+
* dependency.
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
import { PartialStabilizer } from "./partial-stabilizer";
|
|
40
|
+
import type { VoiceScheduler } from "./scheduler";
|
|
41
|
+
import type {
|
|
42
|
+
PcmFrame,
|
|
43
|
+
RejectedTokenRange,
|
|
44
|
+
StreamingTranscriber,
|
|
45
|
+
TextToken,
|
|
46
|
+
TranscriptionAudio,
|
|
47
|
+
VerifierStreamEvent,
|
|
48
|
+
} from "./types";
|
|
49
|
+
|
|
50
|
+
/**
 * Turn a transcript into a run of contiguous `TextToken`s.
 *
 * The transcript is trimmed and then cut at every boundary where a
 * non-whitespace character is followed by whitespace. Each chunk after the
 * first therefore carries its own leading whitespace, so concatenating the
 * `text` fields reproduces the trimmed transcript exactly. This is a
 * surface-level word chunking — an approximation of the model's subword
 * boundaries for when only the transcript text is available.
 *
 * @param transcript Raw transcript text; blank or whitespace-only input
 *   produces an empty array.
 * @param startIndex Index assigned to the first token; later tokens count
 *   up from it by one.
 * @param tokenIds Optional vocabulary ids for the transcript. They are
 *   copied onto the tokens as `id` only when there is exactly one id per
 *   chunk; on a count mismatch they are ignored entirely, because a
 *   positional pairing would attach ids to the wrong text.
 * @returns The tokens, in transcript order.
 */
export function splitTranscriptToTokens(
  transcript: string,
  startIndex = 0,
  tokenIds?: ReadonlyArray<number>,
): TextToken[] {
  const text = transcript.trim();
  if (text === "") return [];

  // Zero-width split: lookbehind for a non-space, lookahead for a space.
  const chunks = text.split(/(?<=\S)(?=\s)/).filter((chunk) => chunk !== "");

  // Ids are usable only when they pair one-to-one with the surface chunks.
  const pairedIds =
    tokenIds?.length === chunks.length ? tokenIds : undefined;

  return chunks.map((chunk, position) => {
    const token: TextToken = { index: startIndex + position, text: chunk };
    if (pairedIds) token.id = pairedIds[position];
    return token;
  });
}
|
|
90
|
+
|
|
91
|
+
/**
 * Contract for the MTP drafter.
 *
 * `propose` is handed the accepted prefix and resolves with at most
 * `maxDraft` candidate continuation tokens. Implementations are expected to
 * keep GPU command buffers at N=1 (no command-buffer batching for voice) so
 * that a cancel can take effect at the next kernel boundary, and to check
 * `cancel.cancelled` between kernel ticks.
 */
export interface DraftProposer {
  propose(args: {
    /** Tokens accepted so far, in order. */
    prefix: readonly TextToken[];
    /** Upper bound on the number of tokens to propose. */
    maxDraft: number;
    /** Shared cancellation flag; set to `true` by the pipeline on cancel. */
    cancel: { cancelled: boolean };
  }): Promise<Array<TextToken>>;
}
|
|
105
|
+
|
|
106
|
+
/**
 * Contract for the target verifier (the text model).
 *
 * `verify` receives the accepted prefix and a draft window. Its `accepted`
 * result holds the leading draft tokens the verifier kept, followed by the
 * single corrected token at the first divergence when there is one. With an
 * empty draft the verifier still yields one token, i.e. a plain
 * autoregressive step. Implementations check `cancel.cancelled` between
 * kernel ticks.
 */
export interface TargetVerifier {
  verify(args: {
    /** Tokens accepted so far, in order. */
    prefix: readonly TextToken[];
    /** Drafted tokens to check against the target model. */
    draft: readonly TextToken[];
    /** Shared cancellation flag; set to `true` by the pipeline on cancel. */
    cancel: { cancelled: boolean };
  }): Promise<{
    /** Kept draft tokens plus any correction token, in order. */
    accepted: Array<TextToken>;
    /** True once the verifier has reached the natural end of generation. */
    done: boolean;
  }>;
}
|
|
124
|
+
|
|
125
|
+
/** Collaborators a `VoicePipeline` is constructed with. */
export interface VoicePipelineDeps {
  /** Audio-side scheduler that receives accepted / rejected token ranges. */
  scheduler: VoiceScheduler;
  /**
   * Live frame-fed ASR adapter from `voice/transcriber.ts` (the fused
   * `eliza_inference_asr_stream_*` adapter, the fused batch adapter, or
   * `MissingAsrTranscriber`, which defers a hard failure).
   *
   * The pipeline uses it in batch fashion: the whole VAD-gated utterance
   * buffer goes in as a single frame, `flush()` finalizes, and the final
   * transcript is cut into contiguous text tokens with
   * `splitTranscriptToTokens`. `StreamingTranscriber` is the only ASR
   * contract — no separate batch interface exists.
   */
  transcriber: StreamingTranscriber;
  /** Proposes speculative continuation tokens. */
  drafter: DraftProposer;
  /** Checks drafted tokens against the target text model. */
  verifier: TargetVerifier;
}
|
|
140
|
+
|
|
141
|
+
/** Tunables for a `VoicePipeline`. */
export interface VoicePipelineConfig {
  /**
   * Upper bound on tokens the MTP drafter proposes in one round. This is a
   * per-tier setting and is kept small (≤8) so a rollback stays cheap.
   * Drafting and verifying overlap by one round: round k+1 is being
   * speculated while round k is being verified.
   */
  maxDraftTokens: number;
  /**
   * Safety stop: the most tokens a single turn may generate. Generation
   * normally ends when the verifier reports `done`; this limit only exists
   * to bound a runaway model.
   */
  maxGeneratedTokens?: number;
  /**
   * A2 — enables a LocalAgreement-n stabilizer (`PartialStabilizer`) that
   * streaming-ASR partials pass through before being split into tokens and
   * handed to the drafter. Disabled by default until the streaming-ASR
   * fast path lands and the latency/quality trade is validated. The batch
   * path driven by `StreamingTranscriber.flush()` is not affected, since
   * the stabilizer does nothing on a single final partial.
   */
  usePartialStabilizer?: boolean;
  /**
   * A2 — the agreement count `n` given to `PartialStabilizer`. Has no
   * effect unless `usePartialStabilizer` is true. Default 2.
   */
  partialStabilizerAgreementCount?: number;
}
|
|
168
|
+
|
|
169
|
+
/** Optional observer hooks for a `VoicePipeline` turn. */
export interface VoicePipelineEvents {
  /**
   * Called once, as soon as ASR has emitted its final token — the moment
   * the drafter and verifier are kicked off.
   */
  onAsrComplete?(tokens: readonly TextToken[]): void;
  /**
   * Called exactly once per turn, after the ASR phase has finished and
   * ahead of the first drafter/verifier round.
   *
   * Within a turn ASR → text → TTS run sequentially (AGENTS.md §4), so the
   * ASR model's pages are idle from here on and can be released. Hook this
   * up to `MmapRegionHandle.evictPages()` (`madvise(MADV_DONTNEED)` on
   * POSIX) for the ASR region to recover roughly 1 GB of peak RSS while TTS
   * decodes; the pages fault back in transparently at the next turn's
   * `feed()`. Hosts that would rather keep ASR resident just leave this
   * hook unset. The hook may be async — the pipeline never waits for it,
   * so a slow trim cannot hold up first audio.
   */
  onAsrPhaseComplete?(): void | Promise<void>;
  /** Called for every verifier accept/reject event before it hits the scheduler. */
  onVerifierEvent?(event: VerifierStreamEvent): void;
  /** Called when the loop ends: verifier `done`, token cap, or barge-in cancel. */
  onComplete?(reason: "done" | "token-cap" | "cancelled"): void;
}
|
|
189
|
+
|
|
190
|
+
/** Per-turn generation cap applied when the config does not supply one. */
const DEFAULT_MAX_GENERATED_TOKENS = 4096;

/** Bookkeeping for the turn currently in flight. */
interface PipelineRun {
  /** Mutable flag flipped to `true` to request cancellation of the turn. */
  cancel: { cancelled: boolean };
  /** Settles with the reason the turn ended. */
  done: Promise<"done" | "token-cap" | "cancelled">;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Text-generation side of one voice turn: audio → ASR → overlapped
 * drafter ∥ verifier loop, with accepted and rejected token ranges pushed
 * into the `VoiceScheduler`.
 *
 * One pipeline per active voice turn. Construct, call `run(audio)`, and
 * await the returned promise (or call `cancel()` for barge-in). The
 * constructor attaches a listener to the scheduler's barge-in controller
 * that marks the in-flight run cancelled — wire `bridge.triggerBargeIn()`
 * and this pipeline's `cancel()` to the same VAD signal so the audio side
 * (ring buffer drain) and the text side (stop drafting/verifying) abort
 * together.
 */
export class VoicePipeline {
  private readonly scheduler: VoiceScheduler;
  private readonly transcriber: StreamingTranscriber;
  private readonly drafter: DraftProposer;
  private readonly verifier: TargetVerifier;
  /** `config.maxDraftTokens`, floored and clamped to at least 1. */
  private readonly maxDraftTokens: number;
  /** Per-turn generation cap, floored and clamped to at least 1. */
  private readonly maxGeneratedTokens: number;
  private readonly events: VoicePipelineEvents;
  /**
   * A2 — the `PartialStabilizer` created when
   * `config.usePartialStabilizer` is truthy, otherwise null. This class
   * only constructs and exposes it (see `getPartialStabilizer()`); the
   * batch path in `transcribeAll()` does not feed it.
   */
  private readonly partialStabilizer: PartialStabilizer | null;
  /** The turn currently in flight, or null when idle. */
  private active: PipelineRun | null = null;

  /**
   * @param deps Scheduler, transcriber, drafter and verifier to drive.
   * @param config Draft window size, generation cap and stabilizer options.
   * @param events Optional observer hooks; defaults to none.
   */
  constructor(
    deps: VoicePipelineDeps,
    config: VoicePipelineConfig,
    events: VoicePipelineEvents = {},
  ) {
    this.scheduler = deps.scheduler;
    this.transcriber = deps.transcriber;
    this.drafter = deps.drafter;
    this.verifier = deps.verifier;
    this.maxDraftTokens = Math.max(1, Math.floor(config.maxDraftTokens));
    this.maxGeneratedTokens = Math.max(
      1,
      Math.floor(config.maxGeneratedTokens ?? DEFAULT_MAX_GENERATED_TOKENS),
    );
    this.events = events;
    this.partialStabilizer = config.usePartialStabilizer
      ? new PartialStabilizer({
          agreementCount: config.partialStabilizerAgreementCount,
        })
      : null;
    // A mic VAD barge-in cancels the audio side via the scheduler's
    // barge-in controller; mirror it onto the text side so drafting and
    // verifying stop as well.
    this.scheduler.bargeIn.attach({
      onCancel: () => {
        if (this.active) this.active.cancel.cancelled = true;
      },
    });
  }

  /** True while a turn is in flight. */
  isRunning(): boolean {
    return this.active !== null;
  }

  /**
   * A2 — the `PartialStabilizer` built for this pipeline when
   * `usePartialStabilizer` was set, otherwise null. Intended for a
   * streaming-ASR adapter that feeds partials through it; a null return
   * lets such an adapter skip the work entirely.
   */
  getPartialStabilizer(): PartialStabilizer | null {
    return this.partialStabilizer;
  }

  /**
   * Run one mic→speech turn: transcribe `audio`, then run the overlapped
   * drafter/verifier loop, pushing tokens into the scheduler as they are
   * drafted and verified.
   *
   * @param audio The utterance to transcribe.
   * @returns The reason the turn ended.
   * @throws Error if a turn is already running on this pipeline.
   */
  async run(
    audio: TranscriptionAudio,
  ): Promise<"done" | "token-cap" | "cancelled"> {
    if (this.active) {
      throw new Error(
        "[voice-pipeline] a turn is already running; cancel() it or await the previous run() first",
      );
    }
    const cancel = { cancelled: false };
    const done = this.execute(audio, cancel);
    this.active = { cancel, done };
    try {
      return await done;
    } finally {
      // Cleared on success and on failure so a later run() is possible.
      this.active = null;
    }
  }

  /**
   * Barge-in. Marks the in-flight turn (if any) cancelled, so the ASR
   * phase and the drafter/verifier loop stop at their next cancellation
   * check, and signals the scheduler's barge-in controller through
   * `onMicActive()`.
   *
   * NOTE(review): the barge-in signal is sent even when no turn is
   * running — confirm that `onMicActive()` is harmless while idle.
   */
  cancel(): void {
    if (this.active) this.active.cancel.cancelled = true;
    this.scheduler.bargeIn.onMicActive();
  }

  /**
   * Body of a turn: the ASR phase followed by the speculative
   * drafter ∥ verifier loop.
   *
   * @param audio The utterance to transcribe.
   * @param cancel Cancellation flag shared with the drafter and verifier.
   * @returns The reason the turn ended (also reported via `onComplete`).
   */
  private async execute(
    audio: TranscriptionAudio,
    cancel: { cancelled: boolean },
  ): Promise<"done" | "token-cap" | "cancelled"> {
    // --- ASR phase -----------------------------------------------------
    // Drive the `StreamingTranscriber` as a batch: one frame in, `flush()`
    // to finalize, final transcript split into contiguous text tokens.
    // Token ids reported with the transcript ride along as `TextToken.id`
    // when they line up; otherwise the word-chunk fallback is used.
    const asrTokens = await this.transcribeAll(audio, cancel);
    if (cancel.cancelled) return this.finish("cancelled");
    // ASR's last token is in: this is the drafter + verifier kick-off.
    this.events.onAsrComplete?.(asrTokens);
    // ASR is not touched again this turn, so let the host drop its idle
    // pages. Fire-and-forget: a slow hook must not delay the drafter.
    if (this.events.onAsrPhaseComplete) {
      void Promise.resolve(this.events.onAsrPhaseComplete()).catch(() => {});
    }

    // --- overlapped drafter ∥ verifier loop ---------------------------
    // Each round:
    //   1. take the drafter's proposed tokens (that `propose` ran
    //      concurrently with the previous round's verify),
    //   2. SPECULATIVELY push them to the scheduler now, so TTS for
    //      drafted phrases can start immediately,
    //   3. concurrently: kick the *next* draft AND run the verifier,
    //   4. when the verifier returns, reject the draft positions it did
    //      not keep, then push its correction / bonus tokens,
    //   5. if a reject happened, the next draft kicked in (3) is stale —
    //      drop it and re-draft from the corrected prefix.
    const prefix: TextToken[] = [...asrTokens];
    let nextIndex =
      asrTokens.length > 0 ? asrTokens[asrTokens.length - 1].index + 1 : 0;
    let generated = 0;

    let pendingDraft = this.startDraft(prefix, cancel);

    for (;;) {
      if (cancel.cancelled) return this.finish("cancelled");
      const draft = await pendingDraft;
      if (cancel.cancelled) return this.finish("cancelled");
      const proposed = draft.map((t, i) => ({
        index: nextIndex + i,
        text: t.text,
      }));

      // (2) speculative TTS — push drafted tokens to the scheduler now,
      // stopping at the per-turn cap.
      let speculated = 0;
      for (const t of proposed) {
        if (generated + speculated >= this.maxGeneratedTokens) break;
        await this.scheduler.accept(t);
        speculated++;
      }
      // Only tokens the scheduler actually received take part in the rest
      // of the round. Verifying or committing tokens that were cut off by
      // the cap would let `generated` exceed the cap and would report
      // accepts/rejects for tokens that were never forwarded.
      const indexedDraft = proposed.slice(0, speculated);
      if (indexedDraft.length > 0) {
        this.events.onVerifierEvent?.({
          kind: "accept",
          tokens: indexedDraft,
        });
      }

      // (3) OVERLAP: kick next draft on the optimistic prefix, then verify.
      const optimisticPrefix = [...prefix, ...indexedDraft];
      let nextDraft: Promise<TextToken[]> | null = this.startDraft(
        optimisticPrefix,
        cancel,
      );
      const result = await this.verifier.verify({
        prefix,
        draft: indexedDraft,
        cancel,
      });
      if (cancel.cancelled) return this.finish("cancelled");

      // (4) how many leading draft tokens did the verifier keep?
      const acceptedFromDraft = countMatchingPrefix(
        result.accepted,
        indexedDraft,
      );
      if (acceptedFromDraft < indexedDraft.length) {
        // Rejected draft tail → tell the scheduler to drop that range.
        const range: RejectedTokenRange = {
          fromIndex: nextIndex + acceptedFromDraft,
          toIndex: nextIndex + indexedDraft.length - 1,
        };
        this.events.onVerifierEvent?.({
          kind: "reject",
          tokens: indexedDraft.slice(acceptedFromDraft),
        });
        await this.scheduler.reject(range);
        nextDraft = null; // (5) stale — re-draft from the corrected prefix
      }

      // Commit the kept draft tokens to the running state.
      for (let i = 0; i < acceptedFromDraft; i++) {
        prefix.push(indexedDraft[i]);
        generated++;
      }
      nextIndex += acceptedFromDraft;

      // The verifier's correction / bonus tokens (everything past the
      // draft tokens it kept), limited to what still fits under the cap
      // so the event below lists exactly what the scheduler receives.
      const extraIndexed: TextToken[] = [];
      for (const t of result.accepted.slice(acceptedFromDraft)) {
        if (generated + extraIndexed.length >= this.maxGeneratedTokens) break;
        extraIndexed.push({
          index: nextIndex + extraIndexed.length,
          text: t.text,
        });
      }
      if (extraIndexed.length > 0) {
        this.events.onVerifierEvent?.({ kind: "accept", tokens: extraIndexed });
        for (const t of extraIndexed) {
          await this.scheduler.accept(t);
          prefix.push(t);
          nextIndex = t.index + 1;
          generated++;
        }
      }

      if (result.done) {
        await this.scheduler.flushPending();
        return this.finish("done");
      }
      if (generated >= this.maxGeneratedTokens) {
        await this.scheduler.flushPending();
        return this.finish("token-cap");
      }
      if (cancel.cancelled) return this.finish("cancelled");

      pendingDraft = nextDraft ?? this.startDraft(prefix, cancel);
    }
  }

  /**
   * Ask the drafter for up to `maxDraftTokens` tokens continuing `prefix`.
   *
   * Speculative drafts are routinely abandoned — after a reject, and on
   * every exit from the loop while one is in flight. A no-op handler is
   * attached so an abandoned draft that later rejects cannot surface as an
   * unhandled promise rejection. The returned promise is unchanged:
   * awaiting it still observes the drafter's failure.
   */
  private startDraft(
    prefix: ReadonlyArray<TextToken>,
    cancel: { cancelled: boolean },
  ): Promise<TextToken[]> {
    const draft = this.drafter.propose({
      prefix,
      maxDraft: this.maxDraftTokens,
      cancel,
    });
    void Promise.resolve(draft).catch(() => {});
    return draft;
  }

  /**
   * Feed the whole utterance buffer to the transcriber as one frame,
   * finalize with `flush()`, and return the final transcript as contiguous
   * text tokens starting at index 0.
   *
   * Returns an empty list when the turn is already cancelled on entry or
   * becomes cancelled while `flush()` is pending. The transcriber is
   * disposed on every path, including when `feed` or `flush` throws.
   */
  private async transcribeAll(
    audio: TranscriptionAudio,
    cancel: { cancelled: boolean },
  ): Promise<TextToken[]> {
    try {
      if (cancel.cancelled) return [];
      const frame: PcmFrame = {
        pcm: audio.pcm,
        sampleRate: audio.sampleRate,
        timestampMs: 0,
      };
      this.transcriber.feed(frame);
      const final = await this.transcriber.flush();
      if (cancel.cancelled) return [];
      return splitTranscriptToTokens(final.partial, 0, final.tokens);
    } finally {
      this.transcriber.dispose();
    }
  }

  /** Report `reason` through `onComplete` and hand it back to the caller. */
  private finish(
    reason: "done" | "token-cap" | "cancelled",
  ): "done" | "token-cap" | "cancelled" {
    this.events.onComplete?.(reason);
    return reason;
  }
}
|
|
490
|
+
|
|
491
|
+
/**
 * Length of the longest common leading run of `accepted` and `draft`,
 * comparing tokens by `text` only.
 *
 * The verifier keeps a prefix of the draft and then emits a correction, so
 * this value is the number of draft tokens it kept; the draft tokens past
 * it form the rejected tail to roll back.
 *
 * @param accepted Tokens returned by the verifier.
 * @param draft Tokens that were submitted for verification.
 * @returns A count between 0 and the shorter of the two lengths.
 */
function countMatchingPrefix(
  accepted: ReadonlyArray<TextToken>,
  draft: ReadonlyArray<TextToken>,
): number {
  const limit = Math.min(accepted.length, draft.length);
  for (let k = 0; k < limit; k++) {
    // First divergence ends the shared prefix.
    if (accepted[k].text !== draft[k].text) return k;
  }
  return limit;
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Optimistic prefill client (C7) — implements `/v1/prefill` against the
|
|
3
|
+
* llama.cpp REST API in three phases:
|
|
4
|
+
*
|
|
5
|
+
* Phase 1 — `slot/save`: snapshot the pre-user-message KV state so a
|
|
6
|
+
* rollback can restore it if speech continues (SPEECH_ACTIVE_REBOUND).
|
|
7
|
+
*
|
|
8
|
+
* Phase 2 — `POST /completion` with stream=false + cache_prompt=true:
|
|
9
|
+
* run the model's prefill over `partialText` without sampling any
|
|
10
|
+
* output tokens. This warms the KV cache so the subsequent real
|
|
11
|
+
* generation can skip one full prefill RTT.
|
|
12
|
+
*
|
|
13
|
+
* Phase 3 — `slot/save` again: snapshot the post-prefill KV state under a
|
|
14
|
+
* separate name. The voice state machine passes this handle to the
|
|
15
|
+
* verifier so generation resumes from the prefilled position.
|
|
16
|
+
*
|
|
17
|
+
* The upstream `/v1/prefill` endpoint is absent — the fork PR that
|
|
18
|
+
* adds it is tracked in `docs/eliza-1-optimistic-rollback.md`. Until it
|
|
19
|
+
* lands, phases 1–3 are emulated via the existing slot-save REST path. When
|
|
20
|
+
* the upstream endpoint ships the body of `prefillOptimistic` switches to a
|
|
21
|
+
* single REST call — callers see no signature change.
|
|
22
|
+
*
|
|
23
|
+
* Upstream endpoint contract: replace phases 2+3 with a single
|
|
24
|
+
* `POST /v1/prefill { slotId, partialText, eotProb }` once llama.cpp exposes
|
|
25
|
+
* it. That call must run the model prefill against `slotId`, save the resulting
|
|
26
|
+
* KV checkpoint, and return `{ handle, eotProb }`.
|
|
27
|
+
*/
|
|
28
|
+
import type { CheckpointHandle, CheckpointManagerLike } from "./checkpoint-manager";
|
|
29
|
+
import type { ContextPartial } from "./eager-context-builder";
|
|
30
|
+
/**
 * Input contract for the optimistic prefill call. `partialText` is the
 * current partial transcript; `eotProb` is the caller's estimate that the
 * user has stopped speaking (from VAD hangover progress or the EOT classifier).
 */
export interface PrefillOptimisticArgs {
    /** Base URL of the llama-server (`http://host:port`). */
    baseUrl: string;
    /** Slot id pinning this conversation. */
    slotId: string;
    /** Partial transcript to prefill against. Must be non-empty. */
    partialText: string;
    /**
     * Probability (0..1) that the partial is end-of-turn. Currently recorded
     * as telemetry only; once `/v1/prefill` lands, the server uses it to
     * decide whether to also kick the drafter inline.
     */
    eotProb: number;
    /**
     * Deterministic context from `EagerContextBuilder` (C3). Used to build the
     * system prompt passed to the prefill `/completion` call so the KV cache
     * covers both the system prompt and the partial transcript. Optional: when
     * absent, only the partial transcript is prefilled.
     */
    context?: ContextPartial;
}
|
|
56
|
+
/** Outcome of one optimistic prefill round-trip. */
export interface PrefillOptimisticResult {
    /**
     * Handle to the POST-prefill KV snapshot. Pass to
     * `CheckpointManager.restoreCheckpoint` on SPEECH_END so the verifier
     * resumes from the prefilled position.
     */
    checkpointHandle: CheckpointHandle;
    /**
     * Approximate token count of the prefilled text. Derived from a rough
     * whitespace tokenizer because the REST emulation path does not return a
     * token count; once the upstream endpoint lands, the server returns the
     * real count.
     */
    tokenCount: number;
    /**
     * Wall-clock milliseconds the prefill round-trip took (phases 1–3).
     */
    prefillMs: number;
    /**
     * Backend label. `slot-save-emulation` = pre-upstream emulation path;
     * `prefill-v1` = native `/v1/prefill` endpoint.
     */
    backend: "slot-save-emulation" | "prefill-v1";
    /**
     * End-of-turn probability echoed back from the server. Currently equal to
     * the caller's `eotProb` (the emulation path has nothing to refine it
     * with); once the upstream endpoint lands, the server returns its own
     * model estimate.
     */
    eotProb: number;
}
|
|
85
|
+
/** Collaborators and tunables for `prefillOptimistic`. */
export interface PrefillOptimisticOptions {
    /** Checkpoint manager used to save the pre- and post-prefill snapshots. */
    checkpointManager: CheckpointManagerLike;
    /**
     * Name to use for the PRE-prefill snapshot (C1 — used by the rollback path
     * on SPEECH_ACTIVE_REBOUND). Defaults to `pre-prefill`.
     */
    preCheckpointName?: string;
    /**
     * Name to use for the POST-prefill snapshot (the one the verifier starts
     * from on SPEECH_END). Defaults to `post-prefill`.
     */
    postCheckpointName?: string;
    /**
     * Optional fetch implementation for tests. Defaults to global `fetch`.
     */
    fetchImpl?: typeof fetch;
    /**
     * Request timeout for the `/completion` prefill call, in milliseconds.
     * Defaults to 5000 ms. The call is a no-sample prefill-only pass, so it
     * should complete in O(transcript_tokens / throughput) — typically well
     * under 1 s for short partials.
     */
    prefillTimeoutMs?: number;
}
|
|
109
|
+
/**
 * Run the three-phase optimistic prefill and return a checkpoint handle for
 * the post-prefill KV state.
 *
 * Voice state machine wiring:
 *   - Call on `PAUSE_TENTATIVE` entry with `eotProb` from the EOT classifier.
 *   - On `SPEECH_ACTIVE_REBOUND` (within the rollback window): restore to the
 *     PRE-prefill checkpoint (C1, saved in phase 1) via the checkpoint
 *     manager. The post-prefill handle returned here is no longer needed.
 *   - On `SPEECH_END`: pass `result.checkpointHandle` to the verifier so it
 *     can resume generation from the prefilled KV state, saving one full
 *     prefill RTT.
 *
 * @param args - Server location, slot, partial transcript and end-of-turn estimate.
 * @param opts - Checkpoint manager plus optional snapshot names, fetch override and timeout.
 * @returns The post-prefill checkpoint handle together with timing and backend metadata.
 */
export declare function prefillOptimistic(args: PrefillOptimisticArgs, opts: PrefillOptimisticOptions): Promise<PrefillOptimisticResult>;
|
|
123
|
+
//# sourceMappingURL=prefill-client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prefill-client.d.ts","sourceRoot":"","sources":["prefill-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAGH,OAAO,KAAK,EACX,gBAAgB,EAChB,qBAAqB,EACrB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAM9D;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC,yDAAyD;IACzD,OAAO,EAAE,MAAM,CAAC;IAChB,yCAAyC;IACzC,MAAM,EAAE,MAAM,CAAC;IACf,yDAAyD;IACzD,WAAW,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,cAAc,CAAC;CACzB;AAED,MAAM,WAAW,uBAAuB;IACvC;;;;OAIG;IACH,gBAAgB,EAAE,gBAAgB,CAAC;IACnC;;;;OAIG;IACH,UAAU,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,EAAE,qBAAqB,GAAG,YAAY,CAAC;IAC9C;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,wBAAwB;IACxC,iBAAiB,EAAE,qBAAqB,CAAC;IACzC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAUD;;;;;;;;;;;;GAYG;AACH,wBAAsB,iBAAiB,CACtC,IAAI,EAAE,qBAAqB,EAC3B,IAAI,EAAE,wBAAwB,GAC5B,OAAO,CAAC,uBAAuB,CAAC,CAoDlC"}
|