@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A2 — LocalAgreement-n streaming-ASR partial stabilizer.
|
|
3
|
+
*
|
|
4
|
+
* Streaming ASR (the Qwen3-ASR fused build running on partial windows)
|
|
5
|
+
* emits a fresh partial transcript on every
|
|
6
|
+
* audio frame. Each partial can revise tokens the previous partial
|
|
7
|
+
* already showed — "the cat sa" → "the cat sat" → "the cat sat on" is
|
|
8
|
+
* fine, but "the cat sa" → "the cap sat" rewrites earlier text. Handing
|
|
9
|
+
* a TTS chunker every revision causes audible stutter when the agent's
|
|
10
|
+
* drafter starts speaking text the verifier later rejects.
|
|
11
|
+
*
|
|
12
|
+
* The LocalAgreement-n trick: only commit a prefix to downstream once it
|
|
13
|
+
* has appeared identically in `n` consecutive partials. Below that
|
|
14
|
+
* threshold the text is "pending" — visible to UI for confirmation
|
|
15
|
+
* latency, but never sent to the drafter / phrase chunker. n=2 is the
|
|
16
|
+
* sweet spot for voice — large enough to suppress single-frame ASR
|
|
17
|
+
* jitter, small enough that the stable prefix tracks the speaker
|
|
18
|
+
* within ~one extra frame.
|
|
19
|
+
*
|
|
20
|
+
* Wiring: the streaming-ASR adapter calls `feed(partial)` per frame; the
|
|
21
|
+
* `stable` portion is what flows into `splitTranscriptToTokens` and the
|
|
22
|
+
* drafter. `pending` is suffix-only, fed to UI for visual feedback. The
|
|
23
|
+
* stabilizer is feature-flagged off until the streaming-ASR backend is
|
|
24
|
+
* wired (see `voice/pipeline.ts`'s `usePartialStabilizer`).
|
|
25
|
+
*
|
|
26
|
+
* No `any`, no fallbacks: a malformed partial (e.g. an empty string)
|
|
27
|
+
* collapses the stable prefix to whatever the agreement window still
|
|
28
|
+
* supports — this is correctness, not a swallow.
|
|
29
|
+
*/
|
|
30
|
+
export interface PartialStabilizerOptions {
|
|
31
|
+
/**
|
|
32
|
+
* Number of consecutive identical partials a token has to appear in
|
|
33
|
+
* before it migrates from `pending` → `stable`. Default 2 (the
|
|
34
|
+
* LocalAgreement-2 setting that the streaming-ASR literature finds
|
|
35
|
+
* close to optimal for English voice input).
|
|
36
|
+
*/
|
|
37
|
+
agreementCount?: number;
|
|
38
|
+
}
|
|
39
|
+
export interface StabilizerOutput {
|
|
40
|
+
/** The agreed-on prefix — safe to hand the drafter / phrase chunker. */
|
|
41
|
+
stable: string;
|
|
42
|
+
/**
|
|
43
|
+
* The suffix still awaiting `n` matching partials — surface in UI but
|
|
44
|
+
* do not commit. Concatenating `stable + pending` reconstructs the
|
|
45
|
+
* latest raw partial.
|
|
46
|
+
*/
|
|
47
|
+
pending: string;
|
|
48
|
+
}
|
|
49
|
+
export declare class PartialStabilizer {
|
|
50
|
+
private readonly agreementCount;
|
|
51
|
+
/**
|
|
52
|
+
* The most recent partials, oldest first. We only need the last
|
|
53
|
+
* `agreementCount` entries — the agreed prefix is the intersection of
|
|
54
|
+
* all of them. Length 0 before any feed.
|
|
55
|
+
*/
|
|
56
|
+
private history;
|
|
57
|
+
/** The longest committed stable prefix so far. Monotonically grows. */
|
|
58
|
+
private committed;
|
|
59
|
+
constructor(options?: PartialStabilizerOptions);
|
|
60
|
+
/**
|
|
61
|
+
* Feed the latest streaming-ASR partial. Returns the stable / pending
|
|
62
|
+
* split. The stable prefix is monotonically non-decreasing across calls
|
|
63
|
+
* — once a span has been agreed `n` times it stays committed even if a
|
|
64
|
+
* later partial briefly disagrees (the ASR will catch up; rolling back
|
|
65
|
+
* would cause downstream stutter).
|
|
66
|
+
*/
|
|
67
|
+
feed(partial: string): StabilizerOutput;
|
|
68
|
+
/** The current committed stable prefix (read-only view). */
|
|
69
|
+
stable(): string;
|
|
70
|
+
/** Clear all history. Call at utterance boundaries. */
|
|
71
|
+
reset(): void;
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=partial-stabilizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"partial-stabilizer.d.ts","sourceRoot":"","sources":["partial-stabilizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,MAAM,WAAW,wBAAwB;IACxC;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAChC,wEAAwE;IACxE,MAAM,EAAE,MAAM,CAAC;IACf;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAC;CAChB;AAgBD,qBAAa,iBAAiB;IAC7B,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC;;;;OAIG;IACH,OAAO,CAAC,OAAO,CAAgB;IAC/B,uEAAuE;IACvE,OAAO,CAAC,SAAS,CAAM;gBAEX,OAAO,GAAE,wBAA6B;IAUlD;;;;;;OAMG;IACH,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,gBAAgB;IAoCvC,4DAA4D;IAC5D,MAAM,IAAI,MAAM;IAIhB,uDAAuD;IACvD,KAAK,IAAI,IAAI;CAIb"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { PartialStabilizer } from "./partial-stabilizer";
|
|
3
|
+
|
|
4
|
+
describe("PartialStabilizer (LocalAgreement-n)", () => {
|
|
5
|
+
it("never commits anything from a single partial (n=2)", () => {
|
|
6
|
+
const s = new PartialStabilizer();
|
|
7
|
+
const out = s.feed("the cat");
|
|
8
|
+
expect(out.stable).toBe("");
|
|
9
|
+
expect(out.pending).toBe("the cat");
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it("commits the common prefix once a second partial agrees", () => {
|
|
13
|
+
const s = new PartialStabilizer();
|
|
14
|
+
s.feed("the cat sa");
|
|
15
|
+
const out = s.feed("the cat sat");
|
|
16
|
+
// The two partials share the prefix "the cat sa".
|
|
17
|
+
expect(out.stable).toBe("the cat sa");
|
|
18
|
+
expect(out.pending).toBe("t");
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it("extends the stable prefix as more partials agree", () => {
|
|
22
|
+
const s = new PartialStabilizer();
|
|
23
|
+
s.feed("the cat sa");
|
|
24
|
+
s.feed("the cat sat");
|
|
25
|
+
// Third partial agrees on "the cat sat" with the second.
|
|
26
|
+
const out = s.feed("the cat sat on");
|
|
27
|
+
expect(out.stable).toBe("the cat sat");
|
|
28
|
+
expect(out.pending).toBe(" on");
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
it("does not roll back a committed prefix when a later partial briefly disagrees", () => {
|
|
32
|
+
const s = new PartialStabilizer();
|
|
33
|
+
s.feed("the cat sa");
|
|
34
|
+
s.feed("the cat sat"); // stable becomes "the cat sa"
|
|
35
|
+
const out = s.feed("the dog");
|
|
36
|
+
expect(out.stable).toBe("the cat sa");
|
|
37
|
+
// The new partial does not start with the committed prefix — the
|
|
38
|
+
// whole new partial surfaces as pending so UI shows the fresh text.
|
|
39
|
+
expect(out.pending).toBe("the dog");
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it("respects a custom agreementCount", () => {
|
|
43
|
+
const s = new PartialStabilizer({ agreementCount: 3 });
|
|
44
|
+
expect(s.feed("hello").stable).toBe("");
|
|
45
|
+
expect(s.feed("hello world").stable).toBe("");
|
|
46
|
+
// Three consecutive partials must share a prefix before it commits.
|
|
47
|
+
const out = s.feed("hello world!");
|
|
48
|
+
expect(out.stable).toBe("hello");
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
it("reset clears all state", () => {
|
|
52
|
+
const s = new PartialStabilizer();
|
|
53
|
+
s.feed("abc");
|
|
54
|
+
s.feed("abcd"); // stable = "abc"
|
|
55
|
+
expect(s.stable()).toBe("abc");
|
|
56
|
+
s.reset();
|
|
57
|
+
expect(s.stable()).toBe("");
|
|
58
|
+
expect(s.feed("xyz").stable).toBe("");
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("rejects an invalid agreementCount", () => {
|
|
62
|
+
expect(() => new PartialStabilizer({ agreementCount: 0 })).toThrow();
|
|
63
|
+
expect(() => new PartialStabilizer({ agreementCount: -1 })).toThrow();
|
|
64
|
+
expect(
|
|
65
|
+
() => new PartialStabilizer({ agreementCount: Number.NaN }),
|
|
66
|
+
).toThrow();
|
|
67
|
+
});
|
|
68
|
+
});
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A2 — LocalAgreement-n streaming-ASR partial stabilizer.
|
|
3
|
+
*
|
|
4
|
+
* Streaming ASR (the Qwen3-ASR fused build running on partial windows)
|
|
5
|
+
* emits a fresh partial transcript on every
|
|
6
|
+
* audio frame. Each partial can revise tokens the previous partial
|
|
7
|
+
* already showed — "the cat sa" → "the cat sat" → "the cat sat on" is
|
|
8
|
+
* fine, but "the cat sa" → "the cap sat" rewrites earlier text. Handing
|
|
9
|
+
* a TTS chunker every revision causes audible stutter when the agent's
|
|
10
|
+
* drafter starts speaking text the verifier later rejects.
|
|
11
|
+
*
|
|
12
|
+
* The LocalAgreement-n trick: only commit a prefix to downstream once it
|
|
13
|
+
* has appeared identically in `n` consecutive partials. Below that
|
|
14
|
+
* threshold the text is "pending" — visible to UI for confirmation
|
|
15
|
+
* latency, but never sent to the drafter / phrase chunker. n=2 is the
|
|
16
|
+
* sweet spot for voice — large enough to suppress single-frame ASR
|
|
17
|
+
* jitter, small enough that the stable prefix tracks the speaker
|
|
18
|
+
* within ~one extra frame.
|
|
19
|
+
*
|
|
20
|
+
* Wiring: the streaming-ASR adapter calls `feed(partial)` per frame; the
|
|
21
|
+
* `stable` portion is what flows into `splitTranscriptToTokens` and the
|
|
22
|
+
* drafter. `pending` is suffix-only, fed to UI for visual feedback. The
|
|
23
|
+
* stabilizer is feature-flagged off until the streaming-ASR backend is
|
|
24
|
+
* wired (see `voice/pipeline.ts`'s `usePartialStabilizer`).
|
|
25
|
+
*
|
|
26
|
+
* No `any`, no fallbacks: a malformed partial (e.g. an empty string)
|
|
27
|
+
* simply breaks agreement for its window: the committed prefix is kept and
|
|
27
|
+
* nothing new commits until partials agree again — correctness, not a swallow.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
export interface PartialStabilizerOptions {
|
|
32
|
+
/**
|
|
33
|
+
* Number of consecutive identical partials a token has to appear in
|
|
34
|
+
* before it migrates from `pending` → `stable`. Default 2 (the
|
|
35
|
+
* LocalAgreement-2 setting that the streaming-ASR literature finds
|
|
36
|
+
* close to optimal for English voice input).
|
|
37
|
+
*/
|
|
38
|
+
agreementCount?: number;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export interface StabilizerOutput {
|
|
42
|
+
/** The agreed-on prefix — safe to hand the drafter / phrase chunker. */
|
|
43
|
+
stable: string;
|
|
44
|
+
/**
|
|
45
|
+
* The suffix still awaiting `n` matching partials — surface in UI but
|
|
46
|
+
* do not commit. Concatenating `stable + pending` reconstructs the
|
|
47
|
+
* latest raw partial — unless that partial diverges from `stable`, in which case `pending` carries the whole partial.
|
|
48
|
+
*/
|
|
49
|
+
pending: string;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const DEFAULT_AGREEMENT_COUNT = 2;
|
|
53
|
+
|
|
54
|
+
/**
 * Find the longest character prefix shared by both strings. Character-level
 * (not word-level) so a partial that finished a word ("sa" → "sat") still
 * shows agreement on the shared prefix "sa" and only "t" stays pending.
 *
 * The scan compares UTF-16 code units, but the result never lands between
 * the two halves of a surrogate pair: two different astral characters (for
 * example two emoji) usually share their high surrogate, and cutting there
 * would hand callers a prefix ending in a lone, unrenderable surrogate.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The number of leading UTF-16 code units common to `a` and `b`,
 *   backed off by one when the cut would split a surrogate pair.
 */
function commonPrefixLength(a: string, b: string): number {
	const limit = Math.min(a.length, b.length);
	let shared = 0;
	while (shared < limit && a.charCodeAt(shared) === b.charCodeAt(shared)) {
		shared++;
	}
	if (shared > 0) {
		// `charCodeAt` past the end yields NaN, which fails every range check
		// below, so no explicit bounds tests are needed.
		const last = a.charCodeAt(shared - 1);
		const nextA = a.charCodeAt(shared);
		const nextB = b.charCodeAt(shared);
		const endsOnHighSurrogate = last >= 0xd800 && last <= 0xdbff;
		const lowSurrogateFollows =
			(nextA >= 0xdc00 && nextA <= 0xdfff) ||
			(nextB >= 0xdc00 && nextB <= 0xdfff);
		if (endsOnHighSurrogate && lowSurrogateFollows) {
			shared--;
		}
	}
	return shared;
}
|
|
65
|
+
|
|
66
|
+
/**
 * LocalAgreement-n stabilizer: splits each streaming-ASR partial into a
 * committed `stable` prefix and a still-revisable `pending` remainder.
 */
export class PartialStabilizer {
	private readonly agreementCount: number;
	/**
	 * The most recent partials, oldest first. We only need the last
	 * `agreementCount` entries — the agreed prefix is the intersection of
	 * all of them. Length 0 before any feed.
	 */
	private history: string[] = [];
	/**
	 * The longest committed stable prefix so far. It only ever grows by
	 * extension: every new value starts with the previous one.
	 */
	private committed = "";

	/**
	 * @param options - Optional tuning; `agreementCount` falls back to the
	 *   module default when omitted. Fractional values are rounded down.
	 * @throws Error when `agreementCount` is NaN, infinite, or below 1.
	 */
	constructor(options: PartialStabilizerOptions = {}) {
		const requested = options.agreementCount ?? DEFAULT_AGREEMENT_COUNT;
		if (!Number.isFinite(requested) || requested < 1) {
			throw new Error(
				`[partial-stabilizer] agreementCount must be a finite integer >= 1; got ${String(requested)}`,
			);
		}
		this.agreementCount = Math.floor(requested);
	}

	/**
	 * Feed the latest streaming-ASR partial. Returns the stable / pending
	 * split. The stable prefix is monotonically non-decreasing across calls
	 * — once a span has been agreed `n` times it stays committed even if a
	 * later partial briefly disagrees (the ASR will catch up; rolling back
	 * would cause downstream stutter).
	 *
	 * @param partial - The newest raw partial transcript.
	 * @returns `stable` is the committed prefix. `pending` is the rest of
	 *   `partial` after that prefix, or the whole `partial` when it does not
	 *   start with the committed prefix.
	 */
	feed(partial: string): StabilizerOutput {
		this.history.push(partial);
		if (this.history.length > this.agreementCount) {
			this.history.shift();
		}

		// Until the window is full there is nothing to confirm; only the
		// already-committed prefix is stable.
		if (this.history.length >= this.agreementCount) {
			// Agreed prefix = common prefix across the whole agreement window.
			const [oldest = "", ...newer] = this.history;
			let agreed = oldest;
			for (const next of newer) {
				if (agreed.length === 0) break;
				agreed = agreed.slice(0, commonPrefixLength(agreed, next));
			}
			// Commit only when the agreed text extends what downstream already
			// received. A longer prefix that diverges from `committed` would
			// silently rewrite committed text, which is the rollback this class
			// promises never to perform.
			if (
				agreed.length > this.committed.length &&
				agreed.startsWith(this.committed)
			) {
				this.committed = agreed;
			}
		}

		const stable = this.committed;
		return {
			stable,
			pending: partial.startsWith(stable)
				? partial.slice(stable.length)
				: partial,
		};
	}

	/** The current committed stable prefix (read-only view). */
	stable(): string {
		return this.committed;
	}

	/** Clear all history. Call at utterance boundaries. */
	reset(): void {
		this.history = [];
		this.committed = "";
	}
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phoneme tokenizer interface used by the IPA-mode phrase chunker.
|
|
3
|
+
*
|
|
4
|
+
* The chunker consumes a stream of accepted text tokens and re-emits them
|
|
5
|
+
* as sub-phrase chunks at phoneme boundaries. This lets TTS start
|
|
6
|
+
* synthesizing partial phrases earlier than the punctuation-only mode, at
|
|
7
|
+
* the cost of slightly less prosody coherence per chunk.
|
|
8
|
+
*
|
|
9
|
+
* The default tokenizer is synchronous because the chunker runs in the
|
|
10
|
+
* accepted-token hot path. Full espeak-ng / phonemizer integrations can
|
|
11
|
+
* implement this interface by resolving their native or package dependency
|
|
12
|
+
* before constructing the scheduler.
|
|
13
|
+
*/
|
|
14
|
+
export interface Phoneme {
|
|
15
|
+
/** IPA symbol(s) for this phoneme. */
|
|
16
|
+
ipa: string;
|
|
17
|
+
/** Index of the source `TextToken` this phoneme came from. Used by the
|
|
18
|
+
* chunker to map sub-phrases back to token-index ranges so that the
|
|
19
|
+
* rollback queue can still drop the right audio on a verifier reject. */
|
|
20
|
+
sourceTokenIndex: number;
|
|
21
|
+
}
|
|
22
|
+
export interface PhonemeTokenizer {
|
|
23
|
+
/** Stable tokenizer name, used for logging and cache keys. */
|
|
24
|
+
readonly name: string;
|
|
25
|
+
/** Relative quality signal for telemetry and debugging. */
|
|
26
|
+
readonly quality: "ipa" | "approximate";
|
|
27
|
+
/**
|
|
28
|
+
* Tokenize a single text token's text into phonemes. The chunker calls
|
|
29
|
+
* this once per accepted token; the tokenizer returns the phonemes for
|
|
30
|
+
* that token only. Returning an empty array is legal (e.g. whitespace
|
|
31
|
+
* tokens) and is treated as "no phoneme boundary added by this token".
|
|
32
|
+
*/
|
|
33
|
+
tokenize(text: string, sourceTokenIndex: number): readonly Phoneme[];
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Synchronous English IPA approximation for phrase chunking.
|
|
37
|
+
*
|
|
38
|
+
* This is not a pronunciation dictionary; it is a deterministic tokenizer
|
|
39
|
+
* whose output is close enough for boundary counting and rollback range
|
|
40
|
+
* mapping. Deployments that need accent-accurate phonemization can inject a
|
|
41
|
+
* higher-quality `PhonemeTokenizer` built around espeak-ng or phonemizer.
|
|
42
|
+
*/
|
|
43
|
+
export declare class RuleBasedEnglishPhonemeTokenizer implements PhonemeTokenizer {
|
|
44
|
+
readonly name = "RuleBasedEnglishPhonemeTokenizer";
|
|
45
|
+
readonly quality: "approximate";
|
|
46
|
+
tokenize(text: string, sourceTokenIndex: number): readonly Phoneme[];
|
|
47
|
+
}
|
|
48
|
+
export declare function createDefaultPhonemeTokenizer(): PhonemeTokenizer;
|
|
49
|
+
//# sourceMappingURL=phoneme-tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phoneme-tokenizer.d.ts","sourceRoot":"","sources":["phoneme-tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,MAAM,WAAW,OAAO;IACvB,sCAAsC;IACtC,GAAG,EAAE,MAAM,CAAC;IACZ;;8EAE0E;IAC1E,gBAAgB,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,gBAAgB;IAChC,8DAA8D;IAC9D,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,2DAA2D;IAC3D,QAAQ,CAAC,OAAO,EAAE,KAAK,GAAG,aAAa,CAAC;IACxC;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,MAAM,GAAG,SAAS,OAAO,EAAE,CAAC;CACrE;AA2FD;;;;;;;GAOG;AACH,qBAAa,gCAAiC,YAAW,gBAAgB;IACxE,QAAQ,CAAC,IAAI,sCAAsC;IACnD,QAAQ,CAAC,OAAO,EAAG,aAAa,CAAU;IAE1C,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,MAAM,GAAG,SAAS,OAAO,EAAE;CAepE;AAED,wBAAgB,6BAA6B,IAAI,gBAAgB,CAEhE"}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phoneme tokenizer interface used by the IPA-mode phrase chunker.
|
|
3
|
+
*
|
|
4
|
+
* The chunker consumes a stream of accepted text tokens and re-emits them
|
|
5
|
+
* as sub-phrase chunks at phoneme boundaries. This lets TTS start
|
|
6
|
+
* synthesizing partial phrases earlier than the punctuation-only mode, at
|
|
7
|
+
* the cost of slightly less prosody coherence per chunk.
|
|
8
|
+
*
|
|
9
|
+
* The default tokenizer is synchronous because the chunker runs in the
|
|
10
|
+
* accepted-token hot path. Full espeak-ng / phonemizer integrations can
|
|
11
|
+
* implement this interface by resolving their native or package dependency
|
|
12
|
+
* before constructing the scheduler.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/** One phoneme produced for a source text token. */
export interface Phoneme {
	/** IPA symbol(s) for this phoneme. */
	ipa: string;
	/**
	 * Index of the source `TextToken` this phoneme came from. Used by the
	 * chunker to map sub-phrases back to token-index ranges so that the
	 * rollback queue can still drop the right audio on a verifier reject.
	 */
	sourceTokenIndex: number;
}

/** Contract for anything that can turn a text token into phonemes. */
export interface PhonemeTokenizer {
	/** Stable tokenizer name, used for logging and cache keys. */
	readonly name: string;
	/** Relative quality signal for telemetry and debugging. */
	readonly quality: "ipa" | "approximate";
	/**
	 * Tokenize a single text token's text into phonemes. The chunker calls
	 * this once per accepted token; the tokenizer returns the phonemes for
	 * that token only. Returning an empty array is legal (e.g. whitespace
	 * tokens) and is treated as "no phoneme boundary added by this token".
	 */
	tokenize(text: string, sourceTokenIndex: number): readonly Phoneme[];
}

// The lookup tables are Maps rather than object literals because they are
// indexed with arbitrary input text: a plain object would resolve inherited
// keys such as "constructor" to `Object.prototype` members.

/** Whole-word overrides, keyed by lowercase word. */
const WORD_IPA: ReadonlyMap<string, readonly string[]> = new Map([
	["a", ["ə"]],
	["an", ["æ", "n"]],
	["and", ["æ", "n", "d"]],
	["are", ["ɑː", "r"]],
	["be", ["b", "iː"]],
	["eliza", ["ə", "l", "iː", "z", "ə"]],
	["hello", ["h", "ə", "l", "oʊ"]],
	["is", ["ɪ", "z"]],
	["of", ["ʌ", "v"]],
	["the", ["ð", "ə"]],
	["to", ["t", "uː"]],
	["world", ["w", "ɜː", "r", "l", "d"]],
]);

/** Two-letter sequences that map to a single phoneme. */
const DIGRAPH_IPA: ReadonlyMap<string, string> = new Map([
	["ch", "tʃ"],
	["ck", "k"],
	["ng", "ŋ"],
	["ph", "f"],
	["qu", "kʷ"],
	["sh", "ʃ"],
	["th", "θ"],
	["wh", "w"],
]);

/** Per-letter fallback, keyed by lowercase ASCII letter. */
const LETTER_IPA: ReadonlyMap<string, string> = new Map([
	["a", "æ"],
	["b", "b"],
	["c", "k"],
	["d", "d"],
	["e", "ɛ"],
	["f", "f"],
	["g", "ɡ"],
	["h", "h"],
	["i", "ɪ"],
	["j", "dʒ"],
	["k", "k"],
	["l", "l"],
	["m", "m"],
	["n", "n"],
	["o", "ɑ"],
	["p", "p"],
	["q", "k"],
	["r", "r"],
	["s", "s"],
	["t", "t"],
	["u", "ʌ"],
	["v", "v"],
	["w", "w"],
	["x", "k"],
	["y", "j"],
	["z", "z"],
]);

/** Spoken form of each ASCII digit. */
const DIGIT_IPA: ReadonlyMap<string, readonly string[]> = new Map([
	["0", ["z", "iː", "r", "oʊ"]],
	["1", ["w", "ʌ", "n"]],
	["2", ["t", "uː"]],
	["3", ["θ", "r", "iː"]],
	["4", ["f", "ɔː", "r"]],
	["5", ["f", "aɪ", "v"]],
	["6", ["s", "ɪ", "k", "s"]],
	["7", ["s", "ɛ", "v", "ə", "n"]],
	["8", ["eɪ", "t"]],
	["9", ["n", "aɪ", "n"]],
]);

/**
 * Splits a token into runs of ASCII letters and single digits; everything
 * else (punctuation, whitespace) is dropped. Hoisted so the hot path does not
 * rebuild it per call; `String.prototype.match` resets `lastIndex` for global
 * patterns, so sharing the instance is safe.
 */
const PIECE_PATTERN = /[A-Za-z]+|\d/g;

/**
 * Approximate the phonemes of one lowercase word.
 *
 * A whole-word override wins when present. Otherwise the word is scanned left
 * to right, preferring a two-letter digraph over a single-letter mapping;
 * characters with no mapping are skipped.
 *
 * @param word - Lowercase word made of ASCII letters.
 * @returns IPA symbols in spoken order (possibly empty).
 */
function ipaForWord(word: string): readonly string[] {
	const known = WORD_IPA.get(word);
	if (known !== undefined) return known;

	const out: string[] = [];
	let i = 0;
	while (i < word.length) {
		const digraph = DIGRAPH_IPA.get(word.slice(i, i + 2));
		if (digraph !== undefined) {
			out.push(digraph);
			i += 2;
			continue;
		}

		const mapped = LETTER_IPA.get(word.charAt(i));
		if (mapped !== undefined) out.push(mapped);
		i += 1;
	}
	return out;
}

/**
 * Synchronous English IPA approximation for phrase chunking.
 *
 * This is not a pronunciation dictionary; it is a deterministic tokenizer
 * whose output is close enough for boundary counting and rollback range
 * mapping. Deployments that need accent-accurate phonemization can inject a
 * higher-quality `PhonemeTokenizer` built around espeak-ng or phonemizer.
 */
export class RuleBasedEnglishPhonemeTokenizer implements PhonemeTokenizer {
	readonly name = "RuleBasedEnglishPhonemeTokenizer";
	readonly quality = "approximate" as const;

	/**
	 * Tokenize one text token into approximate phonemes.
	 *
	 * @param text - Raw token text; letter runs are lowercased before lookup.
	 * @param sourceTokenIndex - Index stamped onto every returned phoneme.
	 * @returns Phonemes in spoken order; empty when `text` contains no ASCII
	 *   letters or digits.
	 */
	tokenize(text: string, sourceTokenIndex: number): readonly Phoneme[] {
		const out: Phoneme[] = [];

		for (const piece of text.match(PIECE_PATTERN) ?? []) {
			// A piece is either a single digit or a run of letters, so a miss
			// in the digit table means it is a word.
			const phonemes =
				DIGIT_IPA.get(piece) ?? ipaForWord(piece.toLowerCase());
			for (const ipa of phonemes) {
				out.push({ ipa, sourceTokenIndex });
			}
		}

		return out;
	}
}

/** Build the tokenizer used when the caller does not inject one. */
export function createDefaultPhonemeTokenizer(): PhonemeTokenizer {
	return new RuleBasedEnglishPhonemeTokenizer();
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
export interface CachedPhraseAudio {
|
|
2
|
+
text: string;
|
|
3
|
+
pcm: Float32Array;
|
|
4
|
+
sampleRate: number;
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Canonical seed list for the voice phrase cache: short openers, fillers, and
|
|
8
|
+
* acknowledgements the assistant emits constantly. Pre-synthesizing these and
|
|
9
|
+
* holding their PCM in `PhraseCache` removes the TTS forward pass from the
|
|
10
|
+
* critical path for the most common first utterances — `dispatchPhrase` hits
|
|
11
|
+
* the cache and writes audio to the ring buffer on the same tick.
|
|
12
|
+
*
|
|
13
|
+
* Used by:
|
|
14
|
+
* - the preset generator (`scripts/voice-preset/build-default-voice-preset.mjs`),
|
|
15
|
+
* which synthesizes these against a real OmniVoice TTS backend and writes
|
|
16
|
+
* the PCM into `cache/voice-preset-default.bin` — the seeded source of truth.
|
|
17
|
+
* - `EngineVoiceBridge.start()` indirectly: the bundle's preset ships these
|
|
18
|
+
* phrases with their PCM, which `PhraseCache.seed(...)` loads at startup.
|
|
19
|
+
* - the idle-time auto-prewarm hook (`EngineVoiceBridge.prewarmIdlePhrases`),
|
|
20
|
+
* which only runs when a real TTS backend is present — never against the
|
|
21
|
+
* silent backend (caching zeros is not a phrase cache).
|
|
22
|
+
* - the first-audio filler (`FIRST_AUDIO_FILLERS` is a subset).
|
|
23
|
+
*
|
|
24
|
+
* Entries are kept here in canonical form (lowercase, single-spaced, trimmed)
|
|
25
|
+
* so the preset generator and the runtime agree byte-for-byte on the keys.
|
|
26
|
+
*/
|
|
27
|
+
export declare const DEFAULT_PHRASE_CACHE_SEED: ReadonlyArray<string>;
|
|
28
|
+
/**
|
|
29
|
+
* The subset of `DEFAULT_PHRASE_CACHE_SEED` suitable to play the instant VAD
|
|
30
|
+
* fires `speech-start`, masking first-token latency (AGENTS.md §4 / H4). Kept
|
|
31
|
+
* short and uncommitted — anything that takes a stance ("of course") is
|
|
32
|
+
* excluded so the filler never contradicts the eventual reply. The first
|
|
33
|
+
* entry found in the phrase cache wins.
|
|
34
|
+
*/
|
|
35
|
+
export declare const FIRST_AUDIO_FILLERS: ReadonlyArray<string>;
|
|
36
|
+
export interface PhraseCacheOptions {
|
|
37
|
+
/** Maximum distinct phrase texts retained. Older non-accessed entries
|
|
38
|
+
* are evicted first. */
|
|
39
|
+
maxEntries?: number;
|
|
40
|
+
/**
|
|
41
|
+
* Opportunistic live-cache guardrail. Voice mode primarily benefits from
|
|
42
|
+
* cached acknowledgements and first sentence fragments; longer text is less
|
|
43
|
+
* likely to repeat and can evict useful hot phrases.
|
|
44
|
+
*/
|
|
45
|
+
maxEstimatedTokensPerEntry?: number;
|
|
46
|
+
/**
|
|
47
|
+
* Guardrail for live opportunistic caching. Long-form direct TTS can be
|
|
48
|
+
* megabytes of PCM and is not a good phrase-cache resident.
|
|
49
|
+
*/
|
|
50
|
+
maxPcmSamplesPerEntry?: number;
|
|
51
|
+
}
|
|
52
|
+
export declare function canonicalizePhraseText(text: string): string;
|
|
53
|
+
export declare function estimatePhraseTokenCount(text: string): number;
|
|
54
|
+
export declare class PhraseCache {
|
|
55
|
+
private readonly entries;
|
|
56
|
+
private readonly maxEntries;
|
|
57
|
+
private readonly maxEstimatedTokensPerEntry;
|
|
58
|
+
private readonly maxPcmSamplesPerEntry;
|
|
59
|
+
constructor(opts?: PhraseCacheOptions);
|
|
60
|
+
put(entry: CachedPhraseAudio): boolean;
|
|
61
|
+
/**
|
|
62
|
+
* Pre-populate the cache from a voice-preset seed list. Callers (the format
|
|
63
|
+
* reader) are responsible for canonicalizing texts before serialization;
|
|
64
|
+
* they are not trusted verbatim — we re-canonicalize on insert to be safe.
|
|
65
|
+
*/
|
|
66
|
+
seed(entries: ReadonlyArray<{
|
|
67
|
+
text: string;
|
|
68
|
+
pcm: Float32Array;
|
|
69
|
+
sampleRate: number;
|
|
70
|
+
}>): void;
|
|
71
|
+
get(text: string): CachedPhraseAudio | undefined;
|
|
72
|
+
has(text: string): boolean;
|
|
73
|
+
size(): number;
|
|
74
|
+
private evictOverflow;
|
|
75
|
+
}
|
|
76
|
+
//# sourceMappingURL=phrase-cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"phrase-cache.d.ts","sourceRoot":"","sources":["phrase-cache.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,YAAY,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,yBAAyB,EAAE,aAAa,CAAC,MAAM,CAwB3D,CAAC;AAEF;;;;;;GAMG;AACH,eAAO,MAAM,mBAAmB,EAAE,aAAa,CAAC,MAAM,CAMrD,CAAC;AAEF,MAAM,WAAW,kBAAkB;IAClC;4BACwB;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,0BAA0B,CAAC,EAAE,MAAM,CAAC;IACpC;;;OAGG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAC/B;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAE3D;AAED,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAI7D;AAMD,qBAAa,WAAW;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwC;IAChE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,0BAA0B,CAAS;IACpD,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAAS;gBAEnC,IAAI,GAAE,kBAAuB;IAoBzC,GAAG,CAAC,KAAK,EAAE,iBAAiB,GAAG,OAAO;IAetC;;;;OAIG;IACH,IAAI,CACH,OAAO,EAAE,aAAa,CAAC;QACtB,IAAI,EAAE,MAAM,CAAC;QACb,GAAG,EAAE,YAAY,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACnB,CAAC,GACA,IAAI;IAUP,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,iBAAiB,GAAG,SAAS;IAShD,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IAI1B,IAAI,IAAI,MAAM;IAId,OAAO,CAAC,aAAa;CAOrB"}
|