@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Samantha-preset on-the-fly regeneration via the fused OmniVoice FFI.
|
|
3
|
+
*
|
|
4
|
+
* Path A from W3-11: when the bundle ships the I-wave zero-fill placeholder
|
|
5
|
+
* for `cache/voice-preset-default.bin`, the runtime synthesises a real
|
|
6
|
+
* preset by encoding the bundled Samantha reference clip through the FFI's
|
|
7
|
+
* `eliza_inference_encode_reference` entrypoint and writing the resulting
|
|
8
|
+
* `ref_audio_tokens` + canonical instruct/refText into a v2 preset blob.
|
|
9
|
+
*
|
|
10
|
+
* Determinism contract:
|
|
11
|
+
* - The reference clip bytes (24 kHz mono fp32 WAV) are pinned in the
|
|
12
|
+
* bundle.
|
|
13
|
+
* - The reference transcript (`SAMANTHA_REFERENCE_TRANSCRIPT`) is pinned.
|
|
14
|
+
* - The instruct string (`SAMANTHA_INSTRUCT`) is pinned.
|
|
15
|
+
* - The OmniVoice encode entrypoint does not consume randomness (the
|
|
16
|
+
* HuBERT semantic + RVQ codec passes are pure functions of the input
|
|
17
|
+
* PCM + the model weights).
|
|
18
|
+
* Therefore the produced preset bytes are reproducible byte-for-byte across
|
|
19
|
+
* boots given the same FFI library + bundle.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
23
|
+
import path from "node:path";
|
|
24
|
+
import {
|
|
25
|
+
type ElizaInferenceContextHandle,
|
|
26
|
+
type ElizaInferenceFfi,
|
|
27
|
+
loadElizaInferenceFfi,
|
|
28
|
+
} from "./ffi-bindings";
|
|
29
|
+
import {
|
|
30
|
+
detectSamanthaPlaceholder,
|
|
31
|
+
SAMANTHA_INSTRUCT,
|
|
32
|
+
SAMANTHA_REFERENCE_TRANSCRIPT,
|
|
33
|
+
} from "./samantha-preset-placeholder";
|
|
34
|
+
import { writeVoicePresetFileV2 } from "./voice-preset-format";
|
|
35
|
+
|
|
36
|
+
/**
 * Result reported by `ensureSamanthaPresetReady`.
 *
 * Each variant carries its own `kind` tag so the caller can pick a log level
 * and message without repeating the placeholder detection itself.
 */
export type EnsureSamanthaPresetOutcome =
	/** The preset on disk is already a real preset; nothing was written. */
	| { kind: "real-preset" }
	/** No preset file exists at the bundle's canonical path. */
	| { kind: "missing-bundle-preset" }
	/** A preset was generated and written; sizes describe the new blob. */
	| { kind: "regenerated"; bytes: number; K: number; refT: number }
	/** Regeneration did not happen; `reason` + `detail` explain why. */
	| {
			kind: "placeholder-no-regen";
			reason:
				| "missing-reference-wav"
				| "missing-ffi-library"
				| "ffi-no-encode-reference"
				| "encode-reference-failed";
			detail: string;
	  };
|
|
51
|
+
|
|
52
|
+
/** Inputs accepted by `regenerateSamanthaPresetFromBundle`. */
export interface RegenerateOptions {
	/** Root directory of the bundle; the FFI context is created against it. */
	bundleRoot: string;
	/**
	 * Absolute path the regenerated preset is destined for. Writing is the
	 * caller's job: the regeneration function returns bytes + metadata only.
	 */
	presetPath: string;
	/**
	 * Optional override for the Samantha reference WAV location. When omitted
	 * the bundle's `tts/omnivoice/samantha-ref.wav` is used.
	 */
	referenceWav?: string;
	/**
	 * Optional override for the reference transcript. When omitted the pinned
	 * `SAMANTHA_REFERENCE_TRANSCRIPT` is used.
	 */
	referenceText?: string;
}
|
|
64
|
+
|
|
65
|
+
/** What `regenerateSamanthaPresetFromBundle` hands back to its caller. */
export interface RegenerateResult {
	/** Serialised preset blob, ready to be written to disk. */
	bytes: Uint8Array;
	/** `K` dimension of the encoded reference-token tensor. */
	K: number;
	/** `refT` dimension of the encoded reference-token tensor. */
	refT: number;
	/** Length of the speaker-embedding section stored in the preset. */
	embeddingDim: number;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Candidate filenames for the fused OmniVoice shared library on the current
 * platform, in probe order.
 *
 * `engine-bridge.ts` keeps an equivalent private helper; both must resolve the
 * same names so that a bundle which loads at boot also loads when the preset
 * is regenerated.
 */
function libraryFilenames(): string[] {
	switch (process.platform) {
		case "darwin":
			return ["libelizainference.dylib"];
		case "win32":
			return ["elizainference.dll", "libelizainference.dll"];
		default:
			return ["libelizainference.so"];
	}
}
|
|
86
|
+
|
|
87
|
+
/**
 * Resolve the path of the OmniVoice shared library for a bundle.
 *
 * Resolution order:
 *  1. `ELIZA_INFERENCE_LIBRARY`, when set and the file exists.
 *  2. The first existing platform filename inside, in order: `<bundleRoot>/lib`,
 *     the directory of `ELIZA_INFERENCE_LIBRARY` (when set), and
 *     `ELIZA_INFERENCE_LIB_DIR` (when set).
 *  3. Otherwise the primary platform filename under `<bundleRoot>/lib`, which
 *     may not exist; callers check existence themselves.
 */
function locateBundleLibrary(bundleRoot: string): string {
	const explicitLibrary = process.env.ELIZA_INFERENCE_LIBRARY?.trim();
	if (explicitLibrary && existsSync(explicitLibrary)) {
		return explicitLibrary;
	}

	const bundleLibDir = path.join(bundleRoot, "lib");
	const searchDirs: string[] = [bundleLibDir];
	if (explicitLibrary) {
		searchDirs.push(path.dirname(explicitLibrary));
	}
	const envLibDir = process.env.ELIZA_INFERENCE_LIB_DIR?.trim();
	if (envLibDir) {
		searchDirs.push(envLibDir);
	}

	const filenames = libraryFilenames();
	for (const searchDir of searchDirs) {
		const hit = filenames
			.map((filename) => path.join(searchDir, filename))
			.find((fullPath) => existsSync(fullPath));
		if (hit !== undefined) {
			return hit;
		}
	}

	// Nothing on disk: hand back the canonical bundle location so the caller's
	// error message names a concrete path.
	return path.join(bundleLibDir, filenames[0] ?? "libelizainference.so");
}
|
|
106
|
+
|
|
107
|
+
/**
 * Decode a 24 kHz mono 32-bit IEEE-float little-endian WAV file into a
 * `Float32Array` of PCM samples.
 *
 * Anything that is not the canonical OmniVoice reference format is refused up
 * front: encoders happily accept stereo / 16-bit / 48 kHz inputs and silently
 * degrade, which is exactly the kind of fallback sludge AGENTS.md §3 forbids.
 *
 * Accepted encodings:
 *  - `WAVE_FORMAT_IEEE_FLOAT` (tag 3) with 32 bits per sample;
 *  - `WAVE_FORMAT_EXTENSIBLE` (tag 0xFFFE) with 32 bits per sample whose
 *    SubFormat GUID identifies IEEE float. An extensible header that carries
 *    32-bit *integer* PCM is rejected rather than reinterpreted as floats.
 *
 * If the `data` chunk declares more bytes than the file contains (truncated
 * file, or a streaming writer that left the size at 0xFFFFFFFF), only the
 * samples actually present are returned; no zero padding is invented.
 *
 * @param bytes Full contents of the WAV file.
 * @returns The PCM samples, in a freshly allocated buffer. The sample bytes
 *   are copied verbatim, so the result assumes a little-endian host.
 * @throws Error when the file is too small, lacks RIFF/WAVE magic, lacks a
 *   `fmt ` or `data` chunk, has a `fmt ` chunk shorter than 16 bytes, or is
 *   not 32-bit float / mono / 24 kHz.
 */
export function decodeMonoFloat32Wav24kHz(bytes: Uint8Array): Float32Array {
	if (bytes.byteLength < 44) {
		throw new Error(
			`[samantha-regen] reference WAV too small (${bytes.byteLength} bytes, need >= 44 for header)`,
		);
	}
	const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
	const riff = String.fromCharCode(...bytes.subarray(0, 4));
	const wave = String.fromCharCode(...bytes.subarray(8, 12));
	if (riff !== "RIFF" || wave !== "WAVE") {
		throw new Error(
			`[samantha-regen] reference WAV bad magic: RIFF=${JSON.stringify(riff)} WAVE=${JSON.stringify(wave)}`,
		);
	}

	// Walk chunks for "fmt " + "data". The walk stops at the first "data"
	// chunk, so a "fmt " chunk is only honoured when it precedes the audio.
	let cursor = 12;
	let fmtOffset = -1;
	let fmtLen = 0;
	let dataOffset = -1;
	let dataLen = 0;
	while (cursor + 8 <= bytes.byteLength) {
		const id = String.fromCharCode(...bytes.subarray(cursor, cursor + 4));
		const size = view.getUint32(cursor + 4, true);
		const payload = cursor + 8;
		if (id === "fmt ") {
			fmtOffset = payload;
			fmtLen = size;
		} else if (id === "data") {
			dataOffset = payload;
			dataLen = size;
			break;
		}
		cursor = payload + size + (size % 2); // pad byte
	}
	if (fmtOffset < 0 || dataOffset < 0) {
		throw new Error("[samantha-regen] reference WAV missing fmt or data chunk");
	}
	// The fields read below span the 16-byte base WAVEFORMAT header; a shorter
	// chunk would make us read bytes belonging to the next chunk.
	if (fmtLen < 16) {
		throw new Error(
			`[samantha-regen] reference WAV fmt chunk too small (${fmtLen} bytes, need >= 16)`,
		);
	}
	const audioFormat = view.getUint16(fmtOffset + 0, true);
	const channels = view.getUint16(fmtOffset + 2, true);
	const sampleRate = view.getUint32(fmtOffset + 4, true);
	const bitsPerSample = view.getUint16(fmtOffset + 14, true);

	// Accept WAVE_FORMAT_IEEE_FLOAT (3), or WAVE_FORMAT_EXTENSIBLE (0xFFFE)
	// whose SubFormat GUID starts with the IEEE-float tag. The GUID sits at
	// offset 24 of the fmt payload; its leading 16 bits carry the format tag.
	const isExtensible = audioFormat === 0xfffe && fmtLen >= 40;
	const subFormatTag = isExtensible
		? view.getUint16(fmtOffset + 24, true)
		: undefined;
	const isFloat =
		bitsPerSample === 32 &&
		(audioFormat === 3 || (isExtensible && subFormatTag === 3));
	if (!isFloat) {
		const subFormatNote =
			subFormatTag === undefined ? "" : `, subformat=${subFormatTag}`;
		throw new Error(
			`[samantha-regen] reference WAV must be 32-bit float PCM (got format=${audioFormat}, bps=${bitsPerSample}${subFormatNote})`,
		);
	}
	if (channels !== 1) {
		throw new Error(
			`[samantha-regen] reference WAV must be mono (got ${channels} channels)`,
		);
	}
	if (sampleRate !== 24_000) {
		throw new Error(
			`[samantha-regen] reference WAV must be 24 kHz (got ${sampleRate})`,
		);
	}

	// Never trust the declared size beyond what the file really holds:
	// `subarray` clamps silently, which previously produced trailing zero
	// samples (or a multi-gigabyte allocation for a 0xFFFFFFFF size).
	const availableBytes = bytes.byteLength - dataOffset;
	const usableBytes = Math.min(dataLen, availableBytes);
	const sampleCount = Math.floor(usableBytes / 4);
	// Copy into an aligned buffer (the input slice is not guaranteed
	// 4-aligned; Float32Array constructor requires alignment). A byte copy,
	// rather than per-sample float reads, keeps every bit pattern intact.
	const aligned = new Uint8Array(sampleCount * 4);
	aligned.set(bytes.subarray(dataOffset, dataOffset + sampleCount * 4));
	return new Float32Array(aligned.buffer);
}
|
|
185
|
+
|
|
186
|
+
/**
 * Build a fresh Samantha preset blob from the bundle's reference clip.
 *
 * Steps, in order: verify the reference WAV and the OmniVoice shared library
 * exist, load the FFI, confirm it can encode references, create a context and
 * acquire the `tts` region, decode the WAV, call `encodeReference`, and
 * serialise the tokens plus the reference text / instruct string through
 * `writeVoicePresetFileV2`. Nothing is written to disk here; the caller
 * persists `bytes`.
 *
 * Whatever happens after the FFI is loaded, the `tts` region is evicted (if it
 * was acquired), the context destroyed (if it was created) and the library
 * closed. Failures in those cleanup steps are deliberately ignored.
 *
 * @param opts Bundle location plus optional reference overrides.
 * @returns The preset bytes together with the encoded tensor dimensions.
 * @throws Error when the reference WAV or shared library is missing, the FFI
 *   lacks the encode-reference entrypoint, the WAV is not in the canonical
 *   format, or the encode pass yields an empty tensor.
 */
export async function regenerateSamanthaPresetFromBundle(
	opts: RegenerateOptions,
): Promise<RegenerateResult> {
	const referenceClip =
		opts.referenceWav ??
		path.join(opts.bundleRoot, "tts", "omnivoice", "samantha-ref.wav");
	if (!existsSync(referenceClip)) {
		throw new Error(
			`[samantha-regen] Samantha reference WAV not found at ${referenceClip}. The bundle is missing the OmniVoice samantha reference clip.`,
		);
	}

	const sharedLibrary = locateBundleLibrary(opts.bundleRoot);
	if (!existsSync(sharedLibrary)) {
		const bundleLibDir = path.join(opts.bundleRoot, "lib");
		const probedNames = libraryFilenames().join(", ");
		throw new Error(
			`[samantha-regen] OmniVoice FFI library not found under ${bundleLibDir} (tried ${probedNames}). Build via packages/app-core/scripts/build-llama-cpp-mtp.mjs (omnivoice-merged target).`,
		);
	}

	// Cleanup steps must never mask the primary outcome, so each one runs
	// inside its own swallow-all guard.
	const bestEffort = (step: () => void): void => {
		try {
			step();
		} catch {
			// Intentionally ignored: teardown is best-effort during regeneration.
		}
	};

	const ffi: ElizaInferenceFfi = loadElizaInferenceFfi(sharedLibrary);
	let ctx: ElizaInferenceContextHandle | null = null;
	let ttsAcquired = false;
	try {
		const canEncode =
			typeof ffi.encodeReferenceSupported === "function" &&
			ffi.encodeReferenceSupported();
		if (!canEncode) {
			throw new Error(
				"[samantha-regen] this OmniVoice build does not export eliza_inference_encode_reference (ABI v4 required). Rebuild with the encode-reference target.",
			);
		}
		if (typeof ffi.encodeReference !== "function") {
			throw new Error(
				"[samantha-regen] FFI binding missing encodeReference method despite encodeReferenceSupported()=true",
			);
		}

		ctx = ffi.create(opts.bundleRoot);
		ffi.mmapAcquire(ctx, "tts");
		ttsAcquired = true;

		const wavBytes = new Uint8Array(readFileSync(referenceClip));
		const pcm = decodeMonoFloat32Wav24kHz(wavBytes);

		const encoded = ffi.encodeReference({ ctx, pcm, sampleRateHz: 24_000 });
		const { K, refT } = encoded;
		if (K <= 0 || refT <= 0) {
			throw new Error(
				`[samantha-regen] encode_reference returned empty tensor (K=${K}, refT=${refT})`,
			);
		}

		// Only ref_audio_tokens come out of the encode pass. The speaker
		// embedding section is written empty because OmniVoice derives the
		// speaker identity from those tokens rather than a separate vector.
		const refText = opts.referenceText ?? SAMANTHA_REFERENCE_TRANSCRIPT;
		const instruct = SAMANTHA_INSTRUCT;
		const metadata: Record<string, unknown> = {
			generator: "samantha-preset-regenerator",
			generatorVersion: 1,
			referenceWavPath: path.basename(referenceClip),
			referenceWavBytes: wavBytes.byteLength,
			referenceText: refText,
			instruct,
			K,
			refT,
		};

		const bytes = writeVoicePresetFileV2({
			embedding: new Float32Array(0),
			phrases: [],
			refAudioTokens: { K, refT, tokens: encoded.tokens },
			refText,
			instruct,
			metadata,
		});

		return { bytes, K, refT, embeddingDim: 0 };
	} finally {
		if (ctx !== null) {
			const handle = ctx;
			if (ttsAcquired) {
				// Destroy below tears the context down regardless of evict.
				bestEffort(() => ffi.mmapEvict(handle, "tts"));
			}
			// The OS reclaims the context on process exit if destroy fails.
			bestEffort(() => ffi.destroy(handle));
		}
		bestEffort(() => ffi.close());
	}
}
|
|
309
|
+
|
|
310
|
+
/**
 * Pre-flight: detect a placeholder preset at the bundle's canonical path
 * (`<bundleRoot>/cache/voice-preset-default.bin`) and regenerate it via
 * OmniVoice when possible. Called by the engine's
 * `ensureActiveBundleVoiceReady()` before the synchronous preset load.
 *
 * Outcomes:
 *
 *   - `real-preset`           — nothing to do; the file is a real preset.
 *   - `missing-bundle-preset` — file does not exist; the engine's existing
 *                               error path runs (loud failure).
 *   - `regenerated`           — preset bytes were generated and written.
 *   - `placeholder-no-regen`  — regen could not run (FFI missing, reference
 *                               clip missing, encode failure, or the preset
 *                               file was unreadable). Returned for the caller
 *                               to log; the engine then falls through to the
 *                               bundled Kokoro default voice.
 *
 * Failure classification: `ffi-no-encode-reference` is reported only when the
 * error says the library lacks the encode-reference capability. Every other
 * regeneration error, including an encode pass that returned an empty tensor,
 * is reported as `encode-reference-failed`.
 *
 * @param bundleRoot Root directory of the active bundle.
 * @returns The outcome describing what was found and done.
 * @throws Whatever `mkdirSync` / `writeFileSync` throw if the regenerated
 *   preset cannot be written; those errors are not converted to an outcome.
 */
export async function ensureSamanthaPresetReady(
	bundleRoot: string,
): Promise<EnsureSamanthaPresetOutcome> {
	const presetPath = path.join(bundleRoot, "cache", "voice-preset-default.bin");
	const state = detectSamanthaPlaceholder(presetPath);

	if (state.kind === "missing") {
		return { kind: "missing-bundle-preset" };
	}
	if (state.kind === "real-preset") {
		return { kind: "real-preset" };
	}
	if (state.kind === "unreadable") {
		// NOTE: the outcome type has no dedicated "unreadable" reason, so this
		// reuses "missing-ffi-library"; `detail` carries the real cause.
		return {
			kind: "placeholder-no-regen",
			reason: "missing-ffi-library", // closest match — file is unreadable
			detail: state.reason,
		};
	}

	// Placeholder detected. Try to regenerate.
	const refWav = path.join(bundleRoot, "tts", "omnivoice", "samantha-ref.wav");
	if (!existsSync(refWav)) {
		return {
			kind: "placeholder-no-regen",
			reason: "missing-reference-wav",
			detail: refWav,
		};
	}
	const libPath = locateBundleLibrary(bundleRoot);
	if (!existsSync(libPath)) {
		return {
			kind: "placeholder-no-regen",
			reason: "missing-ffi-library",
			detail: libPath,
		};
	}

	let result: RegenerateResult;
	try {
		result = await regenerateSamanthaPresetFromBundle({
			bundleRoot,
			presetPath,
			referenceWav: refWav,
			referenceText: SAMANTHA_REFERENCE_TRANSCRIPT,
		});
	} catch (err) {
		const message = err instanceof Error ? err.message : String(err);
		// Distinguish the FFI-symbol-missing path from a real synth failure
		// — both are placeholder-no-regen but the operator-facing reason
		// differs. The pattern is anchored on the capability-probe wording:
		// a bare "encode_reference" would also match the "encode_reference
		// returned empty tensor" failure and mislabel a genuine encode error
		// as a missing symbol.
		const ffiLacksEncodeReference =
			/does not export eliza_inference_encode_reference|encodeReferenceSupported|ABI v4/.test(
				message,
			);
		const reason: "ffi-no-encode-reference" | "encode-reference-failed" =
			ffiLacksEncodeReference
				? "ffi-no-encode-reference"
				: "encode-reference-failed";
		return { kind: "placeholder-no-regen", reason, detail: message };
	}

	mkdirSync(path.dirname(presetPath), { recursive: true });
	writeFileSync(presetPath, result.bytes);
	return {
		kind: "regenerated",
		bytes: result.bytes.byteLength,
		K: result.K,
		refT: result.refT,
	};
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { BargeInController } from "./barge-in";
|
|
2
|
+
import type { PhonemeTokenizer } from "./phoneme-tokenizer";
|
|
3
|
+
import { PhraseCache } from "./phrase-cache";
|
|
4
|
+
import { PhraseChunker } from "./phrase-chunker";
|
|
5
|
+
import { PrefixPreservingQueue } from "./prefix-preserving-queue";
|
|
6
|
+
import { PcmRingBuffer } from "./ring-buffer";
|
|
7
|
+
import { RollbackQueue } from "./rollback-queue";
|
|
8
|
+
import type { AudioChunk, AudioSink, OmniVoiceBackend, Phrase, RejectedTokenRange, SchedulerConfig, SpeakerPreset, TextToken, VoiceSchedulerTelemetryListener } from "./types";
|
|
9
|
+
/**
 * T2 — chunk-size telemetry for a single streamed phrase.
 *
 * One record is emitted per `synthesizePhraseStream` call, provided
 * `SchedulerEvents.onChunkMetrics` is wired. It exists to debug T1-class
 * chunk-size pathologies and to verify T3 time-budget effects.
 */
export interface TtsPhraseChunkMetrics {
    /** Identifier of the phrase this record describes. */
    phraseId: number;
    /**
     * Streamed PCM chunks in arrival order. `chunkBytes` is the PCM size in
     * bytes assuming Float32 samples; `chunkDurationMs` is derived from
     * samples / sampleRate. Empty when no chunks landed.
     */
    chunks: readonly {
        chunkBytes: number;
        chunkDurationMs: number;
    }[];
    /** Total of all `chunkDurationMs` values, in ms. */
    totalDurationMs: number;
    /** Total of all `chunkBytes` values. */
    totalBytes: number;
    /** True when synthesis of the phrase was cancelled mid-stream. */
    cancelled: boolean;
}
|
|
31
|
+
/** Callback that receives one `TtsPhraseChunkMetrics` record at a time. */
export type TtsChunkMetricsListener = (
    metrics: TtsPhraseChunkMetrics,
) => void;
|
|
32
|
+
/** Optional callbacks a `VoiceScheduler` can be constructed with. */
export interface SchedulerEvents {
    onPhrase?(phrase: Phrase): void;
    onRollback?(phraseId: number, range: RejectedTokenRange): void;
    onAudio?(chunk: AudioChunk): void;
    /**
     * Hard-stop on barge-in: the ring buffer has been drained, the chunker
     * reset and in-flight TTS cancelled. Aborting the LLM/drafter is handled
     * separately: the engine layer's `voiceStreamingArgs` threads the
     * `BargeInCancelToken.signal` (`bargeIn.onSignal` → `hard-stop`) into
     * `dispatcher.generate`.
     */
    onCancel?(): void;
    /** Provisional barge-in: TTS playback was paused by a VAD voice hit while the agent was speaking. */
    onTtsPause?(): void;
    /** The provisional barge-in turned out to be a blip, so TTS playback resumed. */
    onTtsResume?(): void;
    /** Structured telemetry covering latency, cache, rollback, and barge-in metrics. */
    onTelemetry?: VoiceSchedulerTelemetryListener;
    /**
     * T2 — per-phrase distribution of TTS chunk sizes. Optional. When set,
     * one summary is emitted per streamed phrase synthesis, whether it
     * succeeded or was cancelled, so test harnesses and metrics consumers can
     * verify T1/T3 effects without scraping the audio bus.
     */
    onChunkMetrics?: TtsChunkMetricsListener;
}
|
|
57
|
+
/** Collaborators handed to the `VoiceScheduler` constructor. */
export interface SchedulerDeps {
    /** TTS backend; the only required dependency. */
    backend: OmniVoiceBackend;
    /** Optional audio sink. */
    sink?: AudioSink;
    /** Optional phrase cache. */
    phraseCache?: PhraseCache;
    /**
     * Optional phoneme tokenizer. Needed only when
     * `config.chunkerConfig.chunkOn === 'phoneme-stream'`; a default is
     * available from `createDefaultPhonemeTokenizer()`.
     */
    phonemeTokenizer?: PhonemeTokenizer;
}
|
|
66
|
+
/**
 * Type declaration for the voice scheduler. Only the public surface and the
 * names of private members are visible here; behavior notes below are the
 * ones carried by the declaration itself.
 */
export declare class VoiceScheduler {
    readonly chunker: PhraseChunker;
    readonly rollback: RollbackQueue;
    readonly bargeIn: BargeInController;
    readonly ringBuffer: PcmRingBuffer;
    readonly sink: AudioSink;
    readonly preset: SpeakerPreset;
    /**
     * Barge-in queue that preserves the already-correct prefix. While the
     * streaming TTS path is active every audio chunk is enqueued here, tagged
     * with its token range. On a hard-stop (barge-in),
     * `rollbackAt(divergencePoint)` splits the queue: chunks at or before the
     * divergence point are replayed into the sink and later chunks are
     * dropped, so correct audio plays through without being re-synthesized.
     */
    readonly prefixQueue: PrefixPreservingQueue;
    private readonly backend;
    private readonly phraseCache;
    private readonly events;
    private readonly sampleRate;
    private readonly inFlight;
    private readonly maxInFlight;
    private readonly streamingTtsActive;
    private kernelTicks;
    private nextStandalonePhraseId;
    /** Set while playback is paused by a provisional barge-in (`pause-tts`). */
    private paused;
    /**
     * Index of the last committed token, refreshed each time a phrase is
     * dispatched to TTS. Serves as the divergence point when a barge-in fires
     * mid-response.
     */
    private lastCommittedTokenIndex;
    private agentSpeakingUntilMs;
    private agentSpeakingTimer;
    private phraseFlushTimer;
    constructor(config: SchedulerConfig, deps: SchedulerDeps, events?: SchedulerEvents);
    accept(token: TextToken, acceptedAt?: number): Promise<void>;
    reject(range: RejectedTokenRange): Promise<void>;
    flushPending(): Promise<void>;
    waitIdle(): Promise<void>;
    synthesizeText(text: string, signal?: AbortSignal): Promise<AudioChunk>;
    prewarmPhrases(texts: readonly string[], opts?: {
        concurrency?: number;
    }): Promise<{
        warmed: number;
        cached: number;
    }>;
    tickKernel(): void;
    kernelTickCount(): number;
    /**
     * Flag the agent as audibly speaking for as long as the audio handed to
     * the sink lasts. This flag gates barge-in: VAD blips pause/resume TTS
     * only while it is true, and ASR-confirmed words hard-stop both playback
     * and generation.
     */
    markAgentSpeakingForAudio(samples: number, sampleRate: number): void;
    /** Whether TTS playback is currently paused by a provisional barge-in. */
    get ttsPaused(): boolean;
    /**
     * Discard TTS that has not been spoken yet, without signalling a
     * barge-in: the ring buffer is drained, the chunker reset and in-flight
     * synthesis cancelled. The turn controller calls this when a speculative
     * response is invalidated because speech resumed: that TTS was streamed
     * from a stale partial transcript and must be dropped, but it is not a
     * user barge-in, so `onCancel` is NOT fired.
     */
    cancelPendingTts(): void;
    private dispatchPhrase;
    private runPhraseSynthesis;
    private synthesizePhraseStream;
    private isPhraseTracked;
    private cancelNativeTts;
    private commitAudio;
    private onBargeInSignal;
    private handleBargeIn;
    private emitTtsCancel;
    private emitTelemetry;
    private armPhraseFlushTimer;
    private clearPhraseFlushTimer;
    private armAgentSpeakingTimer;
    private clearAgentSpeaking;
}
|
|
146
|
+
//# sourceMappingURL=scheduler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scheduler.d.ts","sourceRoot":"","sources":["scheduler.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAC/C,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EACN,qBAAqB,EAErB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAqB,aAAa,EAAE,MAAM,eAAe,CAAC;AACjE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,KAAK,EAEX,UAAU,EACV,SAAS,EAET,gBAAgB,EAChB,MAAM,EACN,kBAAkB,EAClB,eAAe,EACf,aAAa,EAEb,SAAS,EAKT,+BAA+B,EAE/B,MAAM,SAAS,CAAC;AAEjB;;;;;;;GAOG;AACH,MAAM,WAAW,qBAAqB;IACrC,QAAQ,EAAE,MAAM,CAAC;IACjB,6EAA6E;IAC7E,MAAM,EAAE,aAAa,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,eAAe,EAAE,MAAM,CAAC;KACxB,CAAC,CAAC;IACH,oCAAoC;IACpC,eAAe,EAAE,MAAM,CAAC;IACxB,0BAA0B;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,6DAA6D;IAC7D,SAAS,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,MAAM,uBAAuB,GAAG,CAAC,OAAO,EAAE,qBAAqB,KAAK,IAAI,CAAC;AAE/E,MAAM,WAAW,eAAe;IAC/B,QAAQ,CAAC,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,UAAU,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,kBAAkB,GAAG,IAAI,CAAC;IAC/D,OAAO,CAAC,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;IAClC;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,IAAI,CAAC;IAClB,6FAA6F;IAC7F,UAAU,CAAC,IAAI,IAAI,CAAC;IACpB,qEAAqE;IACrE,WAAW,CAAC,IAAI,IAAI,CAAC;IACrB,yFAAyF;IACzF,WAAW,CAAC,EAAE,+BAA+B,CAAC;IAC9C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,uBAAuB,CAAC;CACzC;AAED,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,gBAAgB,CAAC;IAC1B,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B;;6CAEyC;IACzC,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACpC;AAiED,qBAAa,cAAc;IAC1B,QAAQ,CAAC,OAAO,EAAE,aAAa,CAAC;IAChC,QAAQ,CAAC,QAAQ,gBAAuB;IACxC,QAAQ,CAAC,OAAO,oBAA2B;IAC3C,QAAQ,CAAC,UAAU,EAAE,aAAa,CAAC;IACnC,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC;IAC/B;;;;;;;OAOG;IACH,QAAQ,CAAC,WAAW,wBAA+B;IACnD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAmB;IAC3C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkB;IACzC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA+B;IACxD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,C
AAC,kBAAkB,CAAU;IAC7C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,sBAAsB,CAAM;IACpC,2EAA2E;IAC3E,OAAO,CAAC,MAAM,CAAS;IACvB;;;OAGG;IACH,OAAO,CAAC,uBAAuB,CAAK;IACpC,OAAO,CAAC,oBAAoB,CAAK;IACjC,OAAO,CAAC,kBAAkB,CAA8C;IACxE,OAAO,CAAC,gBAAgB,CAA8C;gBAGrE,MAAM,EAAE,eAAe,EACvB,IAAI,EAAE,aAAa,EACnB,MAAM,GAAE,eAAoB;IAoCvB,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,UAAU,SAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAWhE,MAAM,CAAC,KAAK,EAAE,kBAAkB,GAAG,OAAO,CAAC,IAAI,CAAC;IA8BhD,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC;IAQ7B,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAKzB,cAAc,CACnB,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,CAAC;IA6FhB,cAAc,CACnB,KAAK,EAAE,aAAa,CAAC,MAAM,CAAC,EAC5B,IAAI,GAAE;QAAE,WAAW,CAAC,EAAE,MAAM,CAAA;KAAO,GACjC,OAAO,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IA8C9C,UAAU,IAAI,IAAI;IAIlB,eAAe,IAAI,MAAM;IAIzB;;;;OAIG;IACH,yBAAyB,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI;IAYpE,iEAAiE;IACjE,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED;;;;;;OAMG;IACH,gBAAgB,IAAI,IAAI;YAeV,cAAc;YA4Dd,kBAAkB;YA8ClB,sBAAsB;IAkHpC,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAC,WAAW;IA4CnB,OAAO,CAAC,eAAe;IAgCvB,OAAO,CAAC,aAAa;IAkErB,OAAO,CAAC,aAAa;IASrB,OAAO,CAAC,aAAa;IAIrB,OAAO,CAAC,mBAAmB;IAsB3B,OAAO,CAAC,qBAAqB;IAO7B,OAAO,CAAC,qBAAqB;IAqB7B,OAAO,CAAC,kBAAkB;CAQ1B"}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
 * T2 — TTS chunk-size telemetry tests for `VoiceScheduler`.
 *
 * The scheduler streams TTS PCM in chunks; T2 records the per-phrase
 * distribution of chunk sizes so we can debug T1-class pathologies (one
 * giant chunk = no streaming) and confirm T3-class effects (more, smaller
 * phrases = more, smaller chunks). The streaming backend is a fake so
 * tests stay hermetic.
 */
|
|
10
|
+
|
|
11
|
+
import { describe, expect, it } from "vitest";
|
|
12
|
+
import { InMemoryAudioSink } from "./ring-buffer";
|
|
13
|
+
import { type TtsPhraseChunkMetrics, VoiceScheduler } from "./scheduler";
|
|
14
|
+
import type {
|
|
15
|
+
AudioChunk,
|
|
16
|
+
OmniVoiceBackend,
|
|
17
|
+
Phrase,
|
|
18
|
+
SpeakerPreset,
|
|
19
|
+
StreamingTtsBackend,
|
|
20
|
+
TextToken,
|
|
21
|
+
TtsPcmChunk,
|
|
22
|
+
} from "./types";
|
|
23
|
+
|
|
24
|
+
/**
 * Builds a text token fixture for feeding the scheduler in tests.
 *
 * @param index - Value stored in the token's `index` field.
 * @param text - Value stored in the token's `text` field.
 * @returns A token carrying exactly the given `index` and `text`.
 */
function tok(index: number, text: string): TextToken {
	return { index, text };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Builds a fixed speaker preset fixture: voice id `"default"`, a
 * two-element embedding, and `bytes` holding the embedding's raw bytes.
 *
 * @returns A fresh preset object on every call.
 */
function makePreset(): SpeakerPreset {
	const embedding = new Float32Array([0.1, 0.2]);
	return {
		voiceId: "default",
		embedding,
		// `slice(0)` copies the underlying buffer, so `bytes` does not alias
		// the memory backing `embedding`.
		bytes: new Uint8Array(embedding.buffer.slice(0)),
	};
}
|
|
36
|
+
|
|
37
|
+
/**
 * Fake streaming TTS backend that replays a fixed script of PCM buffers.
 *
 * Each scripted buffer is delivered to `onChunk` as a non-final chunk at
 * 24 kHz; a zero-length final chunk is delivered afterwards. Nothing is
 * synthesized, which keeps the tests hermetic.
 */
class ScriptedStreamingBackend
	implements OmniVoiceBackend, StreamingTtsBackend
{
	/**
	 * @param chunks - PCM buffers to replay, in order, on every
	 *   `synthesizeStream` call.
	 */
	constructor(private readonly chunks: ReadonlyArray<Float32Array>) {}

	/** Non-streaming path; these tests never exercise it, so it always rejects. */
	async synthesize(): Promise<AudioChunk> {
		throw new Error("not used");
	}

	/**
	 * Replays the scripted buffers through `args.onChunk`.
	 *
	 * For each buffer, `onKernelTick` (when provided) fires first, then the
	 * cancel flag is checked; a set flag stops the replay before that buffer
	 * is delivered. The final empty chunk is emitted unconditionally, even
	 * after a cancel. The value returned by `onChunk` is ignored.
	 *
	 * @returns The state of `cancelSignal.cancelled` once the replay ends.
	 */
	async synthesizeStream(args: {
		phrase: Phrase;
		preset: SpeakerPreset;
		cancelSignal: { cancelled: boolean };
		onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
		onKernelTick?: () => void;
	}): Promise<{ cancelled: boolean }> {
		for (const pcm of this.chunks) {
			args.onKernelTick?.();
			if (args.cancelSignal.cancelled) break;
			args.onChunk({ pcm, sampleRate: 24000, isFinal: false });
		}
		args.onChunk({
			pcm: new Float32Array(0),
			sampleRate: 24000,
			isFinal: true,
		});
		return { cancelled: args.cancelSignal.cancelled };
	}
}
|
|
64
|
+
|
|
65
|
+
// Telemetry suite: drives a VoiceScheduler with the scripted fake backend
// and inspects the per-phrase summaries delivered to `onChunkMetrics`.
describe("VoiceScheduler T2 chunk-size telemetry", () => {
	it("emits one onChunkMetrics summary per phrase with per-chunk byte and duration", async () => {
		// At 24 kHz, 240 samples = 10 ms and 480 samples = 20 ms, so the
		// phrase totals 30 ms across its two chunks.
		const backend = new ScriptedStreamingBackend([
			new Float32Array(240),
			new Float32Array(480),
		]);
		const sink = new InMemoryAudioSink();
		const metricsLog: TtsPhraseChunkMetrics[] = [];
		const sched = new VoiceScheduler(
			{
				chunkerConfig: { maxTokensPerPhrase: 10 },
				preset: makePreset(),
				ringBufferCapacity: 4096,
				sampleRate: 24000,
			},
			{ backend, sink },
			{ onChunkMetrics: (m) => metricsLog.push(m) },
		);

		await sched.accept(tok(0, "Hello"));
		await sched.accept(tok(1, "."));
		await sched.waitIdle();

		expect(metricsLog).toHaveLength(1);
		const m = metricsLog[0];
		// The zero-length final chunk from the fake backend is expected not to
		// appear in the summary: only the two scripted chunks are counted.
		expect(m.chunks).toHaveLength(2);
		// Float32 => 4 bytes / sample.
		expect(m.chunks[0]).toEqual({ chunkBytes: 240 * 4, chunkDurationMs: 10 });
		expect(m.chunks[1]).toEqual({ chunkBytes: 480 * 4, chunkDurationMs: 20 });
		expect(m.totalBytes).toBe((240 + 480) * 4);
		expect(m.totalDurationMs).toBe(30);
		expect(m.cancelled).toBe(false);
	});

	it("reports cancelled=false summary when synthesis completes", async () => {
		const backend = new ScriptedStreamingBackend([new Float32Array(120)]);
		const sink = new InMemoryAudioSink();
		const metricsLog: TtsPhraseChunkMetrics[] = [];
		const sched = new VoiceScheduler(
			{
				chunkerConfig: { maxTokensPerPhrase: 10 },
				preset: makePreset(),
				ringBufferCapacity: 4096,
				sampleRate: 24000,
			},
			{ backend, sink },
			{ onChunkMetrics: (m) => metricsLog.push(m) },
		);

		await sched.accept(tok(0, "Hi"));
		await sched.accept(tok(1, "."));
		await sched.waitIdle();

		expect(metricsLog).toHaveLength(1);
		expect(metricsLog[0].cancelled).toBe(false);
		expect(metricsLog[0].chunks).toHaveLength(1);
	});

	it("does not invoke onChunkMetrics when the listener is absent", async () => {
		const backend = new ScriptedStreamingBackend([new Float32Array(120)]);
		const sink = new InMemoryAudioSink();
		// No listener — should not throw or do extra work; the scheduler still
		// commits audio normally.
		const sched = new VoiceScheduler(
			{
				chunkerConfig: { maxTokensPerPhrase: 10 },
				preset: makePreset(),
				ringBufferCapacity: 4096,
				sampleRate: 24000,
			},
			{ backend, sink },
		);

		await sched.accept(tok(0, "Hi"));
		await sched.accept(tok(1, "."));
		await sched.waitIdle();

		expect(sink.totalWritten()).toBeGreaterThan(0);
	});
});
|