@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +82 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.d.ts +29 -0
- package/src/actions/transcription-control.d.ts.map +1 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +8 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1082 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +205 -0
- package/src/routes/local-inference-asr-route.ts +163 -0
- package/src/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/src/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +808 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/transcript-audio-store.d.ts +15 -0
- package/src/routes/transcript-audio-store.d.ts.map +1 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.d.ts +36 -0
- package/src/routes/transcripts-routes.d.ts.map +1 -0
- package/src/routes/transcripts-routes.test.ts +144 -0
- package/src/routes/transcripts-routes.ts +159 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +62 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1448 -0
- package/src/runtime/index.d.ts +15 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +33 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bionic-host-loader.d.ts +46 -0
- package/src/services/bionic-host-loader.d.ts.map +1 -0
- package/src/services/bionic-host-loader.test.ts +133 -0
- package/src/services/bionic-host-loader.ts +180 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +238 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +95 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +339 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +747 -0
- package/src/services/downloader.ts +925 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +540 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1909 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.d.ts +56 -0
- package/src/services/gpu-detect.d.ts.map +1 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +231 -0
- package/src/services/hardware.ts +410 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +29 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +211 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +689 -0
- package/src/services/manifest/schema.d.ts +713 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +653 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +567 -0
- package/src/services/memory-arbiter.d.ts +318 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +925 -0
- package/src/services/memory-monitor.d.ts +122 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +297 -0
- package/src/services/memory-pressure.d.ts +130 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +407 -0
- package/src/services/routing-policy.d.ts +69 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.test.ts +164 -0
- package/src/services/routing-policy.ts +297 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.d.ts +33 -0
- package/src/services/system-memory.d.ts.map +1 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/asr-timed.real.test.ts +141 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +243 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +759 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2302 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +674 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +728 -0
- package/src/services/voice/ffi-bindings.ts +3225 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.d.ts +37 -0
- package/src/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.d.ts +41 -0
- package/src/services/voice/transcript-service.d.ts.map +1 -0
- package/src/services/voice/transcript-service.test.ts +137 -0
- package/src/services/voice/transcript-service.ts +141 -0
- package/src/services/voice/transcript-store.d.ts +53 -0
- package/src/services/voice/transcript-store.d.ts.map +1 -0
- package/src/services/voice/transcript-store.test.ts +153 -0
- package/src/services/voice/transcript-store.ts +132 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +418 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice backend selection — picks between OmniVoice and Kokoro at engine
|
|
3
|
+
* arm time.
|
|
4
|
+
*
|
|
5
|
+
* The scheduler is backend-agnostic — both implementations satisfy
|
|
6
|
+
* `OmniVoiceBackend + StreamingTtsBackend`. This module isolates the
|
|
7
|
+
* decision logic so the engine layer (or a test) can ask "which backend
|
|
8
|
+
* should I instantiate?" and get a single, auditable answer.
|
|
9
|
+
*
|
|
10
|
+
* Decision modes:
|
|
11
|
+
*
|
|
12
|
+
* - `omnivoice` — force OmniVoice. Used when the caller needs voice
|
|
13
|
+
* cloning (Kokoro v1.0 has fixed voice packs, no per-user cloning).
|
|
14
|
+
* - `kokoro` — force Kokoro. Used when the caller cares about
|
|
15
|
+
* first-audio latency over voice fidelity (Kokoro ≈ 97ms CPU TTFB,
|
|
16
|
+
* OmniVoice ≈ 200ms on the fused build).
|
|
17
|
+
* - `auto` — apply the documented heuristic below.
|
|
18
|
+
*
|
|
19
|
+
* Mobile precedence: when `mobile === true` the selector returns Kokoro
|
|
20
|
+
* unconditionally (it is smaller + faster and the only backend shipped in
|
|
21
|
+
* mobile-class bundles). This short-circuits every mode and heuristic below.
|
|
22
|
+
*
|
|
23
|
+
* `auto` heuristic (deterministic, no model probes — those go through the
|
|
24
|
+
* autotune layer in `voice/scheduler.ts`):
|
|
25
|
+
*
|
|
26
|
+
* 1. If `requireVoiceCloning === true` → OmniVoice.
|
|
27
|
+
* 2. If `targetTtfaMs` is set and < 200 → Kokoro.
|
|
28
|
+
* 3. If a Kokoro RTF measurement is available and OmniVoice RTF is not,
|
|
29
|
+
* or Kokoro RTF beats OmniVoice by ≥ 10% → Kokoro.
|
|
30
|
+
* 4. Else → first entry of `tierVoiceBackends` if provided (the
|
|
31
|
+
* catalog's declared per-tier default). Falls back to OmniVoice
|
|
32
|
+
* only when no tier policy is supplied.
|
|
33
|
+
*
|
|
34
|
+
* Tier policy comes from `ELIZA_1_VOICE_BACKENDS` in
|
|
35
|
+
* `packages/shared/src/local-inference/catalog.ts`. Callers should pass
|
|
36
|
+
* the active bundle's `voiceBackends` array so the selection is
|
|
37
|
+
* data-driven (small tiers → Kokoro default; large tiers → OmniVoice).
|
|
38
|
+
*
|
|
39
|
+
* The decision returns a plain `{ backend, reason }` record, not a backend
|
|
40
|
+
* instance, so the engine layer can instantiate the chosen backend with
|
|
41
|
+
* its own dependencies (FFI handle / Kokoro runtime / etc.). This keeps
|
|
42
|
+
* the selection logic unit-testable without dragging the ORT or FFI
|
|
43
|
+
* surfaces into the test graph.
|
|
44
|
+
*/
|
|
45
|
+
export type VoiceBackendChoice = "omnivoice" | "kokoro";
|
|
46
|
+
export type VoiceBackendMode = VoiceBackendChoice | "auto";
|
|
47
|
+
export interface VoiceBackendInputs {
|
|
48
|
+
/** Caller-set mode. Defaults to `auto`. */
|
|
49
|
+
mode?: VoiceBackendMode;
|
|
50
|
+
/** Time-to-first-audio target (ms). Lower → prefer Kokoro. */
|
|
51
|
+
targetTtfaMs?: number;
|
|
52
|
+
/** Whether the caller needs per-user voice cloning. */
|
|
53
|
+
requireVoiceCloning?: boolean;
|
|
54
|
+
/** Latest measured RTF for Kokoro on this device (audio_seconds / wall_seconds). */
|
|
55
|
+
kokoroRtf?: number | null;
|
|
56
|
+
/** Latest measured RTF for OmniVoice on this device. */
|
|
57
|
+
omnivoiceRtf?: number | null;
|
|
58
|
+
/** Whether Kokoro model artifacts are present on disk. The selector
|
|
59
|
+
* never returns Kokoro when this is `false` — no silent downgrade. */
|
|
60
|
+
kokoroAvailable: boolean;
|
|
61
|
+
/** Whether the OmniVoice FFI library is present on disk. */
|
|
62
|
+
omnivoiceAvailable: boolean;
|
|
63
|
+
/**
|
|
64
|
+
* True on mobile (iOS / Android) builds. Mobile uses Kokoro exclusively —
|
|
65
|
+
* it is smaller and faster than OmniVoice and is the only TTS backend
|
|
66
|
+
* shipped in mobile-class bundles. When set, the selector returns Kokoro
|
|
67
|
+
* unconditionally (ignoring `mode`, RTF, and TTFA heuristics) and throws
|
|
68
|
+
* if Kokoro artifacts are missing rather than falling back to OmniVoice.
|
|
69
|
+
*/
|
|
70
|
+
mobile?: boolean;
|
|
71
|
+
/**
|
|
72
|
+
* The active bundle's per-tier voice backend policy, as declared in
|
|
73
|
+
* `ELIZA_1_VOICE_BACKENDS`. First entry is the catalog default for
|
|
74
|
+
* the tier; later entries are also bundled. The selector reads this
|
|
75
|
+
* to make the `auto` default tier-aware rather than hard-coding a
|
|
76
|
+
* single backend.
|
|
77
|
+
*
|
|
78
|
+
* Omit when called outside the Eliza-1 catalog context (e.g. ad-hoc
|
|
79
|
+
* smoke benches) — the selector falls back to OmniVoice as the
|
|
80
|
+
* historical default in that case.
|
|
81
|
+
*/
|
|
82
|
+
tierVoiceBackends?: ReadonlyArray<VoiceBackendChoice>;
|
|
83
|
+
}
|
|
84
|
+
export interface VoiceBackendDecision {
|
|
85
|
+
backend: VoiceBackendChoice;
|
|
86
|
+
/** One-line reason — surfaced to telemetry. */
|
|
87
|
+
reason: string;
|
|
88
|
+
}
|
|
89
|
+
/** Resolve the env override (`ELIZA_TTS_BACKEND=kokoro|omnivoice|auto`). */
|
|
90
|
+
export declare function readVoiceBackendModeFromEnv(env?: NodeJS.ProcessEnv): VoiceBackendMode | undefined;
|
|
91
|
+
export declare function selectVoiceBackend(inputs: VoiceBackendInputs): VoiceBackendDecision;
|
|
92
|
+
//# sourceMappingURL=runtime-selection.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runtime-selection.d.ts","sourceRoot":"","sources":["runtime-selection.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AAEH,MAAM,MAAM,kBAAkB,GAAG,WAAW,GAAG,QAAQ,CAAC;AAExD,MAAM,MAAM,gBAAgB,GAAG,kBAAkB,GAAG,MAAM,CAAC;AAE3D,MAAM,WAAW,kBAAkB;IAClC,2CAA2C;IAC3C,IAAI,CAAC,EAAE,gBAAgB,CAAC;IACxB,8DAA8D;IAC9D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oFAAoF;IACpF,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,wDAAwD;IACxD,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B;2EACuE;IACvE,eAAe,EAAE,OAAO,CAAC;IACzB,4DAA4D;IAC5D,kBAAkB,EAAE,OAAO,CAAC;IAC5B;;;;;;OAMG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;;;;;;;;;OAUG;IACH,iBAAiB,CAAC,EAAE,aAAa,CAAC,kBAAkB,CAAC,CAAC;CACtD;AAED,MAAM,WAAW,oBAAoB;IACpC,OAAO,EAAE,kBAAkB,CAAC;IAC5B,+CAA+C;IAC/C,MAAM,EAAE,MAAM,CAAC;CACf;AAKD,4EAA4E;AAC5E,wBAAgB,2BAA2B,CAC1C,GAAG,GAAE,MAAM,CAAC,UAAwB,GAClC,gBAAgB,GAAG,SAAS,CAO9B;AAED,wBAAgB,kBAAkB,CACjC,MAAM,EAAE,kBAAkB,GACxB,oBAAoB,CA8HtB"}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice backend selection — picks between OmniVoice and Kokoro at engine
|
|
3
|
+
* arm time.
|
|
4
|
+
*
|
|
5
|
+
* The scheduler is backend-agnostic — both implementations satisfy
|
|
6
|
+
* `OmniVoiceBackend + StreamingTtsBackend`. This module isolates the
|
|
7
|
+
* decision logic so the engine layer (or a test) can ask "which backend
|
|
8
|
+
* should I instantiate?" and get a single, auditable answer.
|
|
9
|
+
*
|
|
10
|
+
* Decision modes:
|
|
11
|
+
*
|
|
12
|
+
* - `omnivoice` — force OmniVoice. Used when the caller needs voice
|
|
13
|
+
* cloning (Kokoro v1.0 has fixed voice packs, no per-user cloning).
|
|
14
|
+
* - `kokoro` — force Kokoro. Used when the caller cares about
|
|
15
|
+
* first-audio latency over voice fidelity (Kokoro ≈ 97ms CPU TTFB,
|
|
16
|
+
* OmniVoice ≈ 200ms on the fused build).
|
|
17
|
+
* - `auto` — apply the documented heuristic below.
|
|
18
|
+
*
|
|
19
|
+
* Mobile precedence: when `mobile === true` the selector returns Kokoro
|
|
20
|
+
* unconditionally (it is smaller + faster and the only backend shipped in
|
|
21
|
+
* mobile-class bundles). This short-circuits every mode and heuristic below.
|
|
22
|
+
*
|
|
23
|
+
* `auto` heuristic (deterministic, no model probes — those go through the
|
|
24
|
+
* autotune layer in `voice/scheduler.ts`):
|
|
25
|
+
*
|
|
26
|
+
* 1. If `requireVoiceCloning === true` → OmniVoice.
|
|
27
|
+
* 2. If `targetTtfaMs` is set and < 200 → Kokoro.
|
|
28
|
+
* 3. If a Kokoro RTF measurement is available and OmniVoice RTF is not,
|
|
29
|
+
* or Kokoro RTF beats OmniVoice by ≥ 10% → Kokoro.
|
|
30
|
+
* 4. Else → first entry of `tierVoiceBackends` if provided (the
|
|
31
|
+
* catalog's declared per-tier default). Falls back to OmniVoice
|
|
32
|
+
* only when no tier policy is supplied.
|
|
33
|
+
*
|
|
34
|
+
* Tier policy comes from `ELIZA_1_VOICE_BACKENDS` in
|
|
35
|
+
* `packages/shared/src/local-inference/catalog.ts`. Callers should pass
|
|
36
|
+
* the active bundle's `voiceBackends` array so the selection is
|
|
37
|
+
* data-driven (small tiers → Kokoro default; large tiers → OmniVoice).
|
|
38
|
+
*
|
|
39
|
+
* The decision returns a plain `{ backend, reason }` record, not a backend
|
|
40
|
+
* instance, so the engine layer can instantiate the chosen backend with
|
|
41
|
+
* its own dependencies (FFI handle / Kokoro runtime / etc.). This keeps
|
|
42
|
+
* the selection logic unit-testable without dragging the ORT or FFI
|
|
43
|
+
* surfaces into the test graph.
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
/** Identifier of a concrete TTS backend the selector can return. */
export type VoiceBackendChoice = "kokoro" | "omnivoice";

/** Selection mode: force one concrete backend, or `auto` to apply the heuristics. */
export type VoiceBackendMode = "auto" | VoiceBackendChoice;
|
|
49
|
+
|
|
50
|
+
/** Inputs accepted by `selectVoiceBackend`. */
export interface VoiceBackendInputs {
	/** Mode requested by the caller; `auto` is assumed when omitted. */
	mode?: VoiceBackendMode;
	/** Target time-to-first-audio in milliseconds. A lower target favours Kokoro. */
	targetTtfaMs?: number;
	/** Set when the caller needs per-user voice cloning. */
	requireVoiceCloning?: boolean;
	/** Most recent Kokoro RTF measured on this device (audio_seconds / wall_seconds). */
	kokoroRtf?: number | null;
	/** Most recent OmniVoice RTF measured on this device. */
	omnivoiceRtf?: number | null;
	/**
	 * Whether the Kokoro model artifacts exist on disk. The selector never
	 * returns Kokoro while this is `false` — no silent downgrade.
	 */
	kokoroAvailable: boolean;
	/** Whether the OmniVoice FFI library exists on disk. */
	omnivoiceAvailable: boolean;
	/**
	 * True on mobile (iOS / Android) builds, which use Kokoro exclusively:
	 * it is smaller and faster than OmniVoice and is the only TTS backend
	 * shipped in mobile-class bundles. When set, the selector ignores
	 * `mode`, RTF, and TTFA heuristics and returns Kokoro unconditionally;
	 * it throws if the Kokoro artifacts are missing instead of falling back
	 * to OmniVoice.
	 */
	mobile?: boolean;
	/**
	 * Per-tier voice backend policy of the active bundle, as declared in
	 * `ELIZA_1_VOICE_BACKENDS`. The first entry is the catalog default for
	 * the tier; any later entries are bundled as well. Reading this lets
	 * the `auto` default follow the tier rather than a single hard-coded
	 * backend.
	 *
	 * Leave it out when calling outside the Eliza-1 catalog context (e.g.
	 * ad-hoc smoke benches); the selector then falls back to OmniVoice,
	 * the historical default.
	 */
	tierVoiceBackends?: readonly VoiceBackendChoice[];
}
|
|
87
|
+
|
|
88
|
+
/** Outcome of `selectVoiceBackend`: the chosen backend id and why it was chosen. */
export interface VoiceBackendDecision {
	/** Backend the engine layer should instantiate. */
	backend: VoiceBackendChoice;
	/** Single-line explanation of the choice, surfaced to telemetry. */
	reason: string;
}
|
|
93
|
+
|
|
94
|
+
/** A `targetTtfaMs` strictly below this many milliseconds makes `auto` prefer Kokoro. */
const TTFA_CUTOFF_MS = 200;

/** Kokoro's RTF must reach OmniVoice's RTF times this factor (a 10% lead) to win on RTF. */
const RTF_MARGIN = 1.1;
|
|
96
|
+
|
|
97
|
+
/**
 * Read the `ELIZA_TTS_BACKEND` override (`kokoro`, `omnivoice`, or `auto`).
 *
 * The value is trimmed and lower-cased before it is matched.
 *
 * @param env - Environment to read from; defaults to `process.env`.
 * @returns The requested mode, or `undefined` when the variable is unset or
 *   empty after trimming.
 * @throws Error when the variable holds any other value.
 */
export function readVoiceBackendModeFromEnv(
	env: NodeJS.ProcessEnv = process.env,
): VoiceBackendMode | undefined {
	const normalized = env.ELIZA_TTS_BACKEND?.trim().toLowerCase();
	if (normalized === undefined || normalized === "") {
		return undefined;
	}

	switch (normalized) {
		case "kokoro":
		case "omnivoice":
		case "auto":
			return normalized;
		default:
			throw new Error(
				`[voice] ELIZA_TTS_BACKEND must be one of 'kokoro', 'omnivoice', 'auto' (got '${normalized}')`,
			);
	}
}
|
|
108
|
+
|
|
109
|
+
/**
 * Decide which TTS backend the engine layer should instantiate.
 *
 * Rules are evaluated in order; the first one that matches wins:
 *
 *   1. `mobile` — Kokoro, regardless of `mode` or any heuristic.
 *   2. `mode` of `kokoro` / `omnivoice` — that backend.
 *   3. `requireVoiceCloning` — OmniVoice.
 *   4. `targetTtfaMs` below `TTFA_CUTOFF_MS` — Kokoro, or OmniVoice when the
 *      Kokoro artifacts are missing.
 *   5. Kokoro RTF measured (> 0) and either OmniVoice RTF unknown or Kokoro
 *      RTF ≥ OmniVoice RTF × `RTF_MARGIN` — Kokoro.
 *   6. Only one backend available — that backend.
 *   7. `tierVoiceBackends[0]` when supplied, otherwise OmniVoice.
 *
 * A backend is never returned while its availability flag is `false`. In
 * particular, a tier default of `kokoro` resolves to OmniVoice when the
 * Kokoro artifacts are missing.
 *
 * @param inputs - Caller mode, latency/cloning requirements, RTF
 *   measurements, availability flags, and the optional tier policy.
 * @returns The chosen backend id plus a one-line reason for telemetry.
 * @throws Error when the backend required by rules 1–3 is unavailable, or
 *   when neither backend is available.
 */
export function selectVoiceBackend(
	inputs: VoiceBackendInputs,
): VoiceBackendDecision {
	// Mobile is Kokoro-exclusive: it is smaller + faster and is the only TTS
	// backend shipped in mobile-class bundles. This wins over every mode and
	// heuristic below — no OmniVoice fallback on phones.
	if (inputs.mobile) {
		if (!inputs.kokoroAvailable) {
			throw new Error(
				"[voice] mobile builds use Kokoro exclusively but its model artifacts are not present on disk",
			);
		}
		return {
			backend: "kokoro",
			reason:
				"mobile platform — Kokoro exclusively (OmniVoice not shipped on mobile)",
		};
	}

	const mode = inputs.mode ?? "auto";

	// Forced modes never fall back: a missing backend is a hard error.
	if (mode === "kokoro") {
		if (!inputs.kokoroAvailable) {
			throw new Error(
				"[voice] ELIZA_TTS_BACKEND=kokoro but Kokoro model artifacts are not present on disk",
			);
		}
		return { backend: "kokoro", reason: "forced via mode=kokoro" };
	}

	if (mode === "omnivoice") {
		if (!inputs.omnivoiceAvailable) {
			throw new Error(
				"[voice] ELIZA_TTS_BACKEND=omnivoice but the OmniVoice FFI library is not present",
			);
		}
		return { backend: "omnivoice", reason: "forced via mode=omnivoice" };
	}

	// `auto` — apply heuristics.

	// Cloning is a hard requirement only OmniVoice can satisfy.
	if (inputs.requireVoiceCloning) {
		if (!inputs.omnivoiceAvailable) {
			throw new Error(
				"[voice] voice cloning required but OmniVoice FFI library is not available; Kokoro v1.0 has no per-user cloning",
			);
		}
		return {
			backend: "omnivoice",
			reason: "voice cloning required (Kokoro v1.0 cannot clone)",
		};
	}

	// A tight first-audio target prefers Kokoro, but it is a preference:
	// OmniVoice is still used when Kokoro is missing.
	if (
		inputs.targetTtfaMs !== undefined &&
		inputs.targetTtfaMs < TTFA_CUTOFF_MS
	) {
		if (inputs.kokoroAvailable) {
			return {
				backend: "kokoro",
				reason: `targetTtfaMs=${inputs.targetTtfaMs} < ${TTFA_CUTOFF_MS} → Kokoro (~97ms CPU TTFB)`,
			};
		}
		if (!inputs.omnivoiceAvailable) {
			throw new Error(
				"[voice] no TTS backend available (neither Kokoro model nor OmniVoice FFI library on disk)",
			);
		}
		return {
			backend: "omnivoice",
			reason: "targetTtfaMs requested but Kokoro artifacts missing",
		};
	}

	// RTF comparison. A non-positive or absent value counts as "not measured".
	const { kokoroRtf, omnivoiceRtf } = inputs;
	if (
		inputs.kokoroAvailable &&
		kokoroRtf !== null &&
		kokoroRtf !== undefined &&
		kokoroRtf > 0
	) {
		if (
			omnivoiceRtf === null ||
			omnivoiceRtf === undefined ||
			omnivoiceRtf <= 0
		) {
			return {
				backend: "kokoro",
				reason: `Kokoro RTF=${kokoroRtf.toFixed(2)} measured; OmniVoice RTF unknown`,
			};
		}
		if (kokoroRtf >= omnivoiceRtf * RTF_MARGIN) {
			return {
				backend: "kokoro",
				reason: `Kokoro RTF=${kokoroRtf.toFixed(2)} beats OmniVoice RTF=${omnivoiceRtf.toFixed(2)} by ≥10%`,
			};
		}
	}

	// No heuristic fired. Without OmniVoice, Kokoro is the only candidate.
	if (!inputs.omnivoiceAvailable) {
		if (!inputs.kokoroAvailable) {
			throw new Error(
				"[voice] no TTS backend available (neither Kokoro model nor OmniVoice FFI library on disk)",
			);
		}
		return {
			backend: "kokoro",
			reason: "OmniVoice FFI library not available; Kokoro is the only option",
		};
	}

	// OmniVoice is available from here on; Kokoro may or may not be. Honor the
	// tier's declared default if the caller supplied one (catalog-driven),
	// else fall back to OmniVoice to preserve historical behavior for
	// non-Eliza-1 contexts.
	const tierDefault = inputs.tierVoiceBackends?.[0];
	if (tierDefault === "kokoro") {
		if (!inputs.kokoroAvailable) {
			// The tier policy cannot override availability: Kokoro is never
			// returned while its artifacts are missing.
			return {
				backend: "omnivoice",
				reason:
					"tier default is kokoro per ELIZA_1_VOICE_BACKENDS but Kokoro artifacts missing — using OmniVoice",
			};
		}
		return {
			backend: "kokoro",
			reason: "tier default — kokoro per ELIZA_1_VOICE_BACKENDS",
		};
	}
	if (tierDefault === "omnivoice") {
		return {
			backend: "omnivoice",
			reason: "tier default — omnivoice per ELIZA_1_VOICE_BACKENDS",
		};
	}
	return {
		backend: "omnivoice",
		reason: "default — OmniVoice on the fused build (no tier policy supplied)",
	};
}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public types for the Kokoro-82M TTS backend (Apache-2.0, hexgrad/Kokoro-82M
|
|
3
|
+
* upstream).
|
|
4
|
+
*
|
|
5
|
+
* Kokoro is a small (~82M-param) StyleTTS-2 derivative that ships with a set
|
|
6
|
+
* of "voice packs" — pre-baked 256-dim style vectors (one .bin per voice).
|
|
7
|
+
* Adding voices is a cheap (~512KB) extra download.
|
|
8
|
+
*/
|
|
9
|
+
/** Canonical voice-pack id. Convention: `<lang>_<name>` (af_bella, am_michael). */
|
|
10
|
+
export type KokoroVoiceId = string;
|
|
11
|
+
/** One bundled voice pack — small fp32 style tensor on disk. */
|
|
12
|
+
export interface KokoroVoicePack {
|
|
13
|
+
/** `af_bella`, `af_sarah`, `am_michael`, ... */
|
|
14
|
+
id: KokoroVoiceId;
|
|
15
|
+
/** Human-readable name shown in UI. */
|
|
16
|
+
displayName: string;
|
|
17
|
+
/** Language tag per Kokoro convention (`a` = American English, `b` = British English, etc.). */
|
|
18
|
+
lang: string;
|
|
19
|
+
/** Filename inside the voices/ directory, relative to the Kokoro model root. */
|
|
20
|
+
file: string;
|
|
21
|
+
/** Style-vector dim (256 for v1.0). */
|
|
22
|
+
dim: number;
|
|
23
|
+
/** Genre/voice tags for picker UIs. */
|
|
24
|
+
tags?: ReadonlyArray<string>;
|
|
25
|
+
}
|
|
26
|
+
/** Where the runtime expects to find Kokoro on disk. */
|
|
27
|
+
export interface KokoroModelLayout {
|
|
28
|
+
/** Directory under `<stateDir>/local-inference/models/kokoro/`. */
|
|
29
|
+
root: string;
|
|
30
|
+
/** Model file — the Kokoro GGUF carried by our llama.cpp fork. */
|
|
31
|
+
modelFile: string;
|
|
32
|
+
/** Directory containing the per-voice style tensors. */
|
|
33
|
+
voicesDir: string;
|
|
34
|
+
/** Model output sample rate (Kokoro v1.0 = 24000). */
|
|
35
|
+
sampleRate: number;
|
|
36
|
+
}
|
|
37
|
+
/** Construction-time configuration for `KokoroTtsBackend`. */
|
|
38
|
+
export interface KokoroBackendOptions {
|
|
39
|
+
/** Resolved on-disk layout. Required — the backend never guesses paths. */
|
|
40
|
+
layout: KokoroModelLayout;
|
|
41
|
+
/**
|
|
42
|
+
* Voice id to use when the caller's `SpeakerPreset.voiceId` is not in the
|
|
43
|
+
* voice-pack registry. The named voice MUST be present in `layout.voicesDir`.
|
|
44
|
+
*/
|
|
45
|
+
defaultVoiceId: KokoroVoiceId;
|
|
46
|
+
/**
|
|
47
|
+
* Optional phonemizer override. Defaults to the bundled lazy phonemizer,
|
|
48
|
+
* which uses `phonemize` if installed and falls back to a deterministic
|
|
49
|
+
* grapheme-to-phoneme adapter otherwise (documented tradeoff in README).
|
|
50
|
+
*/
|
|
51
|
+
phonemizer?: KokoroPhonemizer;
|
|
52
|
+
/**
|
|
53
|
+
* Max samples emitted in a single streaming chunk. Defaults to a quarter-
|
|
54
|
+
* second at 24kHz (6000) so the scheduler ring buffer sees a continuous
|
|
55
|
+
* trickle and TTFB stays close to the first inference completion.
|
|
56
|
+
*/
|
|
57
|
+
streamingChunkSamples?: number;
|
|
58
|
+
}
|
|
59
|
+
/** A pure (or async-pure) text → phoneme-id sequence converter. */
|
|
60
|
+
export interface KokoroPhonemizer {
|
|
61
|
+
/** Phonemize a single utterance into a sequence of integer ids. */
|
|
62
|
+
phonemize(text: string, lang: string): Promise<KokoroPhonemeSequence>;
|
|
63
|
+
/** Human-facing id (`"phonemize"`, `"espeak-ng"`, `"fallback-g2p"`, ...). */
|
|
64
|
+
readonly id: string;
|
|
65
|
+
}
|
|
66
|
+
export interface KokoroPhonemeSequence {
|
|
67
|
+
/** Token ids for the model's `input_ids` tensor. */
|
|
68
|
+
ids: Int32Array;
|
|
69
|
+
/** Original phoneme string, for debugging and tests. */
|
|
70
|
+
phonemes: string;
|
|
71
|
+
}
|
|
72
|
+
/** Raised when the on-disk model layout is missing or malformed. */
|
|
73
|
+
export declare class KokoroModelMissingError extends Error {
|
|
74
|
+
readonly code: "kokoro-model-missing";
|
|
75
|
+
constructor(message: string);
|
|
76
|
+
}
|
|
77
|
+
/** Raised when phonemization cannot proceed (no phonemizer + non-ASCII text). */
|
|
78
|
+
export declare class KokoroPhonemizerError extends Error {
|
|
79
|
+
readonly code: "kokoro-phonemizer-error";
|
|
80
|
+
constructor(message: string);
|
|
81
|
+
}
|
|
82
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["types.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,mFAAmF;AACnF,MAAM,MAAM,aAAa,GAAG,MAAM,CAAC;AAEnC,gEAAgE;AAChE,MAAM,WAAW,eAAe;IAC/B,gDAAgD;IAChD,EAAE,EAAE,aAAa,CAAC;IAClB,uCAAuC;IACvC,WAAW,EAAE,MAAM,CAAC;IACpB,2GAA2G;IAC3G,IAAI,EAAE,MAAM,CAAC;IACb,gFAAgF;IAChF,IAAI,EAAE,MAAM,CAAC;IACb,uCAAuC;IACvC,GAAG,EAAE,MAAM,CAAC;IACZ,uCAAuC;IACvC,IAAI,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;CAC7B;AAED,wDAAwD;AACxD,MAAM,WAAW,iBAAiB;IACjC,mEAAmE;IACnE,IAAI,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,8DAA8D;AAC9D,MAAM,WAAW,oBAAoB;IACpC,2EAA2E;IAC3E,MAAM,EAAE,iBAAiB,CAAC;IAC1B;;;OAGG;IACH,cAAc,EAAE,aAAa,CAAC;IAC9B;;;;OAIG;IACH,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B;;;;OAIG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAC/B;AAED,mEAAmE;AACnE,MAAM,WAAW,gBAAgB;IAChC,mEAAmE;IACnE,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;IACtE,6EAA6E;IAC7E,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACrC,oDAAoD;IACpD,GAAG,EAAE,UAAU,CAAC;IAChB,wDAAwD;IACxD,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED,oEAAoE;AACpE,qBAAa,uBAAwB,SAAQ,KAAK;IACjD,QAAQ,CAAC,IAAI,EAAG,sBAAsB,CAAU;gBACpC,OAAO,EAAE,MAAM;CAI3B;AAED,iFAAiF;AACjF,qBAAa,qBAAsB,SAAQ,KAAK;IAC/C,QAAQ,CAAC,IAAI,EAAG,yBAAyB,CAAU;gBACvC,OAAO,EAAE,MAAM;CAI3B"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public types for the Kokoro-82M TTS backend (Apache-2.0, hexgrad/Kokoro-82M
|
|
3
|
+
* upstream).
|
|
4
|
+
*
|
|
5
|
+
* Kokoro is a small (~82M-param) StyleTTS-2 derivative that ships with a set
|
|
6
|
+
* of "voice packs" — pre-baked 256-dim style vectors (one .bin per voice).
|
|
7
|
+
* Adding voices is a cheap (~512KB) extra download.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Canonical id of a voice pack. By convention it has the form
 * `<lang>_<name>`, e.g. `af_bella` or `am_michael`.
 */
export type KokoroVoiceId = string;
|
|
12
|
+
|
|
13
|
+
/** A single bundled voice pack: a small fp32 style tensor stored on disk. */
export interface KokoroVoicePack {
	/** Voice-pack id such as `af_bella`, `af_sarah`, `am_michael`. */
	id: KokoroVoiceId;
	/** Name presented to users in the UI. */
	displayName: string;
	/** Language tag per Kokoro convention (`a` = American English, `b` = British English, etc.). */
	lang: string;
	/** File name within the voices/ directory, relative to the Kokoro model root. */
	file: string;
	/** Dimension of the style vector (256 for v1.0). */
	dim: number;
	/** Genre/voice tags consumed by picker UIs. */
	tags?: readonly string[];
}
|
|
28
|
+
|
|
29
|
+
/** On-disk locations where the runtime expects the Kokoro assets. */
export interface KokoroModelLayout {
	/** Directory under `<stateDir>/local-inference/models/kokoro/`. */
	root: string;
	/** The model file: the Kokoro GGUF carried by our llama.cpp fork. */
	modelFile: string;
	/** Directory that holds the per-voice style tensors. */
	voicesDir: string;
	/** Sample rate of the model output (Kokoro v1.0 = 24000). */
	sampleRate: number;
}
|
|
40
|
+
|
|
41
|
+
/** Construction-time configuration for `KokoroTtsBackend`. */
|
|
42
|
+
export interface KokoroBackendOptions {
|
|
43
|
+
/** Resolved on-disk layout. Required — the backend never guesses paths. */
|
|
44
|
+
layout: KokoroModelLayout;
|
|
45
|
+
/**
|
|
46
|
+
* Voice id to use when the caller's `SpeakerPreset.voiceId` is not in the
|
|
47
|
+
* voice-pack registry. The named voice MUST be present in `layout.voicesDir`.
|
|
48
|
+
*/
|
|
49
|
+
defaultVoiceId: KokoroVoiceId;
|
|
50
|
+
/**
|
|
51
|
+
* Optional phonemizer override. Defaults to the bundled lazy phonemizer,
|
|
52
|
+
* which uses `phonemize` if installed and falls back to a deterministic
|
|
53
|
+
* grapheme-to-phoneme adapter otherwise (documented tradeoff in README).
|
|
54
|
+
*/
|
|
55
|
+
phonemizer?: KokoroPhonemizer;
|
|
56
|
+
/**
|
|
57
|
+
* Max samples emitted in a single streaming chunk. Defaults to a quarter-
|
|
58
|
+
* second at 24kHz (6000) so the scheduler ring buffer sees a continuous
|
|
59
|
+
* trickle and TTFB stays close to the first inference completion.
|
|
60
|
+
*/
|
|
61
|
+
streamingChunkSamples?: number;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/** A pure (or async-pure) text → phoneme-id sequence converter. */
|
|
65
|
+
export interface KokoroPhonemizer {
|
|
66
|
+
/** Phonemize a single utterance into a sequence of integer ids. */
|
|
67
|
+
phonemize(text: string, lang: string): Promise<KokoroPhonemeSequence>;
|
|
68
|
+
/** Human-facing id (`"phonemize"`, `"espeak-ng"`, `"fallback-g2p"`, ...). */
|
|
69
|
+
readonly id: string;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
export interface KokoroPhonemeSequence {
|
|
73
|
+
/** Token ids for the model's `input_ids` tensor. */
|
|
74
|
+
ids: Int32Array;
|
|
75
|
+
/** Original phoneme string, for debugging and tests. */
|
|
76
|
+
phonemes: string;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/** Raised when the on-disk model layout is missing or malformed. */
|
|
80
|
+
export class KokoroModelMissingError extends Error {
|
|
81
|
+
readonly code = "kokoro-model-missing" as const;
|
|
82
|
+
constructor(message: string) {
|
|
83
|
+
super(message);
|
|
84
|
+
this.name = "KokoroModelMissingError";
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/** Raised when phonemization cannot proceed (no phonemizer + non-ASCII text). */
|
|
89
|
+
export class KokoroPhonemizerError extends Error {
|
|
90
|
+
readonly code = "kokoro-phonemizer-error" as const;
|
|
91
|
+
constructor(message: string) {
|
|
92
|
+
super(message);
|
|
93
|
+
this.name = "KokoroPhonemizerError";
|
|
94
|
+
}
|
|
95
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry of bundled Kokoro voice packs (upstream: hexgrad/Kokoro-82M).
|
|
3
|
+
*
|
|
4
|
+
* Each entry maps a stable `KokoroVoiceId` (the `voices/<id>.bin` filename
|
|
5
|
+
* Kokoro ships) onto display metadata. The runtime resolves a caller's
|
|
6
|
+
* `SpeakerPreset.voiceId` against this table; an unknown id falls through to
|
|
7
|
+
* the backend's `defaultVoiceId`.
|
|
8
|
+
*
|
|
9
|
+
* The actual style tensor (256 fp32 values) lives at
|
|
10
|
+
* `<modelRoot>/voices/<file>` and is loaded lazily on first use.
|
|
11
|
+
*
|
|
12
|
+
* Reference: https://huggingface.co/hexgrad/Kokoro-82M
|
|
13
|
+
*/
|
|
14
|
+
import type { KokoroVoicePack } from "./types";
|
|
15
|
+
export declare const KOKORO_VOICE_PACKS: ReadonlyArray<KokoroVoicePack>;
|
|
16
|
+
/** Look up a voice pack by id. Returns `undefined` for unknown ids — the
|
|
17
|
+
* backend chooses how to fall back (typically `defaultVoiceId`). */
|
|
18
|
+
export declare function findKokoroVoice(id: string): KokoroVoicePack | undefined;
|
|
19
|
+
/** The voice the runtime selects when nothing is configured. */
|
|
20
|
+
export declare const KOKORO_DEFAULT_VOICE_ID = "af_same";
|
|
21
|
+
/** Conservative fallback voice when a configured/default preset is not staged. */
|
|
22
|
+
export declare const KOKORO_FALLBACK_VOICE_ID = "af_bella";
|
|
23
|
+
//# sourceMappingURL=voice-presets.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"voice-presets.d.ts","sourceRoot":"","sources":["voice-presets.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,eAAO,MAAM,kBAAkB,EAAE,aAAa,CAAC,eAAe,CAkG7D,CAAC;AAIF;qEACqE;AACrE,wBAAgB,eAAe,CAAC,EAAE,EAAE,MAAM,GAAG,eAAe,GAAG,SAAS,CAEvE;AAED,gEAAgE;AAChE,eAAO,MAAM,uBAAuB,YAAY,CAAC;AAEjD,kFAAkF;AAClF,eAAO,MAAM,wBAAwB,aAAa,CAAC"}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Registry of bundled Kokoro voice packs (upstream: hexgrad/Kokoro-82M).
|
|
3
|
+
*
|
|
4
|
+
* Each entry maps a stable `KokoroVoiceId` (the `voices/<id>.bin` filename
|
|
5
|
+
* Kokoro ships) onto display metadata. The runtime resolves a caller's
|
|
6
|
+
* `SpeakerPreset.voiceId` against this table; an unknown id falls through to
|
|
7
|
+
* the backend's `defaultVoiceId`.
|
|
8
|
+
*
|
|
9
|
+
* The actual style tensor (256 fp32 values) lives at
|
|
10
|
+
* `<modelRoot>/voices/<file>` and is loaded lazily on first use.
|
|
11
|
+
*
|
|
12
|
+
* Reference: https://huggingface.co/hexgrad/Kokoro-82M
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { KokoroVoicePack } from "./types";
|
|
16
|
+
|
|
17
|
+
export const KOKORO_VOICE_PACKS: ReadonlyArray<KokoroVoicePack> = [
|
|
18
|
+
// American English — female
|
|
19
|
+
{
|
|
20
|
+
id: "af_bella",
|
|
21
|
+
displayName: "Bella (US English)",
|
|
22
|
+
lang: "a",
|
|
23
|
+
file: "af_bella.bin",
|
|
24
|
+
dim: 256,
|
|
25
|
+
tags: ["female", "warm", "default"],
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
id: "af_sarah",
|
|
29
|
+
displayName: "Sarah (US English)",
|
|
30
|
+
lang: "a",
|
|
31
|
+
file: "af_sarah.bin",
|
|
32
|
+
dim: 256,
|
|
33
|
+
tags: ["female", "professional"],
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
id: "af_nicole",
|
|
37
|
+
displayName: "Nicole (US English, breathy)",
|
|
38
|
+
lang: "a",
|
|
39
|
+
file: "af_nicole.bin",
|
|
40
|
+
dim: 256,
|
|
41
|
+
tags: ["female", "breathy"],
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
id: "af_sky",
|
|
45
|
+
displayName: "Sky (US English)",
|
|
46
|
+
lang: "a",
|
|
47
|
+
file: "af_sky.bin",
|
|
48
|
+
dim: 256,
|
|
49
|
+
tags: ["female", "young"],
|
|
50
|
+
},
|
|
51
|
+
// American English — male
|
|
52
|
+
{
|
|
53
|
+
id: "am_michael",
|
|
54
|
+
displayName: "Michael (US English)",
|
|
55
|
+
lang: "a",
|
|
56
|
+
file: "am_michael.bin",
|
|
57
|
+
dim: 256,
|
|
58
|
+
tags: ["male", "warm"],
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
id: "am_adam",
|
|
62
|
+
displayName: "Adam (US English)",
|
|
63
|
+
lang: "a",
|
|
64
|
+
file: "am_adam.bin",
|
|
65
|
+
dim: 256,
|
|
66
|
+
tags: ["male", "neutral"],
|
|
67
|
+
},
|
|
68
|
+
// British English
|
|
69
|
+
{
|
|
70
|
+
id: "bf_emma",
|
|
71
|
+
displayName: "Emma (British English)",
|
|
72
|
+
lang: "b",
|
|
73
|
+
file: "bf_emma.bin",
|
|
74
|
+
dim: 256,
|
|
75
|
+
tags: ["female", "british"],
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
id: "bf_isabella",
|
|
79
|
+
displayName: "Isabella (British English)",
|
|
80
|
+
lang: "b",
|
|
81
|
+
file: "bf_isabella.bin",
|
|
82
|
+
dim: 256,
|
|
83
|
+
tags: ["female", "british"],
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
id: "bm_george",
|
|
87
|
+
displayName: "George (British English)",
|
|
88
|
+
lang: "b",
|
|
89
|
+
file: "bm_george.bin",
|
|
90
|
+
dim: 256,
|
|
91
|
+
tags: ["male", "british"],
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
id: "bm_lewis",
|
|
95
|
+
displayName: "Lewis (British English)",
|
|
96
|
+
lang: "b",
|
|
97
|
+
file: "bm_lewis.bin",
|
|
98
|
+
dim: 256,
|
|
99
|
+
tags: ["male", "british"],
|
|
100
|
+
},
|
|
101
|
+
// Eliza-1 fine-tuned voice — same (research-only, derivative of *Her* 2013).
|
|
102
|
+
// Voice pack lives at `elizaos/eliza-1` under `voice/kokoro/voices/af_same.bin`
|
|
103
|
+
// (first push is private; do not promote to default without a public-release sign-off).
|
|
104
|
+
// Source corpus: `lalalune/ai_voices/sam` upstream subset, landed locally as
|
|
105
|
+
// `same` (58 clips, 3.51 min, research-only).
|
|
106
|
+
// Voice id obeys the Kokoro `<lang><sex>_<name>` convention (US English, female).
|
|
107
|
+
{
|
|
108
|
+
id: "af_same",
|
|
109
|
+
displayName: "Same (Eliza-1, US English)",
|
|
110
|
+
lang: "a",
|
|
111
|
+
file: "af_same.bin",
|
|
112
|
+
dim: 256,
|
|
113
|
+
tags: ["female", "same", "eliza-1-voice", "research-only"],
|
|
114
|
+
},
|
|
115
|
+
];
|
|
116
|
+
|
|
117
|
+
const VOICE_BY_ID = new Map(KOKORO_VOICE_PACKS.map((v) => [v.id, v] as const));
|
|
118
|
+
|
|
119
|
+
/** Look up a voice pack by id. Returns `undefined` for unknown ids — the
|
|
120
|
+
* backend chooses how to fall back (typically `defaultVoiceId`). */
|
|
121
|
+
export function findKokoroVoice(id: string): KokoroVoicePack | undefined {
|
|
122
|
+
return VOICE_BY_ID.get(id);
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
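/** The voice the runtime selects when nothing is configured. NOTE(review): the registry entry for `af_same` is tagged research-only and its comment says not to promote it to default without a public-release sign-off — confirm this default is intended. */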
|
|
126
|
+
export const KOKORO_DEFAULT_VOICE_ID = "af_same";
|
|
127
|
+
|
|
128
|
+
/** Conservative fallback voice when a configured/default preset is not staged. */
|
|
129
|
+
export const KOKORO_FALLBACK_VOICE_ID = "af_bella";
|