@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.11-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +83 -0
- package/package.json +81 -15
- package/src/actions/generate-media.d.ts +59 -0
- package/src/actions/generate-media.d.ts.map +1 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.d.ts +23 -0
- package/src/actions/identify-speaker.d.ts.map +1 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +807 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.d.ts +7 -0
- package/src/index.d.ts.map +1 -0
- package/src/index.ts +54 -0
- package/src/local-inference-routes.d.ts +38 -0
- package/src/local-inference-routes.d.ts.map +1 -0
- package/src/local-inference-routes.test.ts +344 -0
- package/src/local-inference-routes.ts +1543 -0
- package/src/provider.d.ts +21 -0
- package/src/provider.d.ts.map +1 -0
- package/src/provider.ts +1171 -0
- package/src/routes/compat-helpers.d.ts +18 -0
- package/src/routes/compat-helpers.d.ts.map +1 -0
- package/src/routes/compat-helpers.ts +274 -0
- package/src/routes/family-member-route.d.ts +62 -0
- package/src/routes/family-member-route.d.ts.map +1 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.d.ts +19 -0
- package/src/routes/index.d.ts.map +1 -0
- package/src/routes/index.ts +60 -0
- package/src/routes/live-diarization-route.d.ts +26 -0
- package/src/routes/live-diarization-route.d.ts.map +1 -0
- package/src/routes/live-diarization-route.test.ts +213 -0
- package/src/routes/live-diarization-route.ts +122 -0
- package/src/routes/local-inference-asr-route.d.ts +4 -0
- package/src/routes/local-inference-asr-route.d.ts.map +1 -0
- package/src/routes/local-inference-asr-route.test.ts +190 -0
- package/src/routes/local-inference-asr-route.ts +213 -0
- package/src/routes/local-inference-compat-routes.d.ts +16 -0
- package/src/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/src/routes/local-inference-compat-routes.test.ts +423 -0
- package/src/routes/local-inference-compat-routes.ts +782 -0
- package/src/routes/local-inference-tts-route.d.ts +7 -0
- package/src/routes/local-inference-tts-route.d.ts.map +1 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/voice-first-run-routes.d.ts +62 -0
- package/src/routes/voice-first-run-routes.d.ts.map +1 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.d.ts +62 -0
- package/src/routes/voice-models-routes.d.ts.map +1 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/src/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.d.ts +52 -0
- package/src/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/src/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.d.ts +77 -0
- package/src/runtime/embedding-manager-support.d.ts.map +1 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.d.ts +16 -0
- package/src/runtime/embedding-presets.d.ts.map +1 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.d.ts +14 -0
- package/src/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.d.ts +53 -0
- package/src/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +528 -0
- package/src/runtime/ensure-local-inference-handler.ts +1398 -0
- package/src/runtime/index.d.ts +14 -0
- package/src/runtime/index.d.ts.map +1 -0
- package/src/runtime/index.ts +27 -0
- package/src/runtime/mobile-local-inference-gate.d.ts +31 -0
- package/src/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +69 -0
- package/src/runtime/mobile-local-inference-gate.ts +44 -0
- package/src/runtime/voice-entity-binding.d.ts +103 -0
- package/src/runtime/voice-entity-binding.d.ts.map +1 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +69 -0
- package/src/runtime/voice-entity-binding.ts +328 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.d.ts +282 -0
- package/src/services/active-model.d.ts.map +1 -0
- package/src/services/active-model.ts +1213 -0
- package/src/services/asr/errors.d.ts +21 -0
- package/src/services/asr/errors.d.ts.map +1 -0
- package/src/services/asr/errors.ts +50 -0
- package/src/services/asr/hash.d.ts +28 -0
- package/src/services/asr/hash.d.ts.map +1 -0
- package/src/services/asr/hash.ts +49 -0
- package/src/services/asr/index.d.ts +76 -0
- package/src/services/asr/index.d.ts.map +1 -0
- package/src/services/asr/index.ts +178 -0
- package/src/services/asr/types.d.ts +91 -0
- package/src/services/asr/types.d.ts.map +1 -0
- package/src/services/asr/types.ts +95 -0
- package/src/services/assignments.d.ts +71 -0
- package/src/services/assignments.d.ts.map +1 -0
- package/src/services/assignments.test.ts +80 -0
- package/src/services/assignments.ts +230 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.d.ts +346 -0
- package/src/services/backend.d.ts.map +1 -0
- package/src/services/backend.ts +612 -0
- package/src/services/bundled-models.d.ts +34 -0
- package/src/services/bundled-models.d.ts.map +1 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.d.ts +206 -0
- package/src/services/cache-bridge.d.ts.map +1 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.d.ts +10 -0
- package/src/services/catalog.d.ts.map +1 -0
- package/src/services/catalog.test.ts +240 -0
- package/src/services/catalog.ts +27 -0
- package/src/services/checkpoint-client.d.ts +109 -0
- package/src/services/checkpoint-client.d.ts.map +1 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.d.ts +102 -0
- package/src/services/cloud-fallback.d.ts.map +1 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/conversation-registry.d.ts +142 -0
- package/src/services/conversation-registry.d.ts.map +1 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts +92 -0
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +333 -0
- package/src/services/device-bridge.d.ts +188 -0
- package/src/services/device-bridge.d.ts.map +1 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.d.ts +149 -0
- package/src/services/device-resource-metrics.d.ts.map +1 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.d.ts +115 -0
- package/src/services/device-tier.d.ts.map +1 -0
- package/src/services/device-tier.test.ts +371 -0
- package/src/services/device-tier.ts +410 -0
- package/src/services/downloader.d.ts +82 -0
- package/src/services/downloader.d.ts.map +1 -0
- package/src/services/downloader.test.ts +724 -0
- package/src/services/downloader.ts +899 -0
- package/src/services/engine-direct-bundle.test.ts +58 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.d.ts +534 -0
- package/src/services/engine.d.ts.map +1 -0
- package/src/services/engine.ts +1891 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.d.ts +17 -0
- package/src/services/external-scanner.d.ts.map +1 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +442 -0
- package/src/services/ffi-streaming-backend.d.ts +180 -0
- package/src/services/ffi-streaming-backend.d.ts.map +1 -0
- package/src/services/ffi-streaming-backend.ts +382 -0
- package/src/services/ffi-streaming-runner.d.ts +122 -0
- package/src/services/ffi-streaming-runner.d.ts.map +1 -0
- package/src/services/ffi-streaming-runner.test.ts +60 -0
- package/src/services/ffi-streaming-runner.ts +354 -0
- package/src/services/ffi-unload-ordering.test.ts +162 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.d.ts +72 -0
- package/src/services/handler-registry.d.ts.map +1 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.d.ts +63 -0
- package/src/services/hardware.d.ts.map +1 -0
- package/src/services/hardware.test.ts +183 -0
- package/src/services/hardware.ts +404 -0
- package/src/services/hf-search.d.ts +26 -0
- package/src/services/hf-search.d.ts.map +1 -0
- package/src/services/hf-search.test.ts +69 -0
- package/src/services/hf-search.ts +420 -0
- package/src/services/image-description-runtime.d.ts +14 -0
- package/src/services/image-description-runtime.d.ts.map +1 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/src/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.d.ts +118 -0
- package/src/services/imagegen/backend-selector.d.ts.map +1 -0
- package/src/services/imagegen/backend-selector.ts +281 -0
- package/src/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/src/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.d.ts +16 -0
- package/src/services/imagegen/errors.d.ts.map +1 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.d.ts +58 -0
- package/src/services/imagegen/index.d.ts.map +1 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.d.ts +74 -0
- package/src/services/imagegen/mflux.d.ts.map +1 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.d.ts +180 -0
- package/src/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/src/services/imagegen/sd-cpp.ts +718 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.d.ts +181 -0
- package/src/services/imagegen/types.d.ts.map +1 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.d.ts +30 -0
- package/src/services/index.d.ts.map +1 -0
- package/src/services/index.ts +225 -0
- package/src/services/inference-capabilities.d.ts +132 -0
- package/src/services/inference-capabilities.d.ts.map +1 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.d.ts +59 -0
- package/src/services/inference-telemetry.d.ts.map +1 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.d.ts +189 -0
- package/src/services/kv-spill.d.ts.map +1 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +356 -0
- package/src/services/latency-trace.d.ts +346 -0
- package/src/services/latency-trace.d.ts.map +1 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.d.ts +96 -0
- package/src/services/llm-streaming-binding.d.ts.map +1 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.d.ts +82 -0
- package/src/services/load-args.d.ts.map +1 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +708 -0
- package/src/services/manifest/index.d.ts +4 -0
- package/src/services/manifest/index.d.ts.map +1 -0
- package/src/services/manifest/index.ts +66 -0
- package/src/services/manifest/manifest.test.ts +693 -0
- package/src/services/manifest/schema.d.ts +715 -0
- package/src/services/manifest/schema.d.ts.map +1 -0
- package/src/services/manifest/schema.ts +655 -0
- package/src/services/manifest/types.d.ts +30 -0
- package/src/services/manifest/types.d.ts.map +1 -0
- package/src/services/manifest/types.ts +55 -0
- package/src/services/manifest/validator.d.ts +66 -0
- package/src/services/manifest/validator.d.ts.map +1 -0
- package/src/services/manifest/validator.ts +569 -0
- package/src/services/memory-arbiter.d.ts +343 -0
- package/src/services/memory-arbiter.d.ts.map +1 -0
- package/src/services/memory-arbiter.test.ts +419 -0
- package/src/services/memory-arbiter.ts +1000 -0
- package/src/services/memory-monitor.d.ts +119 -0
- package/src/services/memory-monitor.d.ts.map +1 -0
- package/src/services/memory-monitor.test.ts +208 -0
- package/src/services/memory-monitor.ts +296 -0
- package/src/services/memory-pressure.d.ts +127 -0
- package/src/services/memory-pressure.d.ts.map +1 -0
- package/src/services/memory-pressure.ts +413 -0
- package/src/services/mtp-doctor.d.ts +13 -0
- package/src/services/mtp-doctor.d.ts.map +1 -0
- package/src/services/mtp-doctor.ts +78 -0
- package/src/services/network-policy.d.ts +127 -0
- package/src/services/network-policy.d.ts.map +1 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.d.ts +6 -0
- package/src/services/paths.d.ts.map +1 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.d.ts +124 -0
- package/src/services/planner-skeleton.d.ts.map +1 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.d.ts +38 -0
- package/src/services/providers.d.ts.map +1 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +163 -0
- package/src/services/ram-budget.d.ts +110 -0
- package/src/services/ram-budget.d.ts.map +1 -0
- package/src/services/ram-budget.ts +0 -0
- package/src/services/readiness.d.ts +9 -0
- package/src/services/readiness.d.ts.map +1 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.d.ts +111 -0
- package/src/services/recommendation.d.ts.map +1 -0
- package/src/services/recommendation.ts +672 -0
- package/src/services/registry.d.ts +35 -0
- package/src/services/registry.d.ts.map +1 -0
- package/src/services/registry.ts +151 -0
- package/src/services/router-handler.d.ts +92 -0
- package/src/services/router-handler.d.ts.map +1 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +376 -0
- package/src/services/routing-policy.d.ts +55 -0
- package/src/services/routing-policy.d.ts.map +1 -0
- package/src/services/routing-policy.ts +228 -0
- package/src/services/routing-preferences.d.ts +8 -0
- package/src/services/routing-preferences.d.ts.map +1 -0
- package/src/services/routing-preferences.ts +15 -0
- package/src/services/runtime-target.d.ts +98 -0
- package/src/services/runtime-target.d.ts.map +1 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.d.ts +128 -0
- package/src/services/service.d.ts.map +1 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +735 -0
- package/src/services/session-pool.d.ts +72 -0
- package/src/services/session-pool.d.ts.map +1 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.d.ts +23 -0
- package/src/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.d.ts +311 -0
- package/src/services/structured-output.d.ts.map +1 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/tts/errors.ts +46 -0
- package/src/services/tts/index.ts +214 -0
- package/src/services/tts/tts-audio-cache.ts +235 -0
- package/src/services/tts/types.ts +157 -0
- package/src/services/types.d.ts +19 -0
- package/src/services/types.d.ts.map +1 -0
- package/src/services/types.ts +55 -0
- package/src/services/verify-on-device.d.ts +34 -0
- package/src/services/verify-on-device.d.ts.map +1 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.d.ts +8 -0
- package/src/services/verify.d.ts.map +1 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.d.ts +115 -0
- package/src/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.d.ts +99 -0
- package/src/services/vision/capacitor-llama.d.ts.map +1 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.d.ts +47 -0
- package/src/services/vision/cloud-fallback.d.ts.map +1 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.d.ts +71 -0
- package/src/services/vision/hash.d.ts.map +1 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.d.ts +95 -0
- package/src/services/vision/index.d.ts.map +1 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.d.ts +73 -0
- package/src/services/vision/llama-server.d.ts.map +1 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.d.ts +153 -0
- package/src/services/vision/types.d.ts.map +1 -0
- package/src/services/vision/types.ts +154 -0
- package/src/services/vision/vast-fallback.d.ts +18 -0
- package/src/services/vision/vast-fallback.d.ts.map +1 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.d.ts +98 -0
- package/src/services/vision-embedding-cache.d.ts.map +1 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +88 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +92 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +124 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +197 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/audio-frame-consumer.d.ts +212 -0
- package/src/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/src/services/voice/audio-frame-consumer.test.ts +343 -0
- package/src/services/voice/audio-frame-consumer.ts +491 -0
- package/src/services/voice/barge-in.d.ts +112 -0
- package/src/services/voice/barge-in.d.ts.map +1 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +336 -0
- package/src/services/voice/cancellation-coordinator.d.ts +127 -0
- package/src/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.d.ts +199 -0
- package/src/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +743 -0
- package/src/services/voice/eager-context-builder.d.ts +170 -0
- package/src/services/voice/eager-context-builder.d.ts.map +1 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.d.ts +133 -0
- package/src/services/voice/embedding.d.ts.map +1 -0
- package/src/services/voice/embedding.test.ts +148 -0
- package/src/services/voice/embedding.ts +244 -0
- package/src/services/voice/emotion-attribution.d.ts +68 -0
- package/src/services/voice/emotion-attribution.d.ts.map +1 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge.d.ts +746 -0
- package/src/services/voice/engine-bridge.d.ts.map +1 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2226 -0
- package/src/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/src/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/src/services/voice/eot-classifier-ggml.ts +566 -0
- package/src/services/voice/eot-classifier.d.ts +214 -0
- package/src/services/voice/eot-classifier.d.ts.map +1 -0
- package/src/services/voice/eot-classifier.ts +533 -0
- package/src/services/voice/errors.d.ts +20 -0
- package/src/services/voice/errors.d.ts.map +1 -0
- package/src/services/voice/errors.ts +32 -0
- package/src/services/voice/expressive-tags.d.ts +158 -0
- package/src/services/voice/expressive-tags.d.ts.map +1 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.d.ts +636 -0
- package/src/services/voice/ffi-bindings.d.ts.map +1 -0
- package/src/services/voice/ffi-bindings.test.ts +671 -0
- package/src/services/voice/ffi-bindings.ts +3050 -0
- package/src/services/voice/first-line-cache.d.ts +181 -0
- package/src/services/voice/first-line-cache.d.ts.map +1 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.d.ts +51 -0
- package/src/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/src/services/voice/fused-eot-scorer.ts +135 -0
- package/src/services/voice/index.d.ts +91 -0
- package/src/services/voice/index.d.ts.map +1 -0
- package/src/services/voice/index.ts +481 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +151 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +231 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts +72 -0
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +207 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/src/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts +92 -0
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/src/services/voice/kokoro/runtime-selection.ts +237 -0
- package/src/services/voice/kokoro/types.d.ts +82 -0
- package/src/services/voice/kokoro/types.d.ts.map +1 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/src/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.d.ts +30 -0
- package/src/services/voice/kokoro/voices.d.ts.map +1 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.d.ts +135 -0
- package/src/services/voice/lifecycle.d.ts.map +1 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.d.ts +96 -0
- package/src/services/voice/live-diarization-session.d.ts.map +1 -0
- package/src/services/voice/live-diarization-session.ts +289 -0
- package/src/services/voice/mic-source.d.ts +136 -0
- package/src/services/voice/mic-source.d.ts.map +1 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/optimistic-policy.d.ts +109 -0
- package/src/services/voice/optimistic-policy.d.ts.map +1 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.d.ts +73 -0
- package/src/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/src/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.d.ts +76 -0
- package/src/services/voice/phrase-cache.d.ts.map +1 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.d.ts +62 -0
- package/src/services/voice/phrase-chunker.d.ts.map +1 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.d.ts +151 -0
- package/src/services/voice/pipeline-impls.d.ts.map +1 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.d.ts +216 -0
- package/src/services/voice/pipeline.d.ts.map +1 -0
- package/src/services/voice/pipeline.ts +505 -0
- package/src/services/voice/prefill-client.d.ts +123 -0
- package/src/services/voice/prefill-client.d.ts.map +1 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/src/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.d.ts +248 -0
- package/src/services/voice/profile-store.d.ts.map +1 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/ring-buffer.d.ts +40 -0
- package/src/services/voice/ring-buffer.d.ts.map +1 -0
- package/src/services/voice/ring-buffer.ts +105 -0
- package/src/services/voice/rollback-queue.d.ts +24 -0
- package/src/services/voice/rollback-queue.d.ts.map +1 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/scheduler.d.ts +146 -0
- package/src/services/voice/scheduler.d.ts.map +1 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/shared-resources.d.ts +190 -0
- package/src/services/voice/shared-resources.d.ts.map +1 -0
- package/src/services/voice/shared-resources.ts +320 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +386 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.d.ts +75 -0
- package/src/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/src/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.d.ts +37 -0
- package/src/services/voice/speaker/encoder.d.ts.map +1 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.d.ts +83 -0
- package/src/services/voice/speaker-imprint.d.ts.map +1 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.d.ts +77 -0
- package/src/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.d.ts +73 -0
- package/src/services/voice/system-audio-sink.d.ts.map +1 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.d.ts +244 -0
- package/src/services/voice/transcriber.d.ts.map +1 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/turn-controller.d.ts +183 -0
- package/src/services/voice/turn-controller.d.ts.map +1 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.d.ts +643 -0
- package/src/services/voice/types.d.ts.map +1 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.d.ts +282 -0
- package/src/services/voice/vad.d.ts.map +1 -0
- package/src/services/voice/vad.test.ts +480 -0
- package/src/services/voice/vad.ts +827 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.d.ts +241 -0
- package/src/services/voice/voice-budget.d.ts.map +1 -0
- package/src/services/voice/voice-budget.test.ts +420 -0
- package/src/services/voice/voice-budget.ts +656 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/src/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-preset-format.d.ts +158 -0
- package/src/services/voice/voice-preset-format.d.ts.map +1 -0
- package/src/services/voice/voice-preset-format.ts +700 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.d.ts +116 -0
- package/src/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.d.ts +83 -0
- package/src/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.ts +154 -0
- package/src/services/voice/voice-settings.d.ts +82 -0
- package/src/services/voice/voice-settings.d.ts.map +1 -0
- package/src/services/voice/voice-settings.ts +172 -0
- package/src/services/voice/voice-state-machine.d.ts +364 -0
- package/src/services/voice/voice-state-machine.d.ts.map +1 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +326 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.d.ts +101 -0
- package/src/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/src/services/voice/wake-word-ggml.ts +320 -0
- package/src/services/voice/wake-word.d.ts +255 -0
- package/src/services/voice/wake-word.d.ts.map +1 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.d.ts +240 -0
- package/src/services/voice-model-updater.d.ts.map +1 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.d.ts +3 -0
- package/src/services/voice-prewarm.d.ts.map +1 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/dist/index.d.ts +0 -37
- package/dist/index.js +0 -1098
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public barrel for the Kokoro-82M TTS adapter.
|
|
3
|
+
*
|
|
4
|
+
* External callers (the engine layer, the bench harness, tests) should
|
|
5
|
+
* import from `./kokoro` rather than reaching into individual files. The
|
|
6
|
+
* internal layout may change; this surface is stable.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
export type { KokoroTtsBackendDeps } from "./kokoro-backend";
|
|
10
|
+
export { KokoroTtsBackend } from "./kokoro-backend";
|
|
11
|
+
export type { KokoroFfiRuntimeOptions } from "./kokoro-ffi-runtime";
|
|
12
|
+
export { KokoroFfiRuntime } from "./kokoro-ffi-runtime";
|
|
13
|
+
export type {
|
|
14
|
+
KokoroMockRuntimeOptions,
|
|
15
|
+
KokoroPythonRuntimeOptions,
|
|
16
|
+
KokoroRuntime,
|
|
17
|
+
KokoroRuntimeChunk,
|
|
18
|
+
KokoroRuntimeInputs,
|
|
19
|
+
} from "./kokoro-runtime";
|
|
20
|
+
export {
|
|
21
|
+
KOKORO_GGUF_REL_PATH,
|
|
22
|
+
KokoroMockRuntime,
|
|
23
|
+
KokoroPythonRuntime,
|
|
24
|
+
} from "./kokoro-runtime";
|
|
25
|
+
export type {
|
|
26
|
+
PhonemeStreamWindow,
|
|
27
|
+
StreamPhonemesOptions,
|
|
28
|
+
} from "./phoneme-stream";
|
|
29
|
+
|
|
30
|
+
export {
|
|
31
|
+
phonemizePhrase,
|
|
32
|
+
streamPhonemes,
|
|
33
|
+
} from "./phoneme-stream";
|
|
34
|
+
export {
|
|
35
|
+
FallbackG2PPhonemizer,
|
|
36
|
+
KOKORO_PAD_ID,
|
|
37
|
+
NpmPhonemizePhonemizer,
|
|
38
|
+
resolvePhonemizer,
|
|
39
|
+
} from "./phonemizer";
|
|
40
|
+
export type {
|
|
41
|
+
KokoroBackendDecision,
|
|
42
|
+
KokoroBackendId,
|
|
43
|
+
KokoroBackendInputs,
|
|
44
|
+
} from "./pick-runtime";
|
|
45
|
+
export {
|
|
46
|
+
pickKokoroRuntimeBackend,
|
|
47
|
+
readKokoroBackendFromEnv,
|
|
48
|
+
} from "./pick-runtime";
|
|
49
|
+
export type {
|
|
50
|
+
VoiceBackendChoice,
|
|
51
|
+
VoiceBackendDecision,
|
|
52
|
+
VoiceBackendInputs,
|
|
53
|
+
VoiceBackendMode,
|
|
54
|
+
} from "./runtime-selection";
|
|
55
|
+
export {
|
|
56
|
+
readVoiceBackendModeFromEnv,
|
|
57
|
+
selectVoiceBackend,
|
|
58
|
+
} from "./runtime-selection";
|
|
59
|
+
export type {
|
|
60
|
+
KokoroBackendOptions,
|
|
61
|
+
KokoroModelLayout,
|
|
62
|
+
KokoroPhonemeSequence,
|
|
63
|
+
KokoroPhonemizer,
|
|
64
|
+
KokoroVoiceId,
|
|
65
|
+
KokoroVoicePack,
|
|
66
|
+
} from "./types";
|
|
67
|
+
export {
|
|
68
|
+
KokoroModelMissingError,
|
|
69
|
+
KokoroPhonemizerError,
|
|
70
|
+
} from "./types";
|
|
71
|
+
export {
|
|
72
|
+
findKokoroVoice,
|
|
73
|
+
KOKORO_DEFAULT_VOICE_ID,
|
|
74
|
+
KOKORO_VOICE_PACKS,
|
|
75
|
+
listKokoroVoiceIds,
|
|
76
|
+
listKokoroVoicesByLang,
|
|
77
|
+
listKokoroVoicesByTag,
|
|
78
|
+
resolveKokoroVoiceOrDefault,
|
|
79
|
+
} from "./voices";
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Kokoro-82M TTS backend.
|
|
3
|
+
*
|
|
4
|
+
* Implements the same `OmniVoiceBackend + StreamingTtsBackend` seam that
|
|
5
|
+
* `FfiOmniVoiceBackend` (the OmniVoice path) satisfies, so a
|
|
6
|
+
* `VoiceScheduler` instance does not need to know which TTS family it is
|
|
7
|
+
* driving. The runtime selection layer (`runtime-selection.ts`) picks
|
|
8
|
+
* between this and `FfiOmniVoiceBackend` based on hardware tier and the
|
|
9
|
+
* caller's first-audio-latency target.
|
|
10
|
+
*
|
|
11
|
+
* The actual model inference is delegated to a `KokoroRuntime` instance
|
|
12
|
+
* (GGUF / mock) — this class owns:
|
|
13
|
+
* - phonemizer resolution + per-phrase phonemize call,
|
|
14
|
+
* - voice-pack resolution against `SpeakerPreset.voiceId`,
|
|
15
|
+
* - streaming-protocol bookkeeping (cancel signal polling, final tail).
|
|
16
|
+
*
|
|
17
|
+
* No fallback sludge: if the runtime is unavailable, the backend throws
|
|
18
|
+
* on first synthesis rather than emitting silent zero PCM (AGENTS.md §3).
|
|
19
|
+
*/
|
|
20
|
+
import type { AudioChunk, OmniVoiceBackend, Phrase, SpeakerPreset, StreamingTtsBackend, TtsPcmChunk } from "../types";
|
|
21
|
+
import type { KokoroRuntime } from "./kokoro-runtime";
|
|
22
|
+
import type { KokoroBackendOptions } from "./types";
|
|
23
|
+
export interface KokoroTtsBackendDeps extends KokoroBackendOptions {
|
|
24
|
+
/** The concrete model runner. Wire `KokoroFfiRuntime` (in-process fused
|
|
25
|
+
* libelizainference) in production and `KokoroMockRuntime` in tests. */
|
|
26
|
+
runtime: KokoroRuntime;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* `KokoroTtsBackend` is a streaming-only TTS backend. The model produces
|
|
30
|
+
* the full waveform in one forward, but we surface it as one body chunk +
|
|
31
|
+
* tail so the scheduler protocol is identical for both backends.
|
|
32
|
+
*/
|
|
33
|
+
export declare class KokoroTtsBackend implements OmniVoiceBackend, StreamingTtsBackend {
|
|
34
|
+
readonly id: "kokoro";
|
|
35
|
+
private readonly runtime;
|
|
36
|
+
private readonly defaultVoiceId;
|
|
37
|
+
private readonly streamingChunkSamples;
|
|
38
|
+
private phonemizer;
|
|
39
|
+
private readonly phonemizerOverride?;
|
|
40
|
+
constructor(deps: KokoroTtsBackendDeps);
|
|
41
|
+
/** Native sample rate of the model output (24 kHz for Kokoro v1.0). */
|
|
42
|
+
get sampleRate(): number;
|
|
43
|
+
/** Always true — `KokoroTtsBackend` satisfies `StreamingTtsBackend`. */
|
|
44
|
+
supportsStreamingTts(): boolean;
|
|
45
|
+
/**
|
|
46
|
+
* One-shot synthesis. Drives the streaming path internally and
|
|
47
|
+
* concatenates chunks. Cancellation observed at chunk boundaries.
|
|
48
|
+
*/
|
|
49
|
+
synthesize(args: {
|
|
50
|
+
phrase: Phrase;
|
|
51
|
+
preset: SpeakerPreset;
|
|
52
|
+
cancelSignal: {
|
|
53
|
+
cancelled: boolean;
|
|
54
|
+
};
|
|
55
|
+
onKernelTick?: () => void;
|
|
56
|
+
}): Promise<AudioChunk>;
|
|
57
|
+
synthesizeStream(args: {
|
|
58
|
+
phrase: Phrase;
|
|
59
|
+
preset: SpeakerPreset;
|
|
60
|
+
cancelSignal: {
|
|
61
|
+
cancelled: boolean;
|
|
62
|
+
};
|
|
63
|
+
onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
|
|
64
|
+
onKernelTick?: () => void;
|
|
65
|
+
}): Promise<{
|
|
66
|
+
cancelled: boolean;
|
|
67
|
+
}>;
|
|
68
|
+
dispose(): void;
|
|
69
|
+
private resolveVoice;
|
|
70
|
+
private ensurePhonemizer;
|
|
71
|
+
}
|
|
72
|
+
//# sourceMappingURL=kokoro-backend.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kokoro-backend.d.ts","sourceRoot":"","sources":["kokoro-backend.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EACX,UAAU,EACV,gBAAgB,EAChB,MAAM,EACN,aAAa,EACb,mBAAmB,EACnB,WAAW,EACX,MAAM,UAAU,CAAC;AAClB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAEtD,OAAO,KAAK,EACX,oBAAoB,EAGpB,MAAM,SAAS,CAAC;AAGjB,MAAM,WAAW,oBAAqB,SAAQ,oBAAoB;IACjE;6EACyE;IACzE,OAAO,EAAE,aAAa,CAAC;CACvB;AAED;;;;GAIG;AACH,qBAAa,gBAAiB,YAAW,gBAAgB,EAAE,mBAAmB;IAC7E,QAAQ,CAAC,EAAE,EAAG,QAAQ,CAAU;IAChC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAgB;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAAS;IAC/C,OAAO,CAAC,UAAU,CAAiC;IACnD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAmB;gBAE3C,IAAI,EAAE,oBAAoB;IAQtC,uEAAuE;IACvE,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,wEAAwE;IACxE,oBAAoB,IAAI,OAAO;IAI/B;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE;QACtB,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,aAAa,CAAC;QACtB,YAAY,EAAE;YAAE,SAAS,EAAE,OAAO,CAAA;SAAE,CAAC;QACrC,YAAY,CAAC,EAAE,MAAM,IAAI,CAAC;KAC1B,GAAG,OAAO,CAAC,UAAU,CAAC;IAgCjB,gBAAgB,CAAC,IAAI,EAAE;QAC5B,MAAM,EAAE,MAAM,CAAC;QACf,MAAM,EAAE,aAAa,CAAC;QACtB,YAAY,EAAE;YAAE,SAAS,EAAE,OAAO,CAAA;SAAE,CAAC;QACrC,OAAO,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,OAAO,GAAG,SAAS,CAAC;QACrD,YAAY,CAAC,EAAE,MAAM,IAAI,CAAC;KAC1B,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;IAkEnC,OAAO,IAAI,IAAI;IAIf,OAAO,CAAC,YAAY;YASN,gBAAgB;CAM9B"}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Kokoro-82M TTS backend.
|
|
3
|
+
*
|
|
4
|
+
* Implements the same `OmniVoiceBackend + StreamingTtsBackend` seam that
|
|
5
|
+
* `FfiOmniVoiceBackend` (the OmniVoice path) satisfies, so a
|
|
6
|
+
* `VoiceScheduler` instance does not need to know which TTS family it is
|
|
7
|
+
* driving. The runtime selection layer (`runtime-selection.ts`) picks
|
|
8
|
+
* between this and `FfiOmniVoiceBackend` based on hardware tier and the
|
|
9
|
+
* caller's first-audio-latency target.
|
|
10
|
+
*
|
|
11
|
+
* The actual model inference is delegated to a `KokoroRuntime` instance
|
|
12
|
+
* (GGUF / mock) — this class owns:
|
|
13
|
+
* - phonemizer resolution + per-phrase phonemize call,
|
|
14
|
+
* - voice-pack resolution against `SpeakerPreset.voiceId`,
|
|
15
|
+
* - streaming-protocol bookkeeping (cancel signal polling, final tail).
|
|
16
|
+
*
|
|
17
|
+
* No fallback sludge: if the runtime is unavailable, the backend throws
|
|
18
|
+
* on first synthesis rather than emitting silent zero PCM (AGENTS.md §3).
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import type {
|
|
22
|
+
AudioChunk,
|
|
23
|
+
OmniVoiceBackend,
|
|
24
|
+
Phrase,
|
|
25
|
+
SpeakerPreset,
|
|
26
|
+
StreamingTtsBackend,
|
|
27
|
+
TtsPcmChunk,
|
|
28
|
+
} from "../types";
|
|
29
|
+
import type { KokoroRuntime } from "./kokoro-runtime";
|
|
30
|
+
import { resolvePhonemizer } from "./phonemizer";
|
|
31
|
+
import type {
|
|
32
|
+
KokoroBackendOptions,
|
|
33
|
+
KokoroPhonemizer,
|
|
34
|
+
KokoroVoicePack,
|
|
35
|
+
} from "./types";
|
|
36
|
+
import { resolveKokoroVoiceOrDefault } from "./voices";
|
|
37
|
+
|
|
38
|
+
/**
 * Constructor inputs for `KokoroTtsBackend`: every `KokoroBackendOptions`
 * field plus the model runner the backend delegates inference to.
 */
export interface KokoroTtsBackendDeps extends KokoroBackendOptions {
	/**
	 * Concrete model runner. Production wires `KokoroFfiRuntime` (in-process
	 * fused libelizainference); tests wire `KokoroMockRuntime`.
	 */
	runtime: KokoroRuntime;
}
|
|
43
|
+
|
|
44
|
+
/**
 * `KokoroTtsBackend` is a streaming-only TTS backend. The model produces
 * the full waveform in one forward, but we surface it as re-chunked body
 * slices + one final tail so the scheduler protocol is identical for both
 * backends.
 */
export class KokoroTtsBackend implements OmniVoiceBackend, StreamingTtsBackend {
	readonly id = "kokoro" as const;
	private readonly runtime: KokoroRuntime;
	private readonly defaultVoiceId: string;
	/** Maximum number of samples forwarded per `onChunk` call (always >= 1). */
	private readonly streamingChunkSamples: number;
	/** Lazily resolved on first synthesis; see `ensurePhonemizer`. */
	private phonemizer: KokoroPhonemizer | null = null;
	private readonly phonemizerOverride?: KokoroPhonemizer;

	/**
	 * @param deps Runtime, default voice id, model layout and optional
	 *   overrides. `streamingChunkSamples` defaults to a quarter second of
	 *   audio at `layout.sampleRate`; values that are not a finite number
	 *   >= 1 are ignored in favour of that default.
	 */
	constructor(deps: KokoroTtsBackendDeps) {
		this.runtime = deps.runtime;
		this.defaultVoiceId = deps.defaultVoiceId;
		// The value is used as the stride of the re-chunk loop in
		// `synthesizeStream`. A stride of 0 or below never advances (the loop
		// would spin forever) and NaN silently drops the audio, so only a
		// finite stride of at least one sample is accepted.
		const isUsableStride = (n: number | null | undefined): n is number =>
			typeof n === "number" && Number.isFinite(n) && n >= 1;
		const requested = deps.streamingChunkSamples;
		if (isUsableStride(requested)) {
			this.streamingChunkSamples = Math.floor(requested);
		} else {
			const quarterSecond = Math.floor(deps.layout.sampleRate / 4);
			this.streamingChunkSamples = isUsableStride(quarterSecond)
				? quarterSecond
				: 1;
		}
		this.phonemizerOverride = deps.phonemizer;
	}

	/** Native sample rate of the model output (24 kHz for Kokoro v1.0). */
	get sampleRate(): number {
		return this.runtime.sampleRate;
	}

	/** Always true — `KokoroTtsBackend` satisfies `StreamingTtsBackend`. */
	supportsStreamingTts(): boolean {
		return true;
	}

	/**
	 * One-shot synthesis. Drives the streaming path internally and
	 * concatenates the body chunks. Cancellation is observed at chunk
	 * boundaries; the stream's `cancelled` result is not inspected, so a
	 * cancelled call resolves with whatever audio was collected so far.
	 *
	 * @returns The merged PCM tagged with the phrase id/index range and the
	 *   runtime sample rate.
	 */
	async synthesize(args: {
		phrase: Phrase;
		preset: SpeakerPreset;
		cancelSignal: { cancelled: boolean };
		onKernelTick?: () => void;
	}): Promise<AudioChunk> {
		const collected: Float32Array[] = [];
		let total = 0;
		await this.synthesizeStream({
			phrase: args.phrase,
			preset: args.preset,
			cancelSignal: args.cancelSignal,
			onKernelTick: args.onKernelTick,
			onChunk: ({ pcm, isFinal }) => {
				args.onKernelTick?.();
				// The final tail is an empty marker; only body audio is kept.
				if (!isFinal && pcm.length > 0) {
					collected.push(pcm);
					total += pcm.length;
				}
				// Returning true asks the stream to stop.
				return args.cancelSignal.cancelled;
			},
		});
		const merged = new Float32Array(total);
		let off = 0;
		for (const part of collected) {
			merged.set(part, off);
			off += part.length;
		}
		return {
			phraseId: args.phrase.id,
			fromIndex: args.phrase.fromIndex,
			toIndex: args.phrase.toIndex,
			pcm: merged,
			sampleRate: this.runtime.sampleRate,
		};
	}

	/**
	 * Streaming synthesis: phonemize the phrase, run the runtime, and forward
	 * its audio to `onChunk` in slices of at most `streamingChunkSamples`
	 * samples, followed by exactly one empty `isFinal: true` tail.
	 *
	 * `onChunk` may return `true` to request cancellation. Errors thrown by
	 * the phonemizer or the runtime propagate to the caller; no tail is
	 * emitted in that case.
	 *
	 * @returns `cancelled: true` when the cancel signal was set, the consumer
	 *   asked to stop, or the runtime itself reported cancellation.
	 */
	async synthesizeStream(args: {
		phrase: Phrase;
		preset: SpeakerPreset;
		cancelSignal: { cancelled: boolean };
		onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
		onKernelTick?: () => void;
	}): Promise<{ cancelled: boolean }> {
		const voice = this.resolveVoice(args.preset);
		const phonemizer = await this.ensurePhonemizer();
		args.onKernelTick?.();
		const phonemes = await phonemizer.phonemize(args.phrase.text, voice.lang);
		if (args.cancelSignal.cancelled) {
			// Cancelled while phonemizing: close the stream without touching
			// the runtime.
			args.onChunk({
				pcm: new Float32Array(0),
				sampleRate: this.runtime.sampleRate,
				isFinal: true,
			});
			return { cancelled: true };
		}

		// The runtime emits one (or a few) body chunks. We re-chunk to
		// `streamingChunkSamples` so the scheduler's ring buffer sees a
		// continuous trickle even when the runtime returns the whole waveform
		// at once — this is how Kokoro's ~97ms TTFB becomes audible to the
		// listener before the full phrase finishes decoding.
		const limit = this.streamingChunkSamples;
		let cancelled = false;
		const result = await this.runtime.synthesize({
			phonemes,
			voice,
			cancelSignal: args.cancelSignal,
			onChunk: ({ pcm }) => {
				args.onKernelTick?.();
				if (cancelled || args.cancelSignal.cancelled) {
					cancelled = true;
					return true;
				}
				if (pcm.length === 0) {
					// Zero-length chunks carry no audio. Runtime-side final
					// markers are swallowed here: the single final tail is
					// emitted below, and forwarding them would double-tail.
					return false;
				}
				for (let off = 0; off < pcm.length; off += limit) {
					if (args.cancelSignal.cancelled) {
						cancelled = true;
						return true;
					}
					const end = Math.min(pcm.length, off + limit);
					// `subarray` is a view over the runtime's buffer, not a copy.
					const slice = pcm.subarray(off, end);
					const want = args.onChunk({
						pcm: slice,
						sampleRate: this.runtime.sampleRate,
						isFinal: false,
					});
					if (want === true || args.cancelSignal.cancelled) {
						cancelled = true;
						return true;
					}
				}
				return false;
			},
		});
		args.onChunk({
			pcm: new Float32Array(0),
			sampleRate: this.runtime.sampleRate,
			isFinal: true,
		});
		return { cancelled: cancelled || result.cancelled };
	}

	/** Releases the underlying runtime. */
	dispose(): void {
		this.runtime.dispose();
	}

	/**
	 * Maps a scheduler preset onto a Kokoro voice pack.
	 *
	 * The scheduler's `SpeakerPreset.voiceId` is the canonical caller hook
	 * for picking a Kokoro voice. A missing/empty id uses the configured
	 * `defaultVoiceId`; an id the catalog does not know is handled by
	 * `resolveKokoroVoiceOrDefault`, which substitutes a default instead of
	 * throwing (so OmniVoice-authored presets still produce audio when
	 * routed through Kokoro).
	 */
	private resolveVoice(preset: SpeakerPreset): KokoroVoicePack {
		// `||` (not `??`) on purpose: an empty-string id also falls back.
		const id = preset.voiceId || this.defaultVoiceId;
		return resolveKokoroVoiceOrDefault(id);
	}

	/**
	 * Resolves the phonemizer on first use (honouring the constructor
	 * override), logs which one was picked, and caches it for later calls.
	 */
	private async ensurePhonemizer(): Promise<KokoroPhonemizer> {
		if (this.phonemizer) return this.phonemizer;
		this.phonemizer = await resolvePhonemizer(this.phonemizerOverride);
		console.info(`[kokoro] using phonemizer=${this.phonemizer.id}`);
		return this.phonemizer;
	}
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* On-disk discovery for the Kokoro-only voice mode. Probes
|
|
3
|
+
* `<stateDir>/local-inference/models/kokoro/` (or `$ELIZA_KOKORO_MODEL_DIR`)
|
|
4
|
+
* for a Kokoro GGUF model file plus at least one voice `.bin` under
|
|
5
|
+
* `voices/`. Callers can pass an explicit model root to probe bundle-local
|
|
6
|
+
* Kokoro artifacts first. Returns null when anything is missing — no
|
|
7
|
+
* auto-download (AGENTS.md §3). GGUF-only: the ONNX path has been retired
|
|
8
|
+
* (see `runtimeKind` below).
|
|
9
|
+
*
|
|
10
|
+
* TRANSPORT NOTE — Kokoro synthesizes in-process through the fused
|
|
11
|
+
* `libelizainference` handle (ABI v10 `eliza_inference_kokoro_*`), the same
|
|
12
|
+
* dlopen()-ed lib as OmniVoice. The fork links Kokoro's native engine
|
|
13
|
+
* (`tools/kokoro/kokoro_lib`, its own GGUF reader + iSTFT decoder) into the
|
|
14
|
+
* fused build, and `KokoroFfiRuntime` drives it via `kokoroLoad` /
|
|
15
|
+
* `kokoroSynthesize`. This is the canonical path on every platform and the
|
|
16
|
+
* only one that ships on iOS / Google Play (those forbid the app opening a
|
|
17
|
+
* local TCP socket). The legacy `KokoroGgufRuntime` — POST `/v1/audio/speech`
|
|
18
|
+
* on a Kokoro-capable llama-server (the MTP gateway launched with
|
|
19
|
+
* `--kokoro-model`) — stays as an explicit dev/desktop opt-in
|
|
20
|
+
* (`KOKORO_BACKEND=fork`) and is never resolved on the mobile path. The GGUF
|
|
21
|
+
* is produced by the fork's `tools/kokoro/convert_kokoro_pth_to_gguf.py`.
|
|
22
|
+
*
|
|
23
|
+
* Env overrides:
|
|
24
|
+
* ELIZA_KOKORO_MODEL_DIR — directory root
|
|
25
|
+
* ELIZA_KOKORO_MODEL_FILE — exact filename inside the root
|
|
26
|
+
 *                                    (GGUF only; the ONNX path has been retired)
|
|
27
|
+
* ELIZA_KOKORO_DEFAULT_VOICE_ID — default voice id (e.g. `af_same`, `af_bella`)
|
|
28
|
+
*/
|
|
29
|
+
import type { KokoroModelLayout } from "./types";
|
|
30
|
+
/** Canonical Kokoro v1.0 output sample rate. */
|
|
31
|
+
export declare const KOKORO_DEFAULT_SAMPLE_RATE = 24000;
|
|
32
|
+
/** True iff the candidate filename routes to the fused GGUF path. */
|
|
33
|
+
export declare function isKokoroGgufFile(filename: string): boolean;
|
|
34
|
+
export interface KokoroEngineDiscoveryResult {
|
|
35
|
+
layout: KokoroModelLayout;
|
|
36
|
+
/**
|
|
37
|
+
* Resolved default voice id. Prefers the catalog default
|
|
38
|
+
* (`KOKORO_DEFAULT_VOICE_ID` = `af_same`, Samantha) when its preset is on
|
|
39
|
+
* disk; falls back loudly to `KOKORO_FALLBACK_VOICE_ID` (`af_bella`) when
|
|
40
|
+
* Samantha's preset has not been produced yet; otherwise picks the first
|
|
41
|
+
* voice pack whose `.bin` is actually staged.
|
|
42
|
+
*/
|
|
43
|
+
defaultVoiceId: string;
|
|
44
|
+
/**
|
|
45
|
+
* Resolved runtime kind. Always `"gguf"` — only GGUF model files are
|
|
46
|
+
* accepted by the discovery (ONNX paths have been retired).
|
|
47
|
+
*/
|
|
48
|
+
runtimeKind: "gguf";
|
|
49
|
+
}
|
|
50
|
+
/** Returns the on-disk directory the discovery probes. */
|
|
51
|
+
export declare function kokoroEngineModelDir(rootOverride?: string): string;
|
|
52
|
+
/**
|
|
53
|
+
* Probe disk for a usable Kokoro layout. Returns null when any required
|
|
54
|
+
* piece is missing — the engine then falls back to its existing behaviour
|
|
55
|
+
* (fused omnivoice or `StubOmniVoiceBackend`).
|
|
56
|
+
*/
|
|
57
|
+
export declare function resolveKokoroEngineConfig(rootOverride?: string): KokoroEngineDiscoveryResult | null;
|
|
58
|
+
//# sourceMappingURL=kokoro-engine-discovery.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kokoro-engine-discovery.d.ts","sourceRoot":"","sources":["kokoro-engine-discovery.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAKH,OAAO,KAAK,EAAE,iBAAiB,EAAmB,MAAM,SAAS,CAAC;AAOlE,gDAAgD;AAChD,eAAO,MAAM,0BAA0B,QAAS,CAAC;AAiBjD,qEAAqE;AACrE,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAE1D;AAED,MAAM,WAAW,2BAA2B;IAC3C,MAAM,EAAE,iBAAiB,CAAC;IAC1B;;;;;;OAMG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,WAAW,EAAE,MAAM,CAAC;CACpB;AAED,0DAA0D;AAC1D,wBAAgB,oBAAoB,CAAC,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,CAMlE;AAED;;;;GAIG;AACH,wBAAgB,yBAAyB,CACxC,YAAY,CAAC,EAAE,MAAM,GACnB,2BAA2B,GAAG,IAAI,CAuBpC"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* On-disk discovery for the Kokoro-only voice mode. Probes
|
|
3
|
+
* `<stateDir>/local-inference/models/kokoro/` (or `$ELIZA_KOKORO_MODEL_DIR`)
|
|
4
|
+
* for a Kokoro GGUF model file plus at least one voice `.bin` under
|
|
5
|
+
* `voices/`. Callers can pass an explicit model root to probe bundle-local
|
|
6
|
+
* Kokoro artifacts first. Returns null when anything is missing — no
|
|
7
|
+
* auto-download (AGENTS.md §3). GGUF-only: the ONNX path has been retired
|
|
8
|
+
* (see `runtimeKind` below).
|
|
9
|
+
*
|
|
10
|
+
* TRANSPORT NOTE — Kokoro synthesizes in-process through the fused
|
|
11
|
+
* `libelizainference` handle (ABI v10 `eliza_inference_kokoro_*`), the same
|
|
12
|
+
* dlopen()-ed lib as OmniVoice. The fork links Kokoro's native engine
|
|
13
|
+
* (`tools/kokoro/kokoro_lib`, its own GGUF reader + iSTFT decoder) into the
|
|
14
|
+
* fused build, and `KokoroFfiRuntime` drives it via `kokoroLoad` /
|
|
15
|
+
* `kokoroSynthesize`. This is the canonical path on every platform and the
|
|
16
|
+
* only one that ships on iOS / Google Play (those forbid the app opening a
|
|
17
|
+
* local TCP socket). The legacy `KokoroGgufRuntime` — POST `/v1/audio/speech`
|
|
18
|
+
* on a Kokoro-capable llama-server (the MTP gateway launched with
|
|
19
|
+
* `--kokoro-model`) — stays as an explicit dev/desktop opt-in
|
|
20
|
+
* (`KOKORO_BACKEND=fork`) and is never resolved on the mobile path. The GGUF
|
|
21
|
+
* is produced by the fork's `tools/kokoro/convert_kokoro_pth_to_gguf.py`.
|
|
22
|
+
*
|
|
23
|
+
* Env overrides:
|
|
24
|
+
* ELIZA_KOKORO_MODEL_DIR — directory root
|
|
25
|
+
* ELIZA_KOKORO_MODEL_FILE — exact filename inside the root
|
|
26
|
+
 *                                    (GGUF only; the ONNX path has been retired)
|
|
27
|
+
* ELIZA_KOKORO_DEFAULT_VOICE_ID — default voice id (e.g. `af_same`, `af_bella`)
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { existsSync, readdirSync } from "node:fs";
|
|
31
|
+
import path from "node:path";
|
|
32
|
+
import { elizaModelsDir } from "../../paths";
|
|
33
|
+
import type { KokoroModelLayout, KokoroVoicePack } from "./types";
|
|
34
|
+
import {
|
|
35
|
+
KOKORO_DEFAULT_VOICE_ID,
|
|
36
|
+
KOKORO_FALLBACK_VOICE_ID,
|
|
37
|
+
KOKORO_VOICE_PACKS,
|
|
38
|
+
} from "./voice-presets";
|
|
39
|
+
|
|
40
|
+
/** Canonical Kokoro v1.0 output sample rate, in Hz. */
export const KOKORO_DEFAULT_SAMPLE_RATE = 24_000;

/**
 * Filenames the loader will accept if `ELIZA_KOKORO_MODEL_FILE` is unset.
 * Order is preference-first: the quantized Q4_K_M GGUF is probed before the
 * unquantized one. Only GGUF files are listed — the ONNX exports that used
 * to appear here have been retired.
 *
 * The Q4_K_M GGUF is what the elizaOS/llama.cpp fork's
 * `tools/kokoro/convert_kokoro_pth_to_gguf.py` produces for shipping
 * tiers; `kokoro-82m-v1_0.gguf` is the unquantized canonical filename
 * the runtime documents at `kokoro-runtime.ts:KOKORO_GGUF_REL_PATH`.
 */
const CANDIDATE_MODEL_FILES: readonly string[] = [
	"kokoro-82m-v1_0-Q4_K_M.gguf",
	"kokoro-82m-v1_0.gguf",
];
|
|
57
|
+
|
|
58
|
+
/**
 * Reports whether `filename` routes to the fused GGUF path, i.e. whether it
 * ends in a `.gguf` extension (matched case-insensitively).
 */
export function isKokoroGgufFile(filename: string): boolean {
	const ggufSuffix = /\.gguf$/i;
	return ggufSuffix.test(filename);
}
|
|
62
|
+
|
|
63
|
+
/** What `resolveKokoroEngineConfig` hands back when a usable layout is found. */
export interface KokoroEngineDiscoveryResult {
	/** Model root, model filename, voices directory and sample rate. */
	layout: KokoroModelLayout;
	/**
	 * Default voice id chosen by discovery. The catalog default
	 * (`KOKORO_DEFAULT_VOICE_ID` = `af_same`, Samantha) wins when its preset
	 * is on disk. If Samantha's preset has not been produced yet, discovery
	 * falls back loudly to `KOKORO_FALLBACK_VOICE_ID` (`af_bella`). Failing
	 * both, it takes the first voice pack whose `.bin` is actually staged.
	 */
	defaultVoiceId: string;
	/**
	 * Runtime kind. Always `"gguf"`: discovery accepts GGUF model files only
	 * (the ONNX paths have been retired).
	 */
	runtimeKind: "gguf";
}
|
|
79
|
+
|
|
80
|
+
/**
 * Returns the on-disk directory the discovery probes.
 *
 * Precedence: a non-blank `rootOverride`, then a non-blank
 * `$ELIZA_KOKORO_MODEL_DIR`, then `<elizaModelsDir()>/kokoro`. Both
 * caller- and env-supplied values are trimmed before use.
 */
export function kokoroEngineModelDir(rootOverride?: string): string {
	return (
		rootOverride?.trim() ||
		process.env.ELIZA_KOKORO_MODEL_DIR?.trim() ||
		path.join(elizaModelsDir(), "kokoro")
	);
}
|
|
88
|
+
|
|
89
|
+
/**
 * Looks on disk for a complete Kokoro layout: the model root, a model file
 * inside it, a `voices/` directory, and a resolvable default voice. If any
 * of these is absent the result is null, and the engine keeps its existing
 * behaviour (fused omnivoice or `StubOmniVoiceBackend`).
 *
 * @param rootOverride Optional explicit model root, forwarded to
 *   `kokoroEngineModelDir`.
 */
export function resolveKokoroEngineConfig(
	rootOverride?: string,
): KokoroEngineDiscoveryResult | null {
	const root = kokoroEngineModelDir(rootOverride);

	// The model file is only looked up once the root itself exists.
	const modelFile = existsSync(root) ? resolveModelFile(root) : null;
	if (!modelFile) return null;

	// Likewise the default voice is only resolved against an existing dir.
	const voicesDir = path.join(root, "voices");
	const defaultVoiceId = existsSync(voicesDir)
		? resolveDefaultVoiceId(voicesDir)
		: null;
	if (!defaultVoiceId) return null;

	const layout: KokoroModelLayout = {
		root,
		modelFile,
		voicesDir,
		sampleRate: KOKORO_DEFAULT_SAMPLE_RATE,
	};
	return { layout, defaultVoiceId, runtimeKind: "gguf" };
}
|
|
120
|
+
|
|
121
|
+
/**
 * Choose the Kokoro model filename inside `root`.
 *
 * When `ELIZA_KOKORO_MODEL_FILE` is set (non-blank after trimming) it is the
 * only name considered: it must carry a `.gguf` extension and exist under
 * `root`, otherwise the result is null — the built-in candidates are not
 * consulted. Without the override, the first `CANDIDATE_MODEL_FILES` entry
 * that exists under `root` is returned.
 *
 * @param root Directory the model file is looked up in.
 * @returns The filename relative to `root`, or null when nothing usable exists.
 */
function resolveModelFile(root: string): string | null {
	const isStaged = (name: string): boolean =>
		existsSync(path.join(root, name));

	const override = process.env.ELIZA_KOKORO_MODEL_FILE?.trim();
	if (override) {
		// Discovery always reports runtimeKind "gguf", so an override naming a
		// non-GGUF file must be rejected here rather than mislabelled later.
		return isKokoroGgufFile(override) && isStaged(override) ? override : null;
	}

	return CANDIDATE_MODEL_FILES.find(isStaged) ?? null;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Decide which voice id discovery reports as the default.
 *
 * Resolution order:
 *  1. `ELIZA_KOKORO_DEFAULT_VOICE_ID`, when set. It must name a catalog voice
 *     whose preset file exists in `voicesDir`; otherwise a warning is printed
 *     and null is returned (no further fallback is attempted).
 *  2. The catalog default (`KOKORO_DEFAULT_VOICE_ID`) when its file is staged.
 *  3. `KOKORO_FALLBACK_VOICE_ID` when its file is staged, with a warning.
 *  4. The first catalog voice whose file is on disk.
 *
 * @param voicesDir Directory holding the staged voice preset files.
 * @returns The chosen voice id, or null when none can be resolved.
 */
function resolveDefaultVoiceId(voicesDir: string): string | null {
	// A catalog entry is only usable when its preset file is present.
	const isStaged = (pack: KokoroVoicePack): boolean =>
		existsSync(path.join(voicesDir, pack.file));

	const requested = process.env.ELIZA_KOKORO_DEFAULT_VOICE_ID?.trim();
	if (requested) {
		const requestedPack = findVoicePack(requested);
		if (requestedPack && isStaged(requestedPack)) return requestedPack.id;
		// An explicit override that cannot be honoured makes discovery return
		// null; say so, instead of leaving the operator to guess why.
		// eslint-disable-next-line no-console -- operator-facing diagnostics,
		// same channel as the fallback warning below.
		console.warn(
			`[kokoro] ELIZA_KOKORO_DEFAULT_VOICE_ID=${requested} is not a catalog voice with a preset staged in ${voicesDir} — no default voice resolved.`,
		);
		return null;
	}

	// Prefer the catalog default (Samantha) when its file is staged.
	const defaultPack = findVoicePack(KOKORO_DEFAULT_VOICE_ID);
	if (defaultPack && isStaged(defaultPack)) {
		return defaultPack.id;
	}

	// Samantha preset bytes not staged — fall back to the bundled fallback
	// voice (af_bella). This MUST be loud so operators see the degradation:
	// the canonical default is Samantha and we only land here when the LoRA
	// pipeline has not produced a real `af_same.bin` yet.
	const fallbackPack = findVoicePack(KOKORO_FALLBACK_VOICE_ID);
	if (fallbackPack && isStaged(fallbackPack)) {
		// eslint-disable-next-line no-console -- this is the one place where
		// the runtime must surface the fallback to the operator console; the
		// structured logger is unavailable at discovery time.
		console.warn(
			`[kokoro] default voice ${KOKORO_DEFAULT_VOICE_ID} preset not staged at ${path.join(voicesDir, defaultPack?.file ?? `${KOKORO_DEFAULT_VOICE_ID}.bin`)} — falling back to ${KOKORO_FALLBACK_VOICE_ID}. Run packages/training/scripts/voice/samantha_lora/RUNBOOK.md to produce a real Samantha preset, or regenerate via plugins/plugin-local-inference/scripts/regenerate-samantha-preset.mjs.`,
		);
		return fallbackPack.id;
	}

	// Otherwise pick the first catalog voice whose file is on disk. This
	// lets operators stage a single voice (any voice) and have it just work.
	const staged = listStagedVoiceIds(voicesDir);
	return staged[0] ?? null;
}
|
|
163
|
+
|
|
164
|
+
/** Look up a catalog voice pack by exact id; null when the catalog has no such id. */
function findVoicePack(id: string): KokoroVoicePack | null {
	for (const pack of KOKORO_VOICE_PACKS) {
		if (pack.id === id) return pack;
	}
	return null;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Ids of the catalog voices whose preset file is present in `voicesDir`,
 * in catalog order. Any failure while listing (for example an unreadable
 * directory) yields an empty list instead of an exception.
 */
function listStagedVoiceIds(voicesDir: string): string[] {
	try {
		const onDisk = new Set(readdirSync(voicesDir));
		const ids: string[] = [];
		for (const pack of KOKORO_VOICE_PACKS) {
			if (onDisk.has(pack.file)) ids.push(pack.id);
		}
		return ids;
	} catch {
		return [];
	}
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process Kokoro-82M runtime over the fused `libelizainference` FFI
|
|
3
|
+
* (the `eliza_inference_kokoro_*` exports — introduced at ABI v10; the fused
|
|
4
|
+
* library is currently ABI v11, which adds EOT on top, so these symbols are
|
|
5
|
+
* present in every current build — see `ELIZA_INFERENCE_ABI_VERSION` in
|
|
6
|
+
* ffi-bindings.ts).
|
|
7
|
+
*
|
|
8
|
+
* This is the canonical Kokoro execution path on every platform. It replaces
|
|
9
|
+
* the local-TCP `KokoroGgufRuntime` (POST `/v1/audio/speech` on a running
|
|
10
|
+
* llama-server) for the mobile case — iOS and Google Play forbid the app
|
|
11
|
+
* opening a local TCP socket, so the HTTP→llama-server route cannot ship there.
|
|
12
|
+
* Kokoro synthesizes through the same dlopen()-ed handle as OmniVoice: the
|
|
13
|
+
* fused build links Eliza-1's Kokoro engine (its own GGUF reader + iSTFT
|
|
14
|
+
* decoder) behind `eliza_inference_kokoro_supported/load/synthesize/sample_rate`.
|
|
15
|
+
*
|
|
16
|
+
* Ownership: this runtime owns its own FFI handle + context. The context is
|
|
17
|
+
* created with `create(bundleRoot)` anchored at the bundle root (or the Kokoro
|
|
18
|
+
* model root when there is no Eliza-1 bundle), mirroring how the desktop fused
|
|
19
|
+
* text runtime obtains its ctx. The GGUF + the active voice `.bin` are loaded
|
|
20
|
+
* once via `kokoroLoad` and reloaded only when the requested voice changes.
|
|
21
|
+
*
|
|
22
|
+
* No silent fallback (AGENTS.md §3): when the loaded library does not export
|
|
23
|
+
* the Kokoro symbols (`kokoroSupported() === false`) or the model/voice files
|
|
24
|
+
* are missing, construction / first synthesis throws a structured
|
|
25
|
+
* `VoiceLifecycleError` rather than dropping back to the TCP route.
|
|
26
|
+
*/
|
|
27
|
+
import { type ElizaInferenceContextHandle, type ElizaInferenceFfi } from "../ffi-bindings";
|
|
28
|
+
import type { KokoroRuntime, KokoroRuntimeInputs } from "./kokoro-runtime";
|
|
29
|
+
import type { KokoroModelLayout } from "./types";
|
|
30
|
+
/** Construction options for `KokoroFfiRuntime`. */
export interface KokoroFfiRuntimeOptions {
    /** On-disk Kokoro layout already resolved by discovery: GGUF filename, voices dir and root. */
    layout: KokoroModelLayout;
    /**
     * Directory passed to `create(bundleRoot)` to anchor the FFI context.
     * When left out, the Kokoro model root is used; that is enough for the
     * standalone Kokoro engine because it loads the GGUF and the voice `.bin`
     * by explicit absolute path rather than by bundle convention.
     */
    bundleRoot?: string;
    /**
     * An already-loaded FFI handle to use (the desktop fused engine already
     * owns one). When left out, the runtime loads its own via
     * `resolveFusedLibraryPath`.
     */
    ffi?: ElizaInferenceFfi;
    /**
     * An existing context to reuse. When left out, the runtime creates one
     * with `ffi.create(bundleRoot)` and destroys it on `dispose`.
     */
    ctx?: ElizaInferenceContextHandle;
}
|
|
51
|
+
/** `KokoroRuntime` implementation declared for the fused-library FFI path. */
export declare class KokoroFfiRuntime implements KokoroRuntime {
    readonly id: "gguf";
    readonly sampleRate: number;
    private readonly layout;
    private readonly ffi;
    private readonly ownsFfi;
    private readonly ctx;
    private readonly ownsCtx;
    /** Id of the voice currently loaded on the ctx; null before the first load. */
    private loadedVoiceId;
    private disposed;
    constructor(opts: KokoroFfiRuntimeOptions);
    synthesize(args: KokoroRuntimeInputs): Promise<{
        cancelled: boolean;
    }>;
    dispose(): void;
    /**
     * Loads the GGUF and the requested voice `.bin` into the ctx. A reload
     * happens only when the voice changes: Kokoro keeps the model resident,
     * and swapping voices is a cheap re-load of the 256-float style tensor.
     */
    private ensureVoiceLoaded;
    private kokoroSynthesize;
}
|
|
75
|
+
//# sourceMappingURL=kokoro-ffi-runtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"kokoro-ffi-runtime.d.ts","sourceRoot":"","sources":["kokoro-ffi-runtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAMH,OAAO,EACN,KAAK,2BAA2B,EAChC,KAAK,iBAAiB,EAEtB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAC3E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAcjD,MAAM,WAAW,uBAAuB;IACvC,0EAA0E;IAC1E,MAAM,EAAE,iBAAiB,CAAC;IAC1B;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,GAAG,CAAC,EAAE,iBAAiB,CAAC;IACxB;;;OAGG;IACH,GAAG,CAAC,EAAE,2BAA2B,CAAC;CAClC;AAED,qBAAa,gBAAiB,YAAW,aAAa;IACrD,QAAQ,CAAC,EAAE,EAAG,MAAM,CAAU;IAC9B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAE5B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAoB;IAC3C,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAoB;IACxC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAA8B;IAClD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,sEAAsE;IACtE,OAAO,CAAC,aAAa,CAAuB;IAC5C,OAAO,CAAC,QAAQ,CAAS;gBAEb,IAAI,EAAE,uBAAuB;IA4CnC,UAAU,CAAC,IAAI,EAAE,mBAAmB,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,OAAO,CAAA;KAAE,CAAC;IA4C5E,OAAO,IAAI,IAAI;IAOf;;;;OAIG;IACH,OAAO,CAAC,iBAAiB;IAoCzB,OAAO,CAAC,gBAAgB;CASxB"}
|