@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* In-process Kokoro-82M runtime over the fused `libelizainference` FFI
|
|
3
|
+
* (the `eliza_inference_kokoro_*` exports — introduced at ABI v10; the fused
|
|
4
|
+
* library is currently ABI v12, which adds EOT (v11) and ASR word timestamps
|
|
5
|
+
* (v12) on top, so these symbols are present in every current build — see
|
|
6
|
+
* `ELIZA_INFERENCE_ABI_VERSION` in ffi-bindings.ts).
|
|
7
|
+
*
|
|
8
|
+
* This is the canonical Kokoro execution path on every platform. It replaces
|
|
9
|
+
* the local-TCP `KokoroGgufRuntime` (POST `/v1/audio/speech` on a running
|
|
10
|
+
* llama-server) for the mobile case — iOS and Google Play forbid the app
|
|
11
|
+
* opening a local TCP socket, so the HTTP→llama-server route cannot ship there.
|
|
12
|
+
* Kokoro synthesizes through the same dlopen()-ed handle as OmniVoice: the
|
|
13
|
+
* fused build links Eliza-1's Kokoro engine (its own GGUF reader + iSTFT
|
|
14
|
+
* decoder) behind `eliza_inference_kokoro_supported/load/synthesize/sample_rate`.
|
|
15
|
+
*
|
|
16
|
+
* Ownership: this runtime owns its own FFI handle + context. The context is
|
|
17
|
+
* created with `create(bundleRoot)` anchored at the bundle root (or the Kokoro
|
|
18
|
+
* model root when there is no Eliza-1 bundle), mirroring how the desktop fused
|
|
19
|
+
* text runtime obtains its ctx. The GGUF + the active voice `.bin` are loaded
|
|
20
|
+
* once via `kokoroLoad` and reloaded only when the requested voice changes.
|
|
21
|
+
*
|
|
22
|
+
* No silent fallback (AGENTS.md §3): when the loaded library does not export
|
|
23
|
+
* the Kokoro symbols (`kokoroSupported() === false`) or the model/voice files
|
|
24
|
+
* are missing, construction / first synthesis throws a structured
|
|
25
|
+
* `VoiceLifecycleError` rather than dropping back to the TCP route.
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { existsSync } from "node:fs";
|
|
29
|
+
import path from "node:path";
|
|
30
|
+
import { logger } from "@elizaos/core";
|
|
31
|
+
import { resolveFusedLibraryPath } from "../../desktop-fused-ffi-backend-runtime";
|
|
32
|
+
import {
|
|
33
|
+
type ElizaInferenceContextHandle,
|
|
34
|
+
type ElizaInferenceFfi,
|
|
35
|
+
loadElizaInferenceFfi,
|
|
36
|
+
} from "../ffi-bindings";
|
|
37
|
+
import { VoiceLifecycleError } from "../lifecycle";
|
|
38
|
+
import type { KokoroRuntime, KokoroRuntimeInputs } from "./kokoro-runtime";
|
|
39
|
+
import type { KokoroModelLayout } from "./types";
|
|
40
|
+
import { resolveKokoroVoiceOrDefault } from "./voices";
|
|
41
|
+
|
|
42
|
+
/** Kokoro v1.0 style-vector inner dimension. */
|
|
43
|
+
const KOKORO_STYLE_DIM = 256;
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Per-synthesis output ceiling. Kokoro v1.0 emits 24 kHz fp32 PCM; 30 s of
|
|
47
|
+
* headroom (720 000 samples) bounds a single phrase synthesis well past the
|
|
48
|
+
* longest chunk the phrase chunker will hand us. The library returns the real
|
|
49
|
+
* sample count, which we slice to — this is only the allocation cap.
|
|
50
|
+
*/
|
|
51
|
+
const MAX_OUTPUT_SAMPLES = 30 * 24_000;
|
|
52
|
+
|
|
53
|
+
export interface KokoroFfiRuntimeOptions {
|
|
54
|
+
/** Resolved on-disk Kokoro layout (GGUF filename + voices dir + root). */
|
|
55
|
+
layout: KokoroModelLayout;
|
|
56
|
+
/**
|
|
57
|
+
* Directory the FFI context anchors at (`create(bundleRoot)`). Defaults to
|
|
58
|
+
* the Kokoro model root, which is sufficient for the standalone Kokoro
|
|
59
|
+
* engine — it loads the GGUF + voice `.bin` by explicit absolute path, not
|
|
60
|
+
* by bundle convention.
|
|
61
|
+
*/
|
|
62
|
+
bundleRoot?: string;
|
|
63
|
+
/**
|
|
64
|
+
* Inject a pre-loaded FFI handle (the desktop fused engine already owns one).
|
|
65
|
+
* When omitted the runtime loads its own via `resolveFusedLibraryPath`.
|
|
66
|
+
*/
|
|
67
|
+
ffi?: ElizaInferenceFfi;
|
|
68
|
+
/**
|
|
69
|
+
* Inject a context to reuse. When omitted the runtime creates its own with
|
|
70
|
+
* `ffi.create(bundleRoot)` and destroys it on `dispose`.
|
|
71
|
+
*/
|
|
72
|
+
ctx?: ElizaInferenceContextHandle;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
 * In-process Kokoro runtime backed by the fused `libelizainference` FFI.
 *
 * The runtime either borrows an injected FFI handle / context (`opts.ffi`,
 * `opts.ctx`) or creates its own. It only releases what it created itself:
 * `ownsFfi` / `ownsCtx` record which resources `dispose()` must tear down.
 *
 * Failures are surfaced as `VoiceLifecycleError("kernel-missing", …)`; there
 * is no fallback to another synthesis route.
 */
export class KokoroFfiRuntime implements KokoroRuntime {
  readonly id = "gguf" as const;
  readonly sampleRate: number;

  private readonly layout: KokoroModelLayout;
  private readonly ffi: ElizaInferenceFfi;
  private readonly ownsFfi: boolean;
  private readonly ctx: ElizaInferenceContextHandle;
  private readonly ownsCtx: boolean;
  /** Voice id currently resident on the ctx (null until first load). */
  private loadedVoiceId: string | null = null;
  private disposed = false;

  /**
   * @param opts Layout plus optional injected FFI handle / context.
   * @throws VoiceLifecycleError when the fused library cannot be located or
   *   does not report Kokoro support. Errors thrown by the FFI itself (e.g.
   *   from `create`) are rethrown after any self-loaded library handle has
   *   been closed, so a failed construction does not leak the handle.
   */
  constructor(opts: KokoroFfiRuntimeOptions) {
    this.layout = opts.layout;
    const bundleRoot = opts.bundleRoot ?? opts.layout.root;

    const provided = opts.ffi;
    if (provided) {
      this.ffi = provided;
      this.ownsFfi = false;
    } else {
      const libPath = resolveFusedLibraryPath(bundleRoot);
      if (!libPath) {
        throw new VoiceLifecycleError(
          "kernel-missing",
          `[KokoroFfiRuntime] fused libelizainference not found for the in-process Eliza-1 Kokoro engine (anchored at ${bundleRoot}). ` +
            "Set ELIZA_INFERENCE_LIBRARY or build via packages/app-core/scripts/build-llama-cpp-mtp.mjs.",
        );
      }
      this.ffi = loadElizaInferenceFfi(libPath);
      this.ownsFfi = true;
    }

    // Everything past this point may throw while we already hold a library
    // handle. If we loaded it ourselves, close it before propagating so a
    // failed construction never leaks the handle.
    try {
      if (
        typeof this.ffi.kokoroSupported !== "function" ||
        !this.ffi.kokoroSupported()
      ) {
        // The message is built (and the ABI version read) before the handle
        // is closed in the catch below.
        throw new VoiceLifecycleError(
          "kernel-missing",
          `[KokoroFfiRuntime] the loaded libelizainference (ABI v${this.ffi.libraryAbiVersion}) does not link the in-process Eliza-1 Kokoro engine. ` +
            "Rebuild with the Kokoro engine enabled — the mobile path must not fall back to the local-TCP /v1/audio/speech route.",
        );
      }

      if (opts.ctx !== undefined) {
        this.ctx = opts.ctx;
        this.ownsCtx = false;
      } else {
        this.ctx = this.ffi.create(bundleRoot);
        this.ownsCtx = true;
      }
    } catch (err) {
      if (this.ownsFfi) this.ffi.close();
      throw err;
    }

    this.sampleRate = this.layout.sampleRate;
  }

  /**
   * Synthesize one phoneme sequence and deliver it through `args.onChunk`.
   *
   * The whole waveform is produced by a single synchronous FFI call, so at
   * most one non-final chunk is emitted, always followed by an empty
   * `isFinal: true` chunk.
   *
   * @returns `{ cancelled: true }` when the cancel signal was set before or
   *   after synthesis, or when `onChunk` returned `true`.
   * @throws VoiceLifecycleError when called after `dispose()`, or when the
   *   model / voice files or the required FFI exports are missing.
   */
  async synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }> {
    if (this.disposed) {
      throw new VoiceLifecycleError(
        "kernel-missing",
        "[KokoroFfiRuntime] synthesize called after dispose",
      );
    }
    this.ensureVoiceLoaded(args.voice.id);

    if (args.cancelSignal.cancelled) {
      args.onChunk({
        pcm: new Float32Array(0),
        sampleRate: this.sampleRate,
        isFinal: true,
      });
      return { cancelled: true };
    }

    const maxSamples = args.maxSamples ?? MAX_OUTPUT_SAMPLES;
    // The Kokoro engine produces the full waveform in one synchronous
    // forward. The text it phonemizes internally is the same phoneme string
    // the llama-server `/v1/audio/speech` path sends as `input`.
    const pcm = this.kokoroSynthesize(args.phonemes.phonemes, maxSamples);

    let cancelled = false;
    if (args.cancelSignal.cancelled) {
      // Cancelled while the synchronous forward ran: drop the audio.
      cancelled = true;
    } else if (pcm.length > 0) {
      const want = args.onChunk({
        pcm,
        sampleRate: this.sampleRate,
        isFinal: false,
      });
      if (want === true || args.cancelSignal.cancelled) cancelled = true;
    }

    // Always terminate the stream with an empty final chunk.
    args.onChunk({
      pcm: new Float32Array(0),
      sampleRate: this.sampleRate,
      isFinal: true,
    });
    return { cancelled };
  }

  /**
   * Release the resources this runtime created. Idempotent. The library
   * handle is closed even if destroying the context throws.
   */
  dispose(): void {
    if (this.disposed) return;
    this.disposed = true;
    try {
      if (this.ownsCtx) this.ffi.destroy(this.ctx);
    } finally {
      if (this.ownsFfi) this.ffi.close();
    }
  }

  /**
   * Load the GGUF + the requested voice `.bin` into the ctx, reloading only
   * when the resolved voice id differs from the one already loaded.
   *
   * @throws VoiceLifecycleError when the model file, the voice file, or the
   *   `kokoroLoad` export is missing.
   */
  private ensureVoiceLoaded(requestedVoiceId: string): void {
    const voice = resolveKokoroVoiceOrDefault(requestedVoiceId);
    if (this.loadedVoiceId === voice.id) return;

    const ggufPath = path.join(this.layout.root, this.layout.modelFile);
    const voiceBinPath = path.join(this.layout.voicesDir, voice.file);
    if (!existsSync(ggufPath)) {
      throw new VoiceLifecycleError(
        "kernel-missing",
        `[KokoroFfiRuntime] Eliza-1 Kokoro model file not found at ${ggufPath}`,
      );
    }
    if (!existsSync(voiceBinPath)) {
      throw new VoiceLifecycleError(
        "kernel-missing",
        `[KokoroFfiRuntime] Eliza-1 voice preset not found at ${voiceBinPath} for voice ${voice.id}`,
      );
    }
    if (typeof this.ffi.kokoroLoad !== "function") {
      throw new VoiceLifecycleError(
        "kernel-missing",
        "[KokoroFfiRuntime] eliza_inference_kokoro_load is not exported by the loaded build",
      );
    }
    this.ffi.kokoroLoad({
      ctx: this.ctx,
      ggufPath,
      voiceBinPath,
      styleDim: voice.dim ?? KOKORO_STYLE_DIM,
    });
    // Record the voice only after the load call returned without throwing.
    this.loadedVoiceId = voice.id;
    logger.info(
      `[KokoroFfiRuntime] loaded Eliza-1 voice ${voice.id} from ${voiceBinPath}`,
    );
  }

  /**
   * Thin guard around the FFI synthesize export.
   *
   * @throws VoiceLifecycleError when the export is absent from the loaded build.
   */
  private kokoroSynthesize(text: string, maxSamples: number): Float32Array {
    if (typeof this.ffi.kokoroSynthesize !== "function") {
      throw new VoiceLifecycleError(
        "kernel-missing",
        "[KokoroFfiRuntime] eliza_inference_kokoro_synthesize is not exported by the loaded build",
      );
    }
    return this.ffi.kokoroSynthesize({ ctx: this.ctx, text, maxSamples });
  }
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Kokoro-82M model runner.
|
|
3
|
+
*
|
|
4
|
+
* Execution paths:
|
|
5
|
+
*
|
|
6
|
+
* 1. GGUF via llama-server (default). When the host llama-server advertises
|
|
7
|
+
* a Kokoro-capable build and exposes `/v1/audio/speech`, we POST text in
|
|
8
|
+
* and stream PCM out.
|
|
9
|
+
*
|
|
10
|
+
* 2. Python subprocess — eval-loop only. Spawns `python -m kokoro_tts`.
|
|
11
|
+
* Never the default in production.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { KokoroPhonemeSequence, KokoroVoicePack } from "./types";
|
|
15
|
+
|
|
16
|
+
/** Pinned GGUF candidate location (carried by our llama.cpp fork). The
|
|
17
|
+
* runtime references this only for diagnostics; the fork-side builder
|
|
18
|
+
* produces the file at this path. */
|
|
19
|
+
export const KOKORO_GGUF_REL_PATH = "voice/kokoro-82m-v1_0.gguf";
|
|
20
|
+
|
|
21
|
+
/** A single block of synthesized PCM handed to the streaming backend. */
export interface KokoroRuntimeChunk {
  /** Audio samples carried by this chunk. */
  pcm: Float32Array;
  /** Sample rate reported alongside `pcm`. */
  sampleRate: number;
  /** Set on the last chunk of a synthesis call. */
  isFinal: boolean;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Arguments for one `KokoroRuntime.synthesize()` call. The voice pack carries
 * a reference to the style tensor; resolving the actual bytes under
 * `layout.voicesDir/<file>` is the runtime's job.
 */
export interface KokoroRuntimeInputs {
  /** Phoneme sequence to synthesize. */
  phonemes: KokoroPhonemeSequence;
  /** Voice pack to synthesize with. */
  voice: KokoroVoicePack;
  /**
   * Upper bound on the number of output samples, guarding against runaway
   * generation. Output is always at the model's native sample rate
   * (`layout.sampleRate`, usually 24 kHz). When omitted the budget is
   * 16 seconds at that rate, i.e. the longest phrase the chunker emits plus
   * headroom.
   */
  maxSamples?: number;
  /** Cancellation flag; checked at chunk boundaries. */
  cancelSignal: { cancelled: boolean };
  /** Invoked once per chunk. Return `true` to cancel the remainder of the run. */
  onChunk: (chunk: KokoroRuntimeChunk) => boolean | undefined;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Contract implemented by every Kokoro runtime. `KokoroTtsBackend` is written
 * against this interface rather than a concrete class, which lets tests
 * substitute a mock.
 */
export interface KokoroRuntime {
  /** Tag naming the execution path behind this runtime. */
  readonly id: "gguf" | "python" | "mock";
  /** Sample rate of the PCM this runtime emits. */
  readonly sampleRate: number;
  /** Run one synthesis; resolves with whether the run was cancelled. */
  synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }>;
  /** Release anything the runtime holds. */
  dispose(): void;
}
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Python subprocess path — eval-loop only.
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
/** Options accepted by the `KokoroPythonRuntime` constructor. */
export interface KokoroPythonRuntimeOptions {
  /** Python executable for the subprocess path. */
  pythonBinary: string;
  /** Resolved layout; the subprocess locates the model beneath `root`. */
  layout: { root: string; sampleRate: number };
  /** Environment forwarded to the subprocess, if any. */
  env?: NodeJS.ProcessEnv;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Runtime slot for the Python (`python -m kokoro_tts`) path, which exists for
 * evaluation only and has no warm pool. This class does not spawn anything
 * itself: `synthesize()` always rejects, so selecting it from production code
 * fails loudly. The fine-tune evaluator wires the subprocess explicitly.
 */
export class KokoroPythonRuntime implements KokoroRuntime {
  readonly id = "python" as const;
  readonly sampleRate: number;

  /** Only `layout.sampleRate` is retained; the other options are not read here. */
  constructor(opts: KokoroPythonRuntimeOptions) {
    const { sampleRate } = opts.layout;
    this.sampleRate = sampleRate;
  }

  /**
   * Always rejects. The eval driver in `packages/training` is the canonical
   * caller and already wires `child_process.spawn`; rejecting here keeps the
   * production runtime from enabling this path by accident.
   */
  synthesize(_args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }> {
    return Promise.reject(
      new Error(
        "[kokoro] KokoroPythonRuntime is eval-only — use it from the fine-tune driver, not the runtime scheduler",
      ),
    );
  }

  /** Nothing to release: the instance keeps no long-lived state. */
  dispose(): void {
    // intentionally empty
  }
}
|
|
100
|
+
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
// Mock runtime — synthesizes a fixed-frequency sine tone keyed to phoneme count so tests
|
|
103
|
+
// can observe deterministic PCM without loading a model.
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
|
|
106
|
+
/** Tuning knobs for `KokoroMockRuntime`. */
export interface KokoroMockRuntimeOptions {
  /** Sample rate the mock reports and uses to time its sine wave. */
  sampleRate: number;
  /** Samples produced per synthesis call. Defaults to `floor(sampleRate * 0.2)`. */
  totalSamples?: number;
  /** How many body chunks the output is divided into. Defaults to 4. */
  chunkCount?: number;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Deterministic stand-in runtime for tests. Each call emits a constant-pitch
 * sine wave (amplitude 0.1) whose frequency is `100 + (phoneme id count % 200)`
 * Hz, split into body chunks and followed by one empty `isFinal` chunk.
 */
export class KokoroMockRuntime implements KokoroRuntime {
  readonly id = "mock" as const;
  readonly sampleRate: number;
  private readonly opts: Required<KokoroMockRuntimeOptions>;
  /** Number of `synthesize()` invocations so far. */
  calls = 0;

  constructor(opts: KokoroMockRuntimeOptions) {
    const { sampleRate, totalSamples, chunkCount } = opts;
    this.sampleRate = sampleRate;
    this.opts = {
      sampleRate,
      totalSamples: totalSamples ?? Math.floor(sampleRate * 0.2),
      chunkCount: chunkCount ?? 4,
    };
  }

  /**
   * Emit the configured number of samples through `args.onChunk`.
   *
   * The cancel flag is checked before every body chunk and again after each
   * callback; a callback returning `true` also cancels. The empty final chunk
   * is delivered whether or not the run was cancelled.
   *
   * @returns `{ cancelled }` — true when the body was cut short.
   */
  async synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }> {
    this.calls += 1;
    const { sampleRate } = this;
    const { totalSamples, chunkCount } = this.opts;
    const chunkLen = Math.max(1, Math.ceil(totalSamples / chunkCount));
    const omega = 2 * Math.PI * (100 + (args.phonemes.ids.length % 200));

    // Streams the body chunks; the return value says whether we stopped early.
    const streamBody = (): boolean => {
      for (let start = 0; start < totalSamples; start += chunkLen) {
        if (args.cancelSignal.cancelled) return true;
        const len = Math.min(chunkLen, totalSamples - start);
        const pcm = new Float32Array(len);
        for (let i = 0; i < len; i++) {
          const seconds = (start + i) / sampleRate;
          pcm[i] = Math.sin(omega * seconds) * 0.1;
        }
        const stop = args.onChunk({ pcm, sampleRate, isFinal: false });
        if (stop === true || args.cancelSignal.cancelled) return true;
      }
      return false;
    };

    const cancelled = streamBody();
    args.onChunk({ pcm: new Float32Array(0), sampleRate, isFinal: true });
    return { cancelled };
  }

  /** The mock owns no resources. */
  dispose(): void {
    /* nothing */
  }
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phoneme streaming for Kokoro-82M.
|
|
3
|
+
*
|
|
4
|
+
* Kokoro consumes a sequence of phoneme ids (espeak-ng IPA tokenised against a
|
|
5
|
+
* small fixed vocab). The scheduler emits phrases at punctuation or
|
|
6
|
+
* `phoneme-stream` boundaries (see `voice/phrase-chunker.ts` `chunkOn`
|
|
7
|
+
* option). This module is the seam between those phrase boundaries and the
|
|
8
|
+
* model's input tensor:
|
|
9
|
+
*
|
|
10
|
+
* text → phonemizer.phonemize() → KokoroPhonemeSequence (ids) → runtime
|
|
11
|
+
*
|
|
12
|
+
* For maximum responsiveness the runtime can call `streamPhonemes()` against
|
|
13
|
+
* an async text iterator (chunked draft tokens) and forward each window of
|
|
14
|
+
* accumulated ids as soon as a phoneme boundary fires. The default `flushAt`
|
|
15
|
+
* is 8 phonemes, which amortises the ONNX forward pass; setting it to 1
|
|
16
|
+
* emits progress per id, at the cost of a forward pass over many small
|
|
17
|
+
* windows. This file intentionally has no dependency on the rest of the
|
|
18
|
+
* voice scaffold so it can be reused by the fine-tune evaluator script.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import type { KokoroPhonemeSequence, KokoroPhonemizer } from "./types";
|
|
22
|
+
|
|
23
|
+
/** One window yielded while phonemizing a text stream. */
export interface PhonemeStreamWindow {
  /**
   * Every id accumulated since the stream began (cumulative, not a delta).
   * A consumer may re-tokenise or track state by id; the simplest one just
   * forwards the whole window on each call.
   */
  ids: Int32Array;
  /** Cumulative phoneme string, intended for debugging or display. */
  phonemes: string;
  /** Set on the last window of the stream. */
  isFinal: boolean;
}
|
|
33
|
+
|
|
34
|
+
/** Options shared by `streamPhonemes` and `phonemizePhrase`. */
export interface StreamPhonemesOptions {
  /** Phonemizer that converts text into a phoneme id sequence. */
  phonemizer: KokoroPhonemizer;
  /** Language argument handed to `phonemizer.phonemize`. */
  lang: string;
  /**
   * Yield a window once this many new phoneme ids have accumulated.
   * Defaults to 8 (roughly first audio after a short syllable cluster,
   * matching the phrase chunker's default cap).
   */
  flushAt?: number;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Phonemize an async text source and yield cumulative phoneme windows.
 *
 * Incoming chunks are buffered and cut at the last space character, so the
 * phonemizer only ever receives whole words. A non-final window is yielded
 * whenever at least `flushAt` new ids (default 8, clamped to a minimum of 1)
 * have accumulated since the previous window; once the source is exhausted
 * the remaining buffer is phonemized and exactly one `isFinal` window is
 * yielded, even if no ids were produced.
 *
 * Segments with no non-whitespace content are never sent to the phonemizer.
 * This matters for token streams shaped like `" word"`, where the cut lands
 * at index 0 and the head is an empty string.
 *
 * Consume with `for await (const window of streamPhonemes(…))`. The
 * pull-style API keeps this usable without a scheduler dependency.
 *
 * @param textChunks - Text pieces in arrival order; empty pieces are ignored.
 * @param opts - Phonemizer, language, and optional `flushAt` threshold.
 * @returns An async iterable of cumulative windows ending with `isFinal: true`.
 */
export async function* streamPhonemes(
  textChunks: AsyncIterable<string>,
  opts: StreamPhonemesOptions,
): AsyncIterable<PhonemeStreamWindow> {
  const flushAt = Math.max(1, opts.flushAt ?? 8);
  const idsAcc: number[] = [];
  let phonemesAcc = "";
  let lastFlushAt = 0;
  let leftover = "";

  for await (const chunk of textChunks) {
    if (!chunk) continue;
    leftover += chunk;
    // Hold back the trailing partial word: everything before the last space
    // is complete and safe to phonemize now.
    const split = leftover.lastIndexOf(" ");
    if (split === -1) continue;
    const head = leftover.slice(0, split);
    leftover = leftover.slice(split + 1);
    // A leading-space chunk produces an empty head; nothing to phonemize yet.
    if (head.trim().length === 0) continue;
    const seq = await opts.phonemizer.phonemize(head, opts.lang);
    appendSeq(seq, idsAcc);
    phonemesAcc += seq.phonemes;
    if (idsAcc.length - lastFlushAt >= flushAt) {
      lastFlushAt = idsAcc.length;
      yield {
        ids: Int32Array.from(idsAcc),
        phonemes: phonemesAcc,
        isFinal: false,
      };
    }
  }

  // Flush the final partial word, unless only whitespace is left.
  if (leftover.trim().length > 0) {
    const seq = await opts.phonemizer.phonemize(leftover, opts.lang);
    appendSeq(seq, idsAcc);
    phonemesAcc += seq.phonemes;
  }
  yield {
    ids: Int32Array.from(idsAcc),
    phonemes: phonemesAcc,
    isFinal: true,
  };
}

/**
 * Append one phonemized segment's ids to `target`, dropping per-segment
 * framing so repeated segments do not stack BOS/EOS markers mid-stream.
 *
 * Heuristic: ids ≤ 2 are treated as `<pad>`/`<s>`/`</s>` (per the bundled
 * vocab). For sequences of two or more ids, a first id ≤ 2 and a last id ≤ 2
 * are each dropped independently; shorter sequences are appended verbatim.
 * NOTE(review): framing is not re-added here — presumably the consumer wraps
 * the accumulated ids in a single BOS/EOS pair; confirm against the runtime.
 */
function appendSeq(seq: KokoroPhonemeSequence, target: number[]): void {
  const ids = seq.ids;
  let start = 0;
  let end = ids.length;
  if (ids.length >= 2) {
    const first = ids[0];
    const last = ids[end - 1];
    if (first !== undefined && first <= 2) start = 1;
    if (last !== undefined && last <= 2) end -= 1;
  }
  for (let i = start; i < end; i++) {
    const id = ids[i];
    if (id !== undefined) target.push(id);
  }
}
|
|
112
|
+
|
|
113
|
+
/**
 * One-shot variant for whole-phrase callers (the scheduler dispatches one
 * phrase at a time in the default `punctuation` mode). Makes a single
 * phonemizer call and wraps the result as a final window.
 *
 * The phonemizer's ids are returned as-is: unlike `streamPhonemes`, no
 * BOS/EOS framing is stripped and the text is not split into words first.
 *
 * @param text - Phrase to phonemize.
 * @param opts - Supplies the phonemizer and language; `flushAt` is not used.
 * @returns A window with `isFinal: true`.
 */
export async function phonemizePhrase(
  text: string,
  opts: StreamPhonemesOptions,
): Promise<PhonemeStreamWindow> {
  const { ids, phonemes } = await opts.phonemizer.phonemize(text, opts.lang);
  return { ids, phonemes, isFinal: true };
}
|