@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -23,8 +23,37 @@
|
|
|
23
23
|
* the UI.
|
|
24
24
|
*/
|
|
25
25
|
|
|
26
|
+
import path from "node:path";
|
|
27
|
+
|
|
26
28
|
import { VoiceLifecycleError } from "./lifecycle";
|
|
27
29
|
|
|
30
|
+
/**
|
|
31
|
+
* Make a directory discoverable by the Win32 DLL loader for this process by
|
|
32
|
+
* prepending it to PATH (step 6 of the standard DLL search order).
|
|
33
|
+
*
|
|
34
|
+
* The fused lib's sibling backends (`ggml*.dll`, `llama*.dll`, `mtmd.dll`) are
|
|
35
|
+
* staged NEXT TO `elizainference.dll`, but when a DLL is opened by absolute
|
|
36
|
+
* path the Win32 loader does NOT search that DLL's own directory for its
|
|
37
|
+
* dependencies — it searches the host EXE's dir, the system dirs, and PATH. So
|
|
38
|
+
* `dlopen` fails with "error code 126" (a dependent DLL could not be found)
|
|
39
|
+
* even though the siblings are right there. Linux/macOS don't need this:
|
|
40
|
+
* `stage-desktop-fused-lib.mjs` bakes a relative rpath (`$ORIGIN` /
|
|
41
|
+
* `@loader_path`) at link time so the loader resolves siblings from the lib's
|
|
42
|
+
* own dir. Idempotent; a no-op off win32 and when `dir` is already on PATH.
|
|
43
|
+
*/
|
|
44
|
+
function ensureWin32DllSearchDir(dir: string): void {
|
|
45
|
+
if (process.platform !== "win32" || !dir) return;
|
|
46
|
+
const current = process.env.PATH ?? "";
|
|
47
|
+
const resolved = path.resolve(dir);
|
|
48
|
+
const already = current
|
|
49
|
+
.split(path.delimiter)
|
|
50
|
+
.some((seg) => seg && path.resolve(seg) === resolved);
|
|
51
|
+
if (already) return;
|
|
52
|
+
process.env.PATH = current
|
|
53
|
+
? `${resolved}${path.delimiter}${current}`
|
|
54
|
+
: resolved;
|
|
55
|
+
}
|
|
56
|
+
|
|
28
57
|
/**
|
|
29
58
|
* ABI version the JS binding was authored against. Must match the value
|
|
30
59
|
* `eliza_inference_abi_version()` returns at runtime — a mismatch is a
|
|
@@ -77,7 +106,7 @@ import { VoiceLifecycleError } from "./lifecycle";
|
|
|
77
106
|
* degraded capability: its voice/ASR/VAD/LLM/text surface is unchanged and
|
|
78
107
|
* Kokoro just probes unsupported on it.
|
|
79
108
|
*/
|
|
80
|
-
export const ELIZA_INFERENCE_ABI_VERSION =
|
|
109
|
+
export const ELIZA_INFERENCE_ABI_VERSION = 13 as const;
|
|
81
110
|
|
|
82
111
|
/** One transcribed word with playback-synced timing (ms from utterance start). */
|
|
83
112
|
export interface AsrWordTiming {
|
|
@@ -189,7 +218,7 @@ export type LlmStreamHandle = bigint;
|
|
|
189
218
|
/**
|
|
190
219
|
* Per-session config handed to `llmStreamOpen`. Mirrors
|
|
191
220
|
* `eliza_llm_stream_config_t` in
|
|
192
|
-
* `native/llama.cpp/tools/omnivoice/include/eliza-inference-ffi.h` (ABI
|
|
221
|
+
* `native/llama.cpp/tools/omnivoice/include/eliza-inference-ffi.h` (ABI v9).
|
|
193
222
|
*/
|
|
194
223
|
export interface LlmStreamConfig {
|
|
195
224
|
maxTokens: number;
|
|
@@ -213,7 +242,7 @@ export interface LlmStreamConfig {
|
|
|
213
242
|
* path. `null`/empty disables grammar constraint.
|
|
214
243
|
*/
|
|
215
244
|
gbnfGrammar?: string | null;
|
|
216
|
-
/**
|
|
245
|
+
/** Thinking-tag suppression passthrough (v1 no-op). */
|
|
217
246
|
disableThinking?: boolean;
|
|
218
247
|
/**
|
|
219
248
|
* Per-load GPU offload (ABI v8). Number of model layers to place on GPU.
|
|
@@ -229,6 +258,8 @@ export interface LlmStreamConfig {
|
|
|
229
258
|
cacheTypeK?: string | null;
|
|
230
259
|
/** KV-cache V quant type name (ABI v8); see `cacheTypeK`. */
|
|
231
260
|
cacheTypeV?: string | null;
|
|
261
|
+
/** Runtime context window in tokens (ABI v9). `undefined`/0 uses native fallback. */
|
|
262
|
+
contextSize?: number;
|
|
232
263
|
}
|
|
233
264
|
|
|
234
265
|
/**
|
|
@@ -645,6 +676,29 @@ export interface ElizaInferenceFfi {
|
|
|
645
676
|
maxTextBytes?: number;
|
|
646
677
|
}): string;
|
|
647
678
|
|
|
679
|
+
/* ---- Streaming mmproj vision describe (ABI v13) -------------- */
|
|
680
|
+
|
|
681
|
+
/**
|
|
682
|
+
* True when this build wires token-by-token vision describe
|
|
683
|
+
* (`eliza_inference_describe_image_stream_open`). A <=v12 / vision-off
|
|
684
|
+
* library returns false, so the IMAGE_DESCRIPTION handler falls back to the
|
|
685
|
+
* buffered {@link describeImage}.
|
|
686
|
+
*/
|
|
687
|
+
visionStreamSupported?(): boolean;
|
|
688
|
+
/**
|
|
689
|
+
* Open a streaming vision-describe session: prime an `LlmStreamHandle`'s KV
|
|
690
|
+
* with `imageBytes` (raw PNG/JPEG/WebP) + `prompt` through the mmproj at
|
|
691
|
+
* `mmprojPath`, then PULL tokens with the existing {@link llmStreamNext} loop
|
|
692
|
+
* and release via {@link llmStreamClose} — the same machinery as chat text.
|
|
693
|
+
* Throws `VoiceLifecycleError` when the build lacks vision streaming.
|
|
694
|
+
*/
|
|
695
|
+
describeImageStreamOpen?(args: {
|
|
696
|
+
ctx: ElizaInferenceContextHandle;
|
|
697
|
+
imageBytes: Uint8Array;
|
|
698
|
+
mmprojPath: string;
|
|
699
|
+
prompt?: string;
|
|
700
|
+
}): LlmStreamHandle;
|
|
701
|
+
|
|
648
702
|
/* ---- Tokenizer (ABI v9) -------------------------------------- */
|
|
649
703
|
|
|
650
704
|
/**
|
|
@@ -688,7 +742,7 @@ export interface ElizaInferenceFfi {
|
|
|
688
742
|
/**
|
|
689
743
|
* Single causal forward pass over `tokens` (a tokenized partial transcript)
|
|
690
744
|
* returning the next-token softmax probability of `targetTokenId` (the
|
|
691
|
-
* end-of-turn marker, e.g.
|
|
745
|
+
* end-of-turn marker, e.g. `<end_of_turn>`), plus the argmax next token and its
|
|
692
746
|
* probability. Runs on a dedicated scoring context over the loaded text
|
|
693
747
|
* model; KV is cleared per call so scores are independent.
|
|
694
748
|
*/
|
|
@@ -1006,6 +1060,21 @@ interface BunFfiSymbols {
|
|
|
1006
1060
|
maxTextBytes: bigint | number,
|
|
1007
1061
|
outErr: unknown,
|
|
1008
1062
|
) => number;
|
|
1063
|
+
// Streaming mmproj vision describe (ABI v13). Optional — absent on <=v12
|
|
1064
|
+
// builds (the probe then reports unsupported and IMAGE_DESCRIPTION falls back
|
|
1065
|
+
// to the buffered `eliza_inference_describe_image`). `_stream_open` returns an
|
|
1066
|
+
// EliLlmStream* (as a pointer/bigint) primed with the image+prompt KV; the
|
|
1067
|
+
// caller drives the existing `eliza_inference_llm_stream_next` loop and frees
|
|
1068
|
+
// via `eliza_inference_llm_stream_close`.
|
|
1069
|
+
eliza_inference_vision_stream_supported?: () => number;
|
|
1070
|
+
eliza_inference_describe_image_stream_open?: (
|
|
1071
|
+
ctx: bigint,
|
|
1072
|
+
imageBytes: unknown,
|
|
1073
|
+
nBytes: bigint | number,
|
|
1074
|
+
mmprojPath: unknown,
|
|
1075
|
+
prompt: unknown,
|
|
1076
|
+
outErr: unknown,
|
|
1077
|
+
) => bigint;
|
|
1009
1078
|
// Tokenizer (ABI v9). Optional — absent on v8 builds.
|
|
1010
1079
|
eliza_inference_tokenize_supported?: () => number;
|
|
1011
1080
|
eliza_inference_tokenize?: (
|
|
@@ -1089,8 +1158,8 @@ interface BunFfiModule {
|
|
|
1089
1158
|
byteOffset?: number,
|
|
1090
1159
|
byteLength?: number,
|
|
1091
1160
|
): ArrayBuffer;
|
|
1092
|
-
JSCallback: new (
|
|
1093
|
-
fn:
|
|
1161
|
+
JSCallback: new <F extends (...args: never[]) => unknown>(
|
|
1162
|
+
fn: F,
|
|
1094
1163
|
def: { args: number[]; returns: number },
|
|
1095
1164
|
) => BunFfiJSCallback;
|
|
1096
1165
|
}
|
|
@@ -1134,6 +1203,11 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
1134
1203
|
}
|
|
1135
1204
|
const T = ffi.FFIType;
|
|
1136
1205
|
|
|
1206
|
+
// Windows-only: make the fused lib's co-located backends (ggml*/llama*/mtmd
|
|
1207
|
+
// .dll) resolvable before dlopen, which otherwise fails with error 126. See
|
|
1208
|
+
// ensureWin32DllSearchDir for the full rationale.
|
|
1209
|
+
ensureWin32DllSearchDir(path.dirname(dylibPath));
|
|
1210
|
+
|
|
1137
1211
|
// All `char *` arguments are typed as T.ptr — Bun's `T.cstring` is a
|
|
1138
1212
|
// RETURN-only type for "library hands back a NUL-terminated string".
|
|
1139
1213
|
// For inputs we encode UTF-8 to a NUL-terminated Buffer on the JS
|
|
@@ -1383,6 +1457,21 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
1383
1457
|
returns: T.i32,
|
|
1384
1458
|
},
|
|
1385
1459
|
};
|
|
1460
|
+
// Streaming mmproj vision describe (ABI v13): open returns an EliLlmStream*
|
|
1461
|
+
// primed with the image+prompt KV; the caller drives the existing
|
|
1462
|
+
// `eliza_inference_llm_stream_next` loop. Layered on top of the v12 surface;
|
|
1463
|
+
// the cascade peels it when a <=v12 library is loaded (the
|
|
1464
|
+
// `visionStreamSupported()` probe then reports false and IMAGE_DESCRIPTION
|
|
1465
|
+
// falls back to the buffered `eliza_inference_describe_image`).
|
|
1466
|
+
let visionStreamSymbolsAvailable = true;
|
|
1467
|
+
const visionStreamDefs = {
|
|
1468
|
+
eliza_inference_vision_stream_supported: { args: [], returns: T.i32 },
|
|
1469
|
+
eliza_inference_describe_image_stream_open: {
|
|
1470
|
+
// ctx, image_bytes, n_bytes, mmproj_path, prompt, out_error -> EliLlmStream*
|
|
1471
|
+
args: [T.ptr, T.ptr, T.usize, T.ptr, T.ptr, T.ptr],
|
|
1472
|
+
returns: T.ptr,
|
|
1473
|
+
},
|
|
1474
|
+
};
|
|
1386
1475
|
const coreDefs = {
|
|
1387
1476
|
eliza_inference_abi_version: { args: [], returns: T.cstring },
|
|
1388
1477
|
eliza_inference_create: {
|
|
@@ -1459,7 +1548,36 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
1459
1548
|
const classifierDefs = { ...speakerDefs, ...diarizDefs };
|
|
1460
1549
|
const attempts = [
|
|
1461
1550
|
{
|
|
1462
|
-
// Full
|
|
1551
|
+
// Full v13 surface (v12 + token-by-token mmproj vision describe).
|
|
1552
|
+
defs: {
|
|
1553
|
+
...coreDefs,
|
|
1554
|
+
...referenceEncodeDefs,
|
|
1555
|
+
...nativeVadDefs,
|
|
1556
|
+
...wakewordDefs,
|
|
1557
|
+
...classifierDefs,
|
|
1558
|
+
...llmStreamDefs,
|
|
1559
|
+
...llmCapabilityDefs,
|
|
1560
|
+
...textModalitiesDefs,
|
|
1561
|
+
...kokoroDefs,
|
|
1562
|
+
...eotDefs,
|
|
1563
|
+
...timedAsrDefs,
|
|
1564
|
+
...visionStreamDefs,
|
|
1565
|
+
},
|
|
1566
|
+
referenceEncode: true,
|
|
1567
|
+
nativeVad: true,
|
|
1568
|
+
wakeword: true,
|
|
1569
|
+
classifiers: true,
|
|
1570
|
+
llmStream: true,
|
|
1571
|
+
llmCapability: true,
|
|
1572
|
+
textModalities: true,
|
|
1573
|
+
kokoro: true,
|
|
1574
|
+
eot: true,
|
|
1575
|
+
timedAsr: true,
|
|
1576
|
+
visionStream: true,
|
|
1577
|
+
},
|
|
1578
|
+
{
|
|
1579
|
+
// Full v12 surface (v11 + the in-process ASR word-timestamp decoder);
|
|
1580
|
+
// a v12 build lacks the v13 streaming-vision symbols.
|
|
1463
1581
|
defs: {
|
|
1464
1582
|
...coreDefs,
|
|
1465
1583
|
...referenceEncodeDefs,
|
|
@@ -1695,6 +1813,8 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
1695
1813
|
eotSymbolsAvailable = (attempt as { eot?: boolean }).eot ?? false;
|
|
1696
1814
|
timedAsrSymbolsAvailable =
|
|
1697
1815
|
(attempt as { timedAsr?: boolean }).timedAsr ?? false;
|
|
1816
|
+
visionStreamSymbolsAvailable =
|
|
1817
|
+
(attempt as { visionStream?: boolean }).visionStream ?? false;
|
|
1698
1818
|
break;
|
|
1699
1819
|
} catch (err) {
|
|
1700
1820
|
lastOpenError = err;
|
|
@@ -1738,6 +1858,7 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
1738
1858
|
// tokenizer), accepted only when those are absent too.
|
|
1739
1859
|
const abiOk =
|
|
1740
1860
|
reported === String(ELIZA_INFERENCE_ABI_VERSION) ||
|
|
1861
|
+
(reported === "12" && !visionStreamSymbolsAvailable) ||
|
|
1741
1862
|
(reported === "11" && !timedAsrSymbolsAvailable) ||
|
|
1742
1863
|
(reported === "10" && !eotSymbolsAvailable && !timedAsrSymbolsAvailable) ||
|
|
1743
1864
|
(reported === "9" && !kokoroSymbolsAvailable && !eotSymbolsAvailable) ||
|
|
@@ -1992,7 +2113,7 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
1992
2113
|
const speakerArg = cstr(speakerPresetId);
|
|
1993
2114
|
// (pcm: ptr, n_samples: usize, is_final: i32, user_data: ptr) -> i32
|
|
1994
2115
|
const cb = new ffi.JSCallback(
|
|
1995
|
-
(
|
|
2116
|
+
(pcmPtr: bigint, nSamples: bigint, isFinal: number) => {
|
|
1996
2117
|
const n = Number(nSamples);
|
|
1997
2118
|
// Bun delivers the C pointer as a bigint; copy the floats out
|
|
1998
2119
|
// before returning — the buffer is the library's, valid only
|
|
@@ -2003,7 +2124,7 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
2003
2124
|
: new Float32Array(0);
|
|
2004
2125
|
const requestCancel = onChunk({ pcm, isFinal: isFinal !== 0 });
|
|
2005
2126
|
return requestCancel === true ? 1 : 0;
|
|
2006
|
-
}
|
|
2127
|
+
},
|
|
2007
2128
|
{
|
|
2008
2129
|
args: [T.ptr, T.usize, T.i32, T.ptr],
|
|
2009
2130
|
returns: T.i32,
|
|
@@ -2139,9 +2260,9 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
2139
2260
|
}
|
|
2140
2261
|
// (ev: ptr to EliVerifierEvent, user_data: ptr) -> void
|
|
2141
2262
|
const cb = new ffi.JSCallback(
|
|
2142
|
-
(
|
|
2263
|
+
(evPtr: bigint) => {
|
|
2143
2264
|
cbFn(readVerifierEvent(evPtr, ffi));
|
|
2144
|
-
}
|
|
2265
|
+
},
|
|
2145
2266
|
{ args: [T.ptr, T.ptr], returns: T.void },
|
|
2146
2267
|
);
|
|
2147
2268
|
const rc = loadedLib.symbols.eliza_inference_set_verifier_callback(
|
|
@@ -2567,7 +2688,7 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
2567
2688
|
const err = makeOutErr();
|
|
2568
2689
|
// Marshal the config struct into a Buffer. Layout matches
|
|
2569
2690
|
// `eliza_llm_stream_config_t` in `eliza-inference-ffi.h`
|
|
2570
|
-
// (8-byte aligned, ABI
|
|
2691
|
+
// (8-byte aligned, ABI v9):
|
|
2571
2692
|
// off 0 : i32 max_tokens
|
|
2572
2693
|
// off 4 : f32 temperature
|
|
2573
2694
|
// off 8 : f32 top_p
|
|
@@ -2583,8 +2704,9 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
2583
2704
|
// off 60 : i32 n_gpu_layers (ABI v8 — fills old tail pad)
|
|
2584
2705
|
// off 64 : ptr cache_type_k (ABI v8)
|
|
2585
2706
|
// off 72 : ptr cache_type_v (ABI v8)
|
|
2586
|
-
//
|
|
2587
|
-
|
|
2707
|
+
// off 80 : i32 context_size (ABI v9)
|
|
2708
|
+
// sizeof = 88
|
|
2709
|
+
const buf = Buffer.alloc(88);
|
|
2588
2710
|
buf.writeInt32LE(config.maxTokens, 0);
|
|
2589
2711
|
buf.writeFloatLE(config.temperature, 4);
|
|
2590
2712
|
buf.writeFloatLE(config.topP, 8);
|
|
@@ -2621,6 +2743,7 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
2621
2743
|
);
|
|
2622
2744
|
buf.writeBigUInt64LE(toPtrBigInt(cacheKArg.ptr), 64);
|
|
2623
2745
|
buf.writeBigUInt64LE(toPtrBigInt(cacheVArg.ptr), 72);
|
|
2746
|
+
buf.writeInt32LE(config.contextSize ?? 0, 80);
|
|
2624
2747
|
const handle = open(ctx, ffi.ptr(buf), err.ptr);
|
|
2625
2748
|
if (isNullPointer(handle)) {
|
|
2626
2749
|
const message =
|
|
@@ -2858,6 +2981,45 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
2858
2981
|
);
|
|
2859
2982
|
},
|
|
2860
2983
|
|
|
2984
|
+
/* ---- Streaming mmproj vision describe (ABI v13) ------------ */
|
|
2985
|
+
|
|
2986
|
+
visionStreamSupported(): boolean {
|
|
2987
|
+
const probe = loadedLib.symbols.eliza_inference_vision_stream_supported;
|
|
2988
|
+
return (
|
|
2989
|
+
visionStreamSymbolsAvailable &&
|
|
2990
|
+
typeof probe === "function" &&
|
|
2991
|
+
probe() === 1
|
|
2992
|
+
);
|
|
2993
|
+
},
|
|
2994
|
+
|
|
2995
|
+
describeImageStreamOpen({ ctx, imageBytes, mmprojPath, prompt }) {
|
|
2996
|
+
const open = loadedLib.symbols.eliza_inference_describe_image_stream_open;
|
|
2997
|
+
if (!visionStreamSymbolsAvailable || typeof open !== "function") {
|
|
2998
|
+
throw new VoiceLifecycleError(
|
|
2999
|
+
"kernel-missing",
|
|
3000
|
+
"[ffi-bindings] eliza_inference_describe_image_stream_open is not exported by this build",
|
|
3001
|
+
);
|
|
3002
|
+
}
|
|
3003
|
+
const err = makeOutErr();
|
|
3004
|
+
const mmprojArg = cstr(mmprojPath);
|
|
3005
|
+
const promptArg = cstr(prompt ?? null);
|
|
3006
|
+
const handle = open(
|
|
3007
|
+
ctx,
|
|
3008
|
+
ffi.ptr(imageBytes),
|
|
3009
|
+
BigInt(imageBytes.length),
|
|
3010
|
+
mmprojArg.ptr,
|
|
3011
|
+
promptArg.ptr,
|
|
3012
|
+
err.ptr,
|
|
3013
|
+
);
|
|
3014
|
+
if (isNullPointer(handle)) {
|
|
3015
|
+
const message =
|
|
3016
|
+
takeError(err.buf) ??
|
|
3017
|
+
"[ffi-bindings] eliza_inference_describe_image_stream_open returned NULL with no diagnostic";
|
|
3018
|
+
throw new VoiceLifecycleError("kernel-missing", message);
|
|
3019
|
+
}
|
|
3020
|
+
return handle as LlmStreamHandle;
|
|
3021
|
+
},
|
|
3022
|
+
|
|
2861
3023
|
/* ---- Tokenizer (ABI v9) ------------------------------------ */
|
|
2862
3024
|
|
|
2863
3025
|
tokenizeSupported(): boolean {
|
|
@@ -3100,7 +3262,7 @@ function bindWithBunFfi(dylibPath: string): ElizaInferenceFfi {
|
|
|
3100
3262
|
if (typeof value === "bigint") return value;
|
|
3101
3263
|
if (typeof value === "number") return BigInt(value);
|
|
3102
3264
|
// Bun returns its internal pointer object that coerces to bigint.
|
|
3103
|
-
return BigInt(value as
|
|
3265
|
+
return BigInt(value as number);
|
|
3104
3266
|
}
|
|
3105
3267
|
|
|
3106
3268
|
/**
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* Fused FFI end-of-turn scorer (ABI v11).
|
|
3
3
|
*
|
|
4
4
|
* The fused replacement for the retired node-llama-cpp `controlledEvaluate()`
|
|
5
|
-
* path the EOT classifiers depended on. Computes P(
|
|
5
|
+
* path the EOT classifiers depended on. Computes P(`<end_of_turn>` next | partial
|
|
6
6
|
* transcript) through the single `libelizainference` handle: tokenize the
|
|
7
|
-
*
|
|
7
|
+
* Gemma-formatted partial transcript, then one causal forward pass
|
|
8
8
|
* (`eliza_inference_llm_eot_score`) reads the next-token probability of the
|
|
9
9
|
* end-of-turn marker. No separate model weights, no sampling loop, and no KV
|
|
10
10
|
* growth on the chat session — the dedicated native scoring context clears its
|
|
@@ -17,7 +17,7 @@ import type {
|
|
|
17
17
|
ElizaInferenceFfi,
|
|
18
18
|
} from "./ffi-bindings";
|
|
19
19
|
|
|
20
|
-
const
|
|
20
|
+
const END_OF_TURN_TOKEN = "<end_of_turn>";
|
|
21
21
|
|
|
22
22
|
export interface FfiEotScorerOptions {
|
|
23
23
|
/** The loaded fused inference binding (must expose the v11 EOT symbols). */
|
|
@@ -31,7 +31,7 @@ export interface FfiEotScorerOptions {
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
export interface FfiEotScoreResult {
|
|
34
|
-
/** Probability of
|
|
34
|
+
/** Probability of `<end_of_turn>` as the next token, ∈ [0, 1]. */
|
|
35
35
|
probability: number;
|
|
36
36
|
/** Wall-clock model latency for this scoring call. */
|
|
37
37
|
latencyMs: number;
|
|
@@ -40,7 +40,7 @@ export interface FfiEotScoreResult {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
|
-
* Stateful EOT scorer bound to a loaded fused text model. The
|
|
43
|
+
* Stateful EOT scorer bound to a loaded fused text model. The `<end_of_turn>`
|
|
44
44
|
* token id is resolved once and cached. Safe to keep across many voice turns.
|
|
45
45
|
*/
|
|
46
46
|
export class FfiEotScorer {
|
|
@@ -48,7 +48,7 @@ export class FfiEotScorer {
|
|
|
48
48
|
private readonly getContext: () => ElizaInferenceContextHandle;
|
|
49
49
|
private readonly maxHistoryTokens: number;
|
|
50
50
|
readonly modelLabel: string;
|
|
51
|
-
private
|
|
51
|
+
private endOfTurnTokenId: number | null = null;
|
|
52
52
|
|
|
53
53
|
constructor(options: FfiEotScorerOptions) {
|
|
54
54
|
this.ffi = options.ffi;
|
|
@@ -71,25 +71,25 @@ export class FfiEotScorer {
|
|
|
71
71
|
);
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
private
|
|
75
|
-
if (this.
|
|
74
|
+
private resolveEndOfTurn(ctx: ElizaInferenceContextHandle): number {
|
|
75
|
+
if (this.endOfTurnTokenId !== null) return this.endOfTurnTokenId;
|
|
76
76
|
const tokenize = this.ffi.tokenize;
|
|
77
77
|
if (!tokenize) {
|
|
78
78
|
throw new Error("[voice] FfiEotScorer: fused tokenizer is unavailable.");
|
|
79
79
|
}
|
|
80
80
|
const ids = tokenize({
|
|
81
81
|
ctx,
|
|
82
|
-
text:
|
|
82
|
+
text: END_OF_TURN_TOKEN,
|
|
83
83
|
addSpecial: false,
|
|
84
84
|
parseSpecial: true,
|
|
85
85
|
});
|
|
86
86
|
const first = ids[0];
|
|
87
87
|
if (ids.length !== 1 || first === undefined || !Number.isInteger(first)) {
|
|
88
88
|
throw new Error(
|
|
89
|
-
`[voice] FfiEotScorer: tokenizer did not resolve
|
|
89
|
+
`[voice] FfiEotScorer: tokenizer did not resolve <end_of_turn> to a single special token (got ${JSON.stringify([...ids])}). The text bundle must be Gemma-template compatible.`,
|
|
90
90
|
);
|
|
91
91
|
}
|
|
92
|
-
this.
|
|
92
|
+
this.endOfTurnTokenId = first;
|
|
93
93
|
return first;
|
|
94
94
|
}
|
|
95
95
|
|
|
@@ -103,7 +103,7 @@ export class FfiEotScorer {
|
|
|
103
103
|
"[voice] FfiEotScorer: fused EOT symbols are unavailable.",
|
|
104
104
|
);
|
|
105
105
|
}
|
|
106
|
-
const
|
|
106
|
+
const endOfTurnId = this.resolveEndOfTurn(ctx);
|
|
107
107
|
const formatted = formatEotPrompt(partialTranscript);
|
|
108
108
|
const all = tokenize({
|
|
109
109
|
ctx,
|
|
@@ -122,7 +122,11 @@ export class FfiEotScorer {
|
|
|
122
122
|
promptTokens: 0,
|
|
123
123
|
};
|
|
124
124
|
}
|
|
125
|
-
const { targetProb } = eotScore({
|
|
125
|
+
const { targetProb } = eotScore({
|
|
126
|
+
ctx,
|
|
127
|
+
tokens,
|
|
128
|
+
targetTokenId: endOfTurnId,
|
|
129
|
+
});
|
|
126
130
|
const probability = Number.isFinite(targetProb)
|
|
127
131
|
? Math.max(0, Math.min(1, targetProb))
|
|
128
132
|
: 0.5;
|
|
@@ -33,6 +33,13 @@ export {
|
|
|
33
33
|
type MockCheckpointSnapshot,
|
|
34
34
|
type MockSnapshotSource,
|
|
35
35
|
} from "./checkpoint-manager";
|
|
36
|
+
export {
|
|
37
|
+
computeDiarizationErrorRate,
|
|
38
|
+
type DerOptions,
|
|
39
|
+
type DerResult,
|
|
40
|
+
type DiarizationSegment,
|
|
41
|
+
diarizationWithinBudget,
|
|
42
|
+
} from "./diarization-error-rate";
|
|
36
43
|
export {
|
|
37
44
|
type BuildDeterministicFn,
|
|
38
45
|
type BuildMessageDependentFn,
|
|
@@ -42,6 +49,20 @@ export {
|
|
|
42
49
|
type FullContext,
|
|
43
50
|
mergeContext,
|
|
44
51
|
} from "./eager-context-builder";
|
|
52
|
+
export {
|
|
53
|
+
DEFAULT_PLAYBACK_DELAY_MS,
|
|
54
|
+
type EchoDelayEstimate,
|
|
55
|
+
type EchoDelayOptions,
|
|
56
|
+
estimateEchoDelaySamples,
|
|
57
|
+
PLATFORM_PLAYBACK_DELAY_DEFAULTS,
|
|
58
|
+
platformPlaybackDelayMs,
|
|
59
|
+
platformPlaybackDelaySamples,
|
|
60
|
+
} from "./echo-delay";
|
|
61
|
+
export { computeErle } from "./echo-metrics";
|
|
62
|
+
export {
|
|
63
|
+
EchoReferenceBuffer,
|
|
64
|
+
type EchoReferenceBufferOptions,
|
|
65
|
+
} from "./echo-reference-buffer";
|
|
45
66
|
export type {
|
|
46
67
|
LlamaContextLike as Eliza1EotLlamaContext,
|
|
47
68
|
LlamaContextSequenceLike as Eliza1EotLlamaSequence,
|
|
@@ -104,13 +125,13 @@ export {
|
|
|
104
125
|
type VoiceTurnSignal,
|
|
105
126
|
} from "./eot-classifier";
|
|
106
127
|
export {
|
|
107
|
-
|
|
128
|
+
applyGemmaUserTemplate,
|
|
108
129
|
createBundledLiveKitGgmlTurnDetector,
|
|
109
130
|
DEFAULT_LIVEKIT_TURN_DETECTOR_GGML_DIR,
|
|
110
131
|
DEFAULT_LIVEKIT_TURN_DETECTOR_GGUF_EN,
|
|
111
132
|
DEFAULT_LIVEKIT_TURN_DETECTOR_GGUF_INTL,
|
|
112
133
|
EotGgmlUnavailableError,
|
|
113
|
-
|
|
134
|
+
LIVEKIT_END_OF_TURN_TOKEN,
|
|
114
135
|
LiveKitGgmlTurnDetector,
|
|
115
136
|
type LiveKitGgmlTurnDetectorOptions,
|
|
116
137
|
turnDetectorGgufForTier,
|
|
@@ -153,6 +174,10 @@ export {
|
|
|
153
174
|
pipeMicToRingBuffer,
|
|
154
175
|
resolveDesktopRecorder,
|
|
155
176
|
} from "./mic-source";
|
|
177
|
+
export {
|
|
178
|
+
NlmsEchoCanceller,
|
|
179
|
+
type NlmsEchoCancellerOptions,
|
|
180
|
+
} from "./nlms-echo-canceller";
|
|
156
181
|
export {
|
|
157
182
|
DEFAULT_OPTIMISTIC_EOT_THRESHOLD,
|
|
158
183
|
OptimisticGenerationPolicy,
|
|
@@ -335,9 +360,9 @@ export * from "./types";
|
|
|
335
360
|
export {
|
|
336
361
|
createSileroVadDetector,
|
|
337
362
|
createVadDetector,
|
|
363
|
+
type ExternalVadAdapter,
|
|
338
364
|
GgmlSileroVad,
|
|
339
365
|
NativeSileroVad,
|
|
340
|
-
type QwenToolkitVadAdapter,
|
|
341
366
|
type ResolvedVadProvider,
|
|
342
367
|
RmsEnergyGate,
|
|
343
368
|
type RmsEnergyGateConfig,
|
|
@@ -371,6 +396,11 @@ export {
|
|
|
371
396
|
voiceEnsemblePeakMb,
|
|
372
397
|
voiceEnsembleSteadyStateMb,
|
|
373
398
|
} from "./voice-budget";
|
|
399
|
+
export {
|
|
400
|
+
type ArbiterPreloader,
|
|
401
|
+
VoicePreloadPredictor,
|
|
402
|
+
type VoicePreloadPredictorOptions,
|
|
403
|
+
} from "./voice-preload-predictor";
|
|
374
404
|
export {
|
|
375
405
|
readVoicePresetFile,
|
|
376
406
|
VOICE_PRESET_MAGIC,
|
|
@@ -395,15 +425,6 @@ export {
|
|
|
395
425
|
type VoiceProfileSampleInput,
|
|
396
426
|
verifyVoiceProfileArtifact,
|
|
397
427
|
} from "./voice-profile-artifact";
|
|
398
|
-
export {
|
|
399
|
-
DEFAULT_VOICE_SETTINGS,
|
|
400
|
-
effectiveBackendMode,
|
|
401
|
-
qualityPresetQuantizationRanking,
|
|
402
|
-
resolveVoiceSettings,
|
|
403
|
-
type VoiceBackendMode,
|
|
404
|
-
type VoiceModelQualityPreset,
|
|
405
|
-
type VoiceSettings,
|
|
406
|
-
} from "./voice-settings";
|
|
407
428
|
export {
|
|
408
429
|
type DrafterAbortReason,
|
|
409
430
|
type DrafterHandle,
|
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { scoreFirstResponseLatency } from "../../e2e-harness";
|
|
2
3
|
import type {
|
|
3
4
|
AudioSink,
|
|
4
5
|
Phrase,
|
|
5
6
|
SpeakerPreset,
|
|
6
7
|
TtsPcmChunk,
|
|
7
8
|
} from "../../types";
|
|
8
|
-
import {
|
|
9
|
+
import {
|
|
10
|
+
KOKORO_MOBILE_TTFA_BUDGET_MS,
|
|
11
|
+
KokoroTtsBackend,
|
|
12
|
+
} from "../kokoro-backend";
|
|
13
|
+
import type { KokoroRuntime, KokoroRuntimeInputs } from "../kokoro-runtime";
|
|
9
14
|
import { KokoroMockRuntime } from "../kokoro-runtime";
|
|
10
15
|
import type { KokoroPhonemizer } from "../types";
|
|
11
16
|
import { KOKORO_DEFAULT_VOICE_ID } from "../voices";
|
|
@@ -147,5 +152,111 @@ describe("KokoroTtsBackend", () => {
|
|
|
147
152
|
});
|
|
148
153
|
});
|
|
149
154
|
|
|
155
|
+
// ── TTFA: the first AUDIBLE chunk streams out before the phrase finishes ──
|
|
156
|
+
//
|
|
157
|
+
// These lock the streaming-TTFA contract (issue #8787 acceptance criterion 7)
|
|
158
|
+
// deterministically — no native model, no wall-clock flakiness. The gated
|
|
159
|
+
// real-FFI synth that measures TTFA against a true Kokoro forward lives in
|
|
160
|
+
// `kokoro-engine-bridge.real.test.ts` and skips when artifacts are absent.
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* A runtime that emits `chunkCount` body chunks and records, for every body
|
|
164
|
+
* chunk, how many runtime chunks had been produced when the backend re-emitted
|
|
165
|
+
* its first audible slice. Proves TTFA is bounded by ONE runtime forward
|
|
166
|
+
* boundary, not the whole phrase.
|
|
167
|
+
*/
|
|
168
|
+
class CountingKokoroRuntime implements KokoroRuntime {
|
|
169
|
+
readonly id = "mock" as const;
|
|
170
|
+
readonly sampleRate = 24000;
|
|
171
|
+
emitted = 0;
|
|
172
|
+
constructor(
|
|
173
|
+
private readonly totalSamples: number,
|
|
174
|
+
private readonly chunkCount: number,
|
|
175
|
+
) {}
|
|
176
|
+
async synthesize(args: KokoroRuntimeInputs): Promise<{ cancelled: boolean }> {
|
|
177
|
+
const perChunk = Math.max(
|
|
178
|
+
1,
|
|
179
|
+
Math.ceil(this.totalSamples / this.chunkCount),
|
|
180
|
+
);
|
|
181
|
+
for (let off = 0; off < this.totalSamples; off += perChunk) {
|
|
182
|
+
if (args.cancelSignal.cancelled) return { cancelled: true };
|
|
183
|
+
const n = Math.min(perChunk, this.totalSamples - off);
|
|
184
|
+
this.emitted++;
|
|
185
|
+
const want = args.onChunk({
|
|
186
|
+
pcm: new Float32Array(n).fill(0.05),
|
|
187
|
+
sampleRate: this.sampleRate,
|
|
188
|
+
isFinal: false,
|
|
189
|
+
});
|
|
190
|
+
if (want === true) return { cancelled: true };
|
|
191
|
+
}
|
|
192
|
+
args.onChunk({
|
|
193
|
+
pcm: new Float32Array(0),
|
|
194
|
+
sampleRate: this.sampleRate,
|
|
195
|
+
isFinal: true,
|
|
196
|
+
});
|
|
197
|
+
return { cancelled: false };
|
|
198
|
+
}
|
|
199
|
+
dispose(): void {}
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
describe("KokoroTtsBackend — streaming TTFA", () => {
|
|
203
|
+
it("emits the first audible chunk from the first runtime forward (sub-phrase TTFA)", async () => {
|
|
204
|
+
const runtime = new CountingKokoroRuntime(48000, 4); // 2s across 4 forwards
|
|
205
|
+
const backend = new KokoroTtsBackend({
|
|
206
|
+
runtime,
|
|
207
|
+
layout: {
|
|
208
|
+
root: "/tmp/kokoro",
|
|
209
|
+
modelFile: "kokoro-82m-v1_0.gguf",
|
|
210
|
+
voicesDir: "/tmp/kokoro/voices",
|
|
211
|
+
sampleRate: 24000,
|
|
212
|
+
},
|
|
213
|
+
defaultVoiceId: KOKORO_DEFAULT_VOICE_ID,
|
|
214
|
+
phonemizer: fixedPhonemizer(),
|
|
215
|
+
streamingChunkSamples: 1200, // 50ms slices
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
let firstAudibleAtRuntimeEmits = -1;
|
|
219
|
+
let firstAudibleSamples = -1;
|
|
220
|
+
await backend.synthesizeStream({
|
|
221
|
+
phrase: makePhrase("a full sentence that decodes in one forward"),
|
|
222
|
+
preset: makePreset(KOKORO_DEFAULT_VOICE_ID),
|
|
223
|
+
cancelSignal: { cancelled: false },
|
|
224
|
+
onChunk: (c) => {
|
|
225
|
+
if (!c.isFinal && c.pcm.length > 0 && firstAudibleAtRuntimeEmits < 0) {
|
|
226
|
+
firstAudibleAtRuntimeEmits = runtime.emitted;
|
|
227
|
+
firstAudibleSamples = c.pcm.length;
|
|
228
|
+
}
|
|
229
|
+
return undefined;
|
|
230
|
+
},
|
|
231
|
+
});
|
|
232
|
+
|
|
233
|
+
// The listener hears audio after the FIRST runtime forward, not all 4.
|
|
234
|
+
expect(firstAudibleAtRuntimeEmits).toBe(1);
|
|
235
|
+
// The first audible chunk is a bounded sub-phrase slice, not the phrase.
|
|
236
|
+
expect(firstAudibleSamples).toBeGreaterThan(0);
|
|
237
|
+
expect(firstAudibleSamples).toBeLessThanOrEqual(1200);
|
|
238
|
+
});
|
|
239
|
+
|
|
240
|
+
it("a mobile-class TTFA gate passes within budget and fails past it", () => {
|
|
241
|
+
// Representative warm-handle Kokoro first-phrase TTFB (~97ms TTFB +
|
|
242
|
+
// phonemize). Well within the mobile budget.
|
|
243
|
+
const within = scoreFirstResponseLatency({
|
|
244
|
+
turnStartedAtMs: 1_000,
|
|
245
|
+
ttsFirstAudioAtMs: 1_000 + 180,
|
|
246
|
+
maxFirstAudioMs: KOKORO_MOBILE_TTFA_BUDGET_MS,
|
|
247
|
+
});
|
|
248
|
+
expect(within.firstAudioMs).toBe(180);
|
|
249
|
+
expect(within.passed).toBe(true);
|
|
250
|
+
|
|
251
|
+
// A regression that blows the budget must fail the gate, never silently pass.
|
|
252
|
+
const blown = scoreFirstResponseLatency({
|
|
253
|
+
turnStartedAtMs: 1_000,
|
|
254
|
+
ttsFirstAudioAtMs: 1_000 + KOKORO_MOBILE_TTFA_BUDGET_MS + 50,
|
|
255
|
+
maxFirstAudioMs: KOKORO_MOBILE_TTFA_BUDGET_MS,
|
|
256
|
+
});
|
|
257
|
+
expect(blown.passed).toBe(false);
|
|
258
|
+
});
|
|
259
|
+
});
|
|
260
|
+
|
|
150
261
|
// Local declaration so the test file does not import the audio sink (unused).
|
|
151
262
|
void (null as unknown as AudioSink);
|