@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -78,6 +78,7 @@ import {
|
|
|
78
78
|
} from "./pipeline-impls";
|
|
79
79
|
import type { VoiceProfileStore } from "./profile-store";
|
|
80
80
|
import { type SchedulerEvents, VoiceScheduler } from "./scheduler";
|
|
81
|
+
import { AgentSelfVoiceImprint } from "./self-voice-imprint";
|
|
81
82
|
import {
|
|
82
83
|
type MmapRegionHandle,
|
|
83
84
|
SharedResourceRegistry,
|
|
@@ -120,6 +121,7 @@ import type {
|
|
|
120
121
|
TranscriptionAudio,
|
|
121
122
|
VadEventSource,
|
|
122
123
|
} from "./types";
|
|
124
|
+
import { decodeMonoPcm16Wav, encodeMonoPcm16Wav } from "./wav-codec";
|
|
123
125
|
|
|
124
126
|
const SAMPLE_RATE_DEFAULT = 24_000;
|
|
125
127
|
const RING_BUFFER_CAPACITY_DEFAULT = SAMPLE_RATE_DEFAULT * 4; // 4s
|
|
@@ -605,7 +607,7 @@ export interface EngineVoiceBridgeOptions {
|
|
|
605
607
|
/**
|
|
606
608
|
* Override only the TTS backend while keeping the fused bundle lifecycle
|
|
607
609
|
* and ASR FFI loaded. Used when a bundle falls back from OmniVoice speech
|
|
608
|
-
* to Kokoro speech but still needs bundled
|
|
610
|
+
* to Kokoro speech but still needs bundled Gemma ASR for mic input.
|
|
609
611
|
*/
|
|
610
612
|
ttsBackendOverride?: OmniVoiceBackend;
|
|
611
613
|
/** Optional speaker preset paired with `ttsBackendOverride`. */
|
|
@@ -820,13 +822,25 @@ function isEventRuntime(
|
|
|
820
822
|
* Flatten the (possibly lazy) `LiveAttributionConfig` into the plain options
|
|
821
823
|
* the runtime helper consumes. Resolved per turn so a changing household roster
|
|
822
824
|
* is picked up without re-arming voice.
|
|
825
|
+
*
|
|
826
|
+
* `transcript` is the turn's joined ASR text. The in-process engine owns ASR, so
|
|
827
|
+
* it threads the real transcript through to `handleLiveVoiceAttribution` — the
|
|
828
|
+
* merge engine's live name/partner extraction (`VoiceObserver.ingestTurn`) needs
|
|
829
|
+
* *what* was said, not just *who* said it (#8786). When empty it is omitted, so
|
|
830
|
+
* the helper falls back to "" exactly as before and diarization-only callers are
|
|
831
|
+
* unaffected.
|
|
823
832
|
*/
|
|
824
|
-
function resolveLiveAttributionOptions(
|
|
833
|
+
function resolveLiveAttributionOptions(
|
|
834
|
+
cfg: LiveAttributionConfig | null,
|
|
835
|
+
transcript = "",
|
|
836
|
+
): {
|
|
825
837
|
ownerEntityId?: string | null;
|
|
826
838
|
knownSpeakerEntityIds?: readonly string[];
|
|
827
839
|
wakeWordActive?: boolean;
|
|
840
|
+
transcript?: string;
|
|
828
841
|
} {
|
|
829
|
-
|
|
842
|
+
const transcriptOpt = transcript !== "" ? { transcript } : {};
|
|
843
|
+
if (!cfg) return transcriptOpt;
|
|
830
844
|
const ownerEntityId =
|
|
831
845
|
typeof cfg.ownerEntityId === "function"
|
|
832
846
|
? cfg.ownerEntityId()
|
|
@@ -843,6 +857,7 @@ function resolveLiveAttributionOptions(cfg: LiveAttributionConfig | null): {
|
|
|
843
857
|
...(ownerEntityId !== undefined ? { ownerEntityId } : {}),
|
|
844
858
|
...(knownSpeakerEntityIds !== undefined ? { knownSpeakerEntityIds } : {}),
|
|
845
859
|
...(wakeWordActive !== undefined ? { wakeWordActive } : {}),
|
|
860
|
+
...transcriptOpt,
|
|
846
861
|
};
|
|
847
862
|
}
|
|
848
863
|
|
|
@@ -946,6 +961,7 @@ export class EngineVoiceBridge {
|
|
|
946
961
|
asrAvailable: boolean,
|
|
947
962
|
phraseCache: PhraseCache,
|
|
948
963
|
attributionPipeline: VoiceAttributionPipeline | null = null,
|
|
964
|
+
private readonly selfVoiceImprint: AgentSelfVoiceImprint | null = null,
|
|
949
965
|
cancellationCoordinator: VoiceCancellationCoordinator | null = null,
|
|
950
966
|
optimisticGenerationPolicy: OptimisticGenerationPolicy | null = null,
|
|
951
967
|
eventRuntime: IAgentRuntime | null = null,
|
|
@@ -1133,12 +1149,30 @@ export class EngineVoiceBridge {
|
|
|
1133
1149
|
};
|
|
1134
1150
|
|
|
1135
1151
|
const sinkOverride = opts.sink;
|
|
1152
|
+
let selfVoiceImprint: AgentSelfVoiceImprint | null = null;
|
|
1153
|
+
const schedulerEvents: SchedulerEvents = {
|
|
1154
|
+
...(opts.events ?? {}),
|
|
1155
|
+
onAudio(chunk) {
|
|
1156
|
+
opts.events?.onAudio?.(chunk);
|
|
1157
|
+
if (!selfVoiceImprint) return;
|
|
1158
|
+
void selfVoiceImprint
|
|
1159
|
+
.observeAudio(chunk.pcm, chunk.sampleRate)
|
|
1160
|
+
.catch((err: unknown) => {
|
|
1161
|
+
logger.warn(
|
|
1162
|
+
{
|
|
1163
|
+
error: err instanceof Error ? err.message : String(err),
|
|
1164
|
+
},
|
|
1165
|
+
"[voice-bridge] agent self-voice imprint update failed",
|
|
1166
|
+
);
|
|
1167
|
+
});
|
|
1168
|
+
},
|
|
1169
|
+
};
|
|
1136
1170
|
const scheduler = new VoiceScheduler(
|
|
1137
1171
|
config,
|
|
1138
1172
|
sinkOverride
|
|
1139
1173
|
? { backend, sink: sinkOverride, phraseCache }
|
|
1140
1174
|
: { backend, phraseCache },
|
|
1141
|
-
|
|
1175
|
+
schedulerEvents,
|
|
1142
1176
|
);
|
|
1143
1177
|
|
|
1144
1178
|
// Wire the voice lifecycle. The lifecycle starts in `voice-off` —
|
|
@@ -1150,7 +1184,9 @@ export class EngineVoiceBridge {
|
|
|
1150
1184
|
const registry = opts.sharedResources ?? new SharedResourceRegistry();
|
|
1151
1185
|
const loaders =
|
|
1152
1186
|
opts.lifecycleLoaders ??
|
|
1153
|
-
defaultLifecycleLoaders(opts.bundleRoot, ffiHandle, ffiContextRef
|
|
1187
|
+
defaultLifecycleLoaders(opts.bundleRoot, ffiHandle, ffiContextRef, {
|
|
1188
|
+
skipTtsRegion: Boolean(opts.ttsBackendOverride),
|
|
1189
|
+
});
|
|
1154
1190
|
const lifecycle = new VoiceLifecycle({ registry, loaders });
|
|
1155
1191
|
|
|
1156
1192
|
// Wire speaker-attribution when a profile store is provided. The
|
|
@@ -1208,6 +1244,9 @@ export class EngineVoiceBridge {
|
|
|
1208
1244
|
await resolvedEncoder?.dispose();
|
|
1209
1245
|
},
|
|
1210
1246
|
};
|
|
1247
|
+
selfVoiceImprint = new AgentSelfVoiceImprint({
|
|
1248
|
+
encoder: lazyEncoder,
|
|
1249
|
+
});
|
|
1211
1250
|
// Fused diarizer (optional). When the build does not advertise the
|
|
1212
1251
|
// diarizer ABI, attribution runs without it — a single-speaker turn
|
|
1213
1252
|
// collapses to one segment (the attribution-pipeline localSpeakerId=0
|
|
@@ -1265,6 +1304,7 @@ export class EngineVoiceBridge {
|
|
|
1265
1304
|
asrAvailable,
|
|
1266
1305
|
phraseCache,
|
|
1267
1306
|
attributionPipeline,
|
|
1307
|
+
selfVoiceImprint,
|
|
1268
1308
|
wiring?.coordinator ?? null,
|
|
1269
1309
|
wiring?.policy ?? null,
|
|
1270
1310
|
isEventRuntime(opts.runtime) ? opts.runtime : null,
|
|
@@ -1359,6 +1399,7 @@ export class EngineVoiceBridge {
|
|
|
1359
1399
|
false, // ASR is not served from this path
|
|
1360
1400
|
phraseCache,
|
|
1361
1401
|
null, // no profile store on Kokoro-only
|
|
1402
|
+
null, // no self-voice imprint without live attribution
|
|
1362
1403
|
wiring?.coordinator ?? null,
|
|
1363
1404
|
wiring?.policy ?? null,
|
|
1364
1405
|
);
|
|
@@ -1743,6 +1784,7 @@ export class EngineVoiceBridge {
|
|
|
1743
1784
|
async transcribePcm(
|
|
1744
1785
|
args: TranscriptionAudio,
|
|
1745
1786
|
signal?: AbortSignal,
|
|
1787
|
+
onPartial?: (delta: string) => void,
|
|
1746
1788
|
): Promise<string> {
|
|
1747
1789
|
this.assertVoiceOn("transcribe audio");
|
|
1748
1790
|
if (signal?.aborted) {
|
|
@@ -1750,6 +1792,54 @@ export class EngineVoiceBridge {
|
|
|
1750
1792
|
? signal.reason
|
|
1751
1793
|
: new DOMException("Aborted", "AbortError");
|
|
1752
1794
|
}
|
|
1795
|
+
// Streaming path: when the caller wants partial transcripts (the
|
|
1796
|
+
// TRANSCRIPTION model handler forwards the runtime's onStreamChunk here),
|
|
1797
|
+
// drive the fused streaming-ASR session and emit each running partial as a
|
|
1798
|
+
// delta — the same per-token pipe as chat text. Feed in ~1s windows so the
|
|
1799
|
+
// decode surfaces partials progressively. Degrades gracefully: when the
|
|
1800
|
+
// fused build's streaming-ASR decoder is a stub, createStreamingTranscriber
|
|
1801
|
+
// resolves the fused batch adapter and the final transcript is emitted once.
|
|
1802
|
+
if (onPartial) {
|
|
1803
|
+
const transcriber = this.createStreamingTranscriber();
|
|
1804
|
+
let shown = 0;
|
|
1805
|
+
const emit = (full: string): void => {
|
|
1806
|
+
if (typeof full === "string" && full.length > shown) {
|
|
1807
|
+
const delta = full.slice(shown);
|
|
1808
|
+
shown = full.length;
|
|
1809
|
+
onPartial(delta);
|
|
1810
|
+
}
|
|
1811
|
+
};
|
|
1812
|
+
const unsub = transcriber.on((ev) => {
|
|
1813
|
+
if (ev.kind === "partial" || ev.kind === "final") {
|
|
1814
|
+
emit(ev.update.partial);
|
|
1815
|
+
}
|
|
1816
|
+
});
|
|
1817
|
+
const abort = () => transcriber.dispose();
|
|
1818
|
+
try {
|
|
1819
|
+
signal?.addEventListener("abort", abort, { once: true });
|
|
1820
|
+
const win = Math.max(1600, Math.round(args.sampleRate));
|
|
1821
|
+
for (let off = 0; off < args.pcm.length; off += win) {
|
|
1822
|
+
if (signal?.aborted) break;
|
|
1823
|
+
transcriber.feed({
|
|
1824
|
+
pcm: args.pcm.subarray(off, Math.min(off + win, args.pcm.length)),
|
|
1825
|
+
sampleRate: args.sampleRate,
|
|
1826
|
+
timestampMs: Math.round((off / args.sampleRate) * 1000),
|
|
1827
|
+
});
|
|
1828
|
+
}
|
|
1829
|
+
const final = await transcriber.flush();
|
|
1830
|
+
emit(final.partial);
|
|
1831
|
+
if (signal?.aborted) {
|
|
1832
|
+
throw signal.reason instanceof Error
|
|
1833
|
+
? signal.reason
|
|
1834
|
+
: new DOMException("Aborted", "AbortError");
|
|
1835
|
+
}
|
|
1836
|
+
return final.partial;
|
|
1837
|
+
} finally {
|
|
1838
|
+
unsub();
|
|
1839
|
+
signal?.removeEventListener("abort", abort);
|
|
1840
|
+
transcriber.dispose();
|
|
1841
|
+
}
|
|
1842
|
+
}
|
|
1753
1843
|
const backendBatch = this.backend as OmniVoiceBackend & {
|
|
1754
1844
|
transcribe?: (args: TranscriptionAudio) => Promise<string>;
|
|
1755
1845
|
};
|
|
@@ -1833,6 +1923,26 @@ export class EngineVoiceBridge {
|
|
|
1833
1923
|
events?: VoiceTurnEvents,
|
|
1834
1924
|
): Promise<"done" | "token-cap" | "cancelled"> {
|
|
1835
1925
|
this.assertVoiceOn("run a voice turn");
|
|
1926
|
+
// The turn's ASR transcript materializes inside `pipeline.run` (the
|
|
1927
|
+
// `onAsrComplete` event) while attribution runs in parallel, so the two
|
|
1928
|
+
// have to be correlated. `transcriptReady` resolves with the joined ASR
|
|
1929
|
+
// text the instant ASR finalizes; the attribution `.then` awaits it before
|
|
1930
|
+
// emitting `VOICE_TURN_OBSERVED` so the merge engine sees *what* was said,
|
|
1931
|
+
// not just *who* said it (#8786). The pipeline's `finally` resolves it with
|
|
1932
|
+
// the captured text (or "") so a cancelled/no-ASR turn never hangs the await.
|
|
1933
|
+
let asrTranscript = "";
|
|
1934
|
+
let resolveTranscript: (text: string) => void = () => {};
|
|
1935
|
+
const transcriptReady = new Promise<string>((resolve) => {
|
|
1936
|
+
resolveTranscript = resolve;
|
|
1937
|
+
});
|
|
1938
|
+
const turnEvents: VoiceTurnEvents = {
|
|
1939
|
+
...events,
|
|
1940
|
+
onAsrComplete(tokens) {
|
|
1941
|
+
asrTranscript = tokens.map((t) => t.text).join("");
|
|
1942
|
+
resolveTranscript(asrTranscript);
|
|
1943
|
+
events?.onAsrComplete?.(tokens);
|
|
1944
|
+
},
|
|
1945
|
+
};
|
|
1836
1946
|
// If a profileStore was wired, kick off speaker-attribution in parallel
|
|
1837
1947
|
// with ASR. The attribution uses the same PCM buffer as the transcriber
|
|
1838
1948
|
// but runs through the diarizer + encoder + profile-store independently.
|
|
@@ -1840,9 +1950,9 @@ export class EngineVoiceBridge {
|
|
|
1840
1950
|
// arrives via `onAttribution` asynchronously (possibly after onComplete).
|
|
1841
1951
|
if (
|
|
1842
1952
|
this.attributionPipeline &&
|
|
1843
|
-
(
|
|
1953
|
+
(turnEvents.onAttribution || this.eventRuntime)
|
|
1844
1954
|
) {
|
|
1845
|
-
const onAttribution =
|
|
1955
|
+
const onAttribution = turnEvents.onAttribution;
|
|
1846
1956
|
const attribution = this.attributionPipeline;
|
|
1847
1957
|
const eventRuntime = this.eventRuntime;
|
|
1848
1958
|
const liveAttribution = this.liveAttribution;
|
|
@@ -1859,14 +1969,23 @@ export class EngineVoiceBridge {
|
|
|
1859
1969
|
// output to the caller. Any caller with a profileStore +
|
|
1860
1970
|
// runtime gets diarization-driven gating for free.
|
|
1861
1971
|
if (eventRuntime) {
|
|
1972
|
+
const transcript = await transcriptReady;
|
|
1862
1973
|
const { handleLiveVoiceAttribution } = await import(
|
|
1863
1974
|
"../../runtime/voice-entity-binding.js"
|
|
1864
1975
|
);
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1976
|
+
const selfVoiceSimilarity =
|
|
1977
|
+
output.observation?.embedding && this.selfVoiceImprint
|
|
1978
|
+
? await this.selfVoiceImprint.similarity(
|
|
1979
|
+
output.observation.embedding,
|
|
1980
|
+
)
|
|
1981
|
+
: null;
|
|
1982
|
+
await handleLiveVoiceAttribution(eventRuntime, output, {
|
|
1983
|
+
...resolveLiveAttributionOptions(liveAttribution, transcript),
|
|
1984
|
+
agentSpeaking: this.scheduler.bargeIn.isAgentSpeaking,
|
|
1985
|
+
...(typeof selfVoiceSimilarity === "number"
|
|
1986
|
+
? { selfVoiceSimilarity }
|
|
1987
|
+
: {}),
|
|
1988
|
+
});
|
|
1870
1989
|
}
|
|
1871
1990
|
onAttribution?.(output);
|
|
1872
1991
|
})
|
|
@@ -1881,11 +2000,14 @@ export class EngineVoiceBridge {
|
|
|
1881
2000
|
);
|
|
1882
2001
|
});
|
|
1883
2002
|
}
|
|
1884
|
-
const pipeline = this.buildPipeline(textRunner, config,
|
|
2003
|
+
const pipeline = this.buildPipeline(textRunner, config, turnEvents);
|
|
1885
2004
|
this.activePipeline = pipeline;
|
|
1886
2005
|
try {
|
|
1887
2006
|
return await pipeline.run(audio);
|
|
1888
2007
|
} finally {
|
|
2008
|
+
// Settle the transcript promise so a cancelled/no-ASR turn (where
|
|
2009
|
+
// `onAsrComplete` never fired) cannot leave the attribution await pending.
|
|
2010
|
+
resolveTranscript(asrTranscript);
|
|
1889
2011
|
if (this.activePipeline === pipeline) this.activePipeline = null;
|
|
1890
2012
|
}
|
|
1891
2013
|
}
|
|
@@ -1950,97 +2072,10 @@ export class EngineVoiceBridge {
|
|
|
1950
2072
|
}
|
|
1951
2073
|
}
|
|
1952
2074
|
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
const channels = 1;
|
|
1958
|
-
const bytesPerSample = 2;
|
|
1959
|
-
const dataBytes = pcm.length * bytesPerSample;
|
|
1960
|
-
const out = new Uint8Array(44 + dataBytes);
|
|
1961
|
-
const view = new DataView(out.buffer);
|
|
1962
|
-
writeAscii(out, 0, "RIFF");
|
|
1963
|
-
view.setUint32(4, 36 + dataBytes, true);
|
|
1964
|
-
writeAscii(out, 8, "WAVE");
|
|
1965
|
-
writeAscii(out, 12, "fmt ");
|
|
1966
|
-
view.setUint32(16, 16, true);
|
|
1967
|
-
view.setUint16(20, 1, true);
|
|
1968
|
-
view.setUint16(22, channels, true);
|
|
1969
|
-
view.setUint32(24, sampleRate, true);
|
|
1970
|
-
view.setUint32(28, sampleRate * channels * bytesPerSample, true);
|
|
1971
|
-
view.setUint16(32, channels * bytesPerSample, true);
|
|
1972
|
-
view.setUint16(34, bytesPerSample * 8, true);
|
|
1973
|
-
writeAscii(out, 36, "data");
|
|
1974
|
-
view.setUint32(40, dataBytes, true);
|
|
1975
|
-
|
|
1976
|
-
let offset = 44;
|
|
1977
|
-
for (const sample of pcm) {
|
|
1978
|
-
const clamped = Math.max(-1, Math.min(1, sample));
|
|
1979
|
-
const value = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff;
|
|
1980
|
-
view.setInt16(offset, Math.round(value), true);
|
|
1981
|
-
offset += bytesPerSample;
|
|
1982
|
-
}
|
|
1983
|
-
return out;
|
|
1984
|
-
}
|
|
1985
|
-
|
|
1986
|
-
export function decodeMonoPcm16Wav(bytes: Uint8Array): TranscriptionAudio {
|
|
1987
|
-
if (bytes.byteLength < 44) {
|
|
1988
|
-
throw new Error("[voice] WAV input is too short to contain a header");
|
|
1989
|
-
}
|
|
1990
|
-
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
1991
|
-
if (
|
|
1992
|
-
readAscii(bytes, 0, 4) !== "RIFF" ||
|
|
1993
|
-
readAscii(bytes, 8, 4) !== "WAVE" ||
|
|
1994
|
-
readAscii(bytes, 12, 4) !== "fmt "
|
|
1995
|
-
) {
|
|
1996
|
-
throw new Error("[voice] Local transcription expects mono PCM16 WAV bytes");
|
|
1997
|
-
}
|
|
1998
|
-
const audioFormat = view.getUint16(20, true);
|
|
1999
|
-
const channels = view.getUint16(22, true);
|
|
2000
|
-
const sampleRate = view.getUint32(24, true);
|
|
2001
|
-
const bitsPerSample = view.getUint16(34, true);
|
|
2002
|
-
if (audioFormat !== 1 || channels !== 1 || bitsPerSample !== 16) {
|
|
2003
|
-
throw new Error(
|
|
2004
|
-
`[voice] Local transcription expects mono PCM16 WAV (format=1 channels=1 bits=16); got format=${audioFormat} channels=${channels} bits=${bitsPerSample}`,
|
|
2005
|
-
);
|
|
2006
|
-
}
|
|
2007
|
-
|
|
2008
|
-
let pos = 36;
|
|
2009
|
-
while (pos + 8 <= bytes.byteLength) {
|
|
2010
|
-
const chunkId = readAscii(bytes, pos, 4);
|
|
2011
|
-
const chunkBytes = view.getUint32(pos + 4, true);
|
|
2012
|
-
const dataStart = pos + 8;
|
|
2013
|
-
if (chunkId === "data") {
|
|
2014
|
-
if (dataStart + chunkBytes > bytes.byteLength) {
|
|
2015
|
-
throw new Error("[voice] WAV data chunk exceeds input length");
|
|
2016
|
-
}
|
|
2017
|
-
if (chunkBytes % 2 !== 0) {
|
|
2018
|
-
throw new Error("[voice] WAV PCM16 data chunk has odd byte length");
|
|
2019
|
-
}
|
|
2020
|
-
const pcm = new Float32Array(chunkBytes / 2);
|
|
2021
|
-
for (let i = 0; i < pcm.length; i++) {
|
|
2022
|
-
pcm[i] = view.getInt16(dataStart + i * 2, true) / 0x8000;
|
|
2023
|
-
}
|
|
2024
|
-
return { pcm, sampleRate };
|
|
2025
|
-
}
|
|
2026
|
-
pos = dataStart + chunkBytes + (chunkBytes % 2);
|
|
2027
|
-
}
|
|
2028
|
-
throw new Error("[voice] WAV input is missing a data chunk");
|
|
2029
|
-
}
|
|
2030
|
-
|
|
2031
|
-
function writeAscii(out: Uint8Array, offset: number, text: string): void {
|
|
2032
|
-
for (let i = 0; i < text.length; i++) {
|
|
2033
|
-
out[offset + i] = text.charCodeAt(i);
|
|
2034
|
-
}
|
|
2035
|
-
}
|
|
2036
|
-
|
|
2037
|
-
function readAscii(bytes: Uint8Array, offset: number, length: number): string {
|
|
2038
|
-
let out = "";
|
|
2039
|
-
for (let i = 0; i < length; i++) {
|
|
2040
|
-
out += String.fromCharCode(bytes[offset + i]);
|
|
2041
|
-
}
|
|
2042
|
-
return out;
|
|
2043
|
-
}
|
|
2075
|
+
// The mono PCM16 WAV codec lives in the dependency-light `wav-codec.ts` so
|
|
2076
|
+
// corpus / fixture / test code can encode + decode WAV without dragging in this
|
|
2077
|
+
// heavy module. Re-exported here for the existing callers.
|
|
2078
|
+
export { decodeMonoPcm16Wav, encodeMonoPcm16Wav };
|
|
2044
2079
|
|
|
2045
2080
|
function readPositiveIntEnv(name: string): number | undefined {
|
|
2046
2081
|
const raw = process.env[name]?.trim();
|
|
@@ -2090,8 +2125,8 @@ function ensureContext(
|
|
|
2090
2125
|
* `createStreamingTranscriber` directly (the fused-only chain in
|
|
2091
2126
|
* `transcriber.ts`: fused streaming → fused batch → AsrUnavailableError).
|
|
2092
2127
|
*/
|
|
2093
|
-
function
|
|
2094
|
-
|
|
2128
|
+
function noopMmapRegion(id: string): MmapRegionHandle {
|
|
2129
|
+
return {
|
|
2095
2130
|
id,
|
|
2096
2131
|
path: "",
|
|
2097
2132
|
sizeBytes: 0,
|
|
@@ -2101,10 +2136,13 @@ function kokoroOnlyLifecycleLoaders(): VoiceLifecycleLoaders {
|
|
|
2101
2136
|
async release() {
|
|
2102
2137
|
// No mmap region to release.
|
|
2103
2138
|
},
|
|
2104
|
-
}
|
|
2139
|
+
};
|
|
2140
|
+
}
|
|
2141
|
+
|
|
2142
|
+
function kokoroOnlyLifecycleLoaders(): VoiceLifecycleLoaders {
|
|
2105
2143
|
return {
|
|
2106
|
-
loadTtsRegion: async () =>
|
|
2107
|
-
loadAsrRegion: async () =>
|
|
2144
|
+
loadTtsRegion: async () => noopMmapRegion("kokoro:tts"),
|
|
2145
|
+
loadAsrRegion: async () => noopMmapRegion("kokoro:asr"),
|
|
2108
2146
|
loadVoiceCaches: async () => ({
|
|
2109
2147
|
id: "kokoro:voice-caches",
|
|
2110
2148
|
async release() {},
|
|
@@ -2120,10 +2158,13 @@ function defaultLifecycleLoaders(
|
|
|
2120
2158
|
bundleRoot: string,
|
|
2121
2159
|
ffi: ElizaInferenceFfi | null,
|
|
2122
2160
|
ctx: ElizaInferenceContextHandle | FfiContextRef | null,
|
|
2161
|
+
options: { skipTtsRegion?: boolean } = {},
|
|
2123
2162
|
): VoiceLifecycleLoaders {
|
|
2124
2163
|
return {
|
|
2125
2164
|
loadTtsRegion: async () =>
|
|
2126
|
-
|
|
2165
|
+
options.skipTtsRegion === true
|
|
2166
|
+
? noopMmapRegion(`tts-override:${bundleRoot}`)
|
|
2167
|
+
: bundleMmapRegion(path.join(bundleRoot, "tts"), "tts", ffi, ctx),
|
|
2127
2168
|
loadAsrRegion: async () =>
|
|
2128
2169
|
bundleMmapRegion(path.join(bundleRoot, "asr"), "asr", ffi, ctx),
|
|
2129
2170
|
loadVoiceCaches: async () => ({
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* LiveKit turn-detector — GGUF-backed binding (J1.d).
|
|
3
3
|
*
|
|
4
4
|
* The text-side turn-completion classifier formats the latest partial
|
|
5
|
-
* user transcript with the
|
|
6
|
-
*
|
|
5
|
+
* user transcript with the Gemma chat template, strips the trailing
|
|
6
|
+
* `<end_of_turn>`, and reads `P(<end_of_turn>)` from the next-token
|
|
7
7
|
* distribution. The upstream `livekit/turn-detector` ships an ONNX
|
|
8
8
|
* graph; this binding consumes the **GGUF** export published at
|
|
9
9
|
* `elizaos/eliza-1` under `voice/turn-detector/onnx/turn-detector-en-q8.gguf`
|
|
@@ -18,9 +18,9 @@
|
|
|
18
18
|
* (`.swarm/impl/I1-single-runtime.md` §B), the turn-detector was the
|
|
19
19
|
* cheapest of the four remaining ONNX surfaces to retire — the GGUF
|
|
20
20
|
* artifact was already published by H4 (see commit history), and the
|
|
21
|
-
* detector's architecture (
|
|
22
|
-
*
|
|
23
|
-
*
|
|
21
|
+
* detector's architecture (a small decoder + classification head on the
|
|
22
|
+
* `<end_of_turn>` logit) is already implemented in the fork. The work is
|
|
23
|
+
* wiring, not porting.
|
|
24
24
|
*
|
|
25
25
|
* No silent fallback (AGENTS.md §3): when `capacitor-llama` is
|
|
26
26
|
* unavailable, the GGUF is missing, or the model load fails, this
|
|
@@ -29,11 +29,11 @@
|
|
|
29
29
|
* legacy ONNX path; the binding itself never fabricates a probability.
|
|
30
30
|
*
|
|
31
31
|
* Tokenizer ownership: the GGUF carries its own tokenizer (BPE +
|
|
32
|
-
* special tokens, including
|
|
32
|
+
* special tokens, including `<end_of_turn>`); this binding does NOT
|
|
33
33
|
* import `@huggingface/transformers`. The `apply_chat_template`
|
|
34
34
|
* formatting is re-implemented here using the same template upstream
|
|
35
|
-
* uses (single-turn user message wrapped in
|
|
36
|
-
* — see `
|
|
35
|
+
* uses (single-turn user message wrapped in `<start_of_turn>user\n... \n`)
|
|
36
|
+
* — see `applyGemmaUserTemplate` below.
|
|
37
37
|
*
|
|
38
38
|
* --- Planned LoRA hot-swap path ---
|
|
39
39
|
*
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
* model. The chat target model (eliza-1-{2b,4b}) is already
|
|
42
42
|
* loaded for conversation — its next-token distribution after the
|
|
43
43
|
* chat-template-formatted partial transcript provides exactly the
|
|
44
|
-
* same `P(
|
|
44
|
+
* same `P(<end_of_turn>)` signal. A LoRA adapter (rank 8, ~5-10 MB)
|
|
45
45
|
* trained on `(transcript, eot_label)` pairs can shape that signal
|
|
46
46
|
* to match or beat the LiveKit baseline.
|
|
47
47
|
*
|
|
@@ -60,7 +60,7 @@
|
|
|
60
60
|
* LoRA hot-swap over standing up the LiveKit GGUF process.
|
|
61
61
|
* 3. The hot-swap path uses llama.cpp's `--lora` flag on the chat
|
|
62
62
|
* target. A single forward pass against the chat-template-formatted
|
|
63
|
-
* transcript yields the next-token logits; read
|
|
63
|
+
* transcript yields the next-token logits; read `<end_of_turn>`'s
|
|
64
64
|
* probability and return it.
|
|
65
65
|
* 4. Fail-closed: if adapter load fails or the SHA binding mismatches,
|
|
66
66
|
* throw `EotGgmlUnavailableError("model-load-failed", ...)`. No
|
|
@@ -87,7 +87,7 @@ export class EotGgmlUnavailableError extends Error {
|
|
|
87
87
|
| "native-missing"
|
|
88
88
|
| "model-missing"
|
|
89
89
|
| "model-load-failed"
|
|
90
|
-
| "tokenizer-missing-
|
|
90
|
+
| "tokenizer-missing-end-of-turn"
|
|
91
91
|
| "evaluate-failed"
|
|
92
92
|
| "invalid-input";
|
|
93
93
|
constructor(code: EotGgmlUnavailableError["code"], message: string) {
|
|
@@ -113,7 +113,7 @@ export const DEFAULT_LIVEKIT_TURN_DETECTOR_GGUF_INTL =
|
|
|
113
113
|
"voice/turn/intl/turn-detector-intl-q8.gguf";
|
|
114
114
|
|
|
115
115
|
/** Special-token literal the detector reads the probability of. */
|
|
116
|
-
export const
|
|
116
|
+
export const LIVEKIT_END_OF_TURN_TOKEN = "<end_of_turn>";
|
|
117
117
|
|
|
118
118
|
/**
|
|
119
119
|
* Default on-disk location for the staged GGUF. The bundle downloader
|
|
@@ -263,7 +263,7 @@ export interface LiveKitGgmlTurnDetectorOptions {
|
|
|
263
263
|
* `"v0.4.1-intl"`). Does not affect inference.
|
|
264
264
|
*/
|
|
265
265
|
revision?: string;
|
|
266
|
-
/** Max history tokens after
|
|
266
|
+
/** Max history tokens after Gemma-template wrapping. Default: 128. */
|
|
267
267
|
maxHistoryTokens?: number;
|
|
268
268
|
/** Optional model label for telemetry. */
|
|
269
269
|
model?: string;
|
|
@@ -277,8 +277,8 @@ export interface LiveKitGgmlTurnDetectorOptions {
|
|
|
277
277
|
|
|
278
278
|
/**
|
|
279
279
|
* Local GGUF-backed LiveKit turn-detector. Uses a `capacitor-llama`
|
|
280
|
-
* evaluation of the
|
|
281
|
-
*
|
|
280
|
+
* evaluation of the decoder, reading `P(<end_of_turn>)` from the next-token
|
|
281
|
+
* distribution after the truncated user-template prefix.
|
|
282
282
|
*
|
|
283
283
|
* One detector instance owns one `LlamaModel` + one `LlamaContext` +
|
|
284
284
|
* one `LlamaSequence`. `score()` resets the sequence between calls —
|
|
@@ -295,7 +295,7 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
295
295
|
llamaModel: NlcLlamaModel;
|
|
296
296
|
context: NlcLlamaContext;
|
|
297
297
|
sequence: NlcLlamaSequence;
|
|
298
|
-
|
|
298
|
+
endOfTurnTokenId: number;
|
|
299
299
|
}> | null = null;
|
|
300
300
|
|
|
301
301
|
constructor(opts: LiveKitGgmlTurnDetectorOptions) {
|
|
@@ -325,14 +325,14 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
325
325
|
const loaded = await this.load();
|
|
326
326
|
const transcript = normalizeTurnDetectorText(partialTranscript);
|
|
327
327
|
// Tokenize the user-templated transcript WITHOUT the trailing
|
|
328
|
-
//
|
|
328
|
+
// `<end_of_turn>` (the head must score that token as the next one).
|
|
329
329
|
// We do not pass `specialTokens=true` for the user text itself —
|
|
330
330
|
// only the template wrappers themselves are special tokens.
|
|
331
|
-
const promptText =
|
|
331
|
+
const promptText = applyGemmaUserTemplate(transcript);
|
|
332
332
|
|
|
333
333
|
// Tokenize: the template wrappers are special tokens; the GGUF's
|
|
334
334
|
// BPE handles the inner text. Passing `true` tells the tokenizer
|
|
335
|
-
// to recognize the
|
|
335
|
+
// to recognize the `<start_of_turn>` / `\n` literals as the real
|
|
336
336
|
// special-token ids. Truncate from the LEFT so the recent text
|
|
337
337
|
// is preserved.
|
|
338
338
|
let tokens = loaded.llamaModel.tokenize(promptText, true);
|
|
@@ -351,7 +351,7 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
351
351
|
await loaded.sequence.clearHistory();
|
|
352
352
|
|
|
353
353
|
// Feed every token, asking for the probability distribution only
|
|
354
|
-
// on the LAST one. That gives us P(token
|
|
354
|
+
// on the LAST one. That gives us P(token=<end_of_turn>) after the
|
|
355
355
|
// truncated template prefix.
|
|
356
356
|
const lastIdx = tokens.length - 1;
|
|
357
357
|
const input = tokens.map((tok, i) =>
|
|
@@ -371,10 +371,10 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
371
371
|
"[eot-ggml] controlledEvaluate did not return probabilities for the last token",
|
|
372
372
|
);
|
|
373
373
|
}
|
|
374
|
-
const
|
|
374
|
+
const endOfTurnProb = probs.get(loaded.endOfTurnTokenId) ?? 0;
|
|
375
375
|
|
|
376
376
|
return turnSignalFromProbability({
|
|
377
|
-
probability:
|
|
377
|
+
probability: endOfTurnProb,
|
|
378
378
|
transcript,
|
|
379
379
|
source: "livekit-turn-detector",
|
|
380
380
|
model: this.model,
|
|
@@ -398,7 +398,7 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
398
398
|
llamaModel: NlcLlamaModel;
|
|
399
399
|
context: NlcLlamaContext;
|
|
400
400
|
sequence: NlcLlamaSequence;
|
|
401
|
-
|
|
401
|
+
endOfTurnTokenId: number;
|
|
402
402
|
}> {
|
|
403
403
|
this.ready ??= this.loadInner();
|
|
404
404
|
return this.ready;
|
|
@@ -408,7 +408,7 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
408
408
|
llamaModel: NlcLlamaModel;
|
|
409
409
|
context: NlcLlamaContext;
|
|
410
410
|
sequence: NlcLlamaSequence;
|
|
411
|
-
|
|
411
|
+
endOfTurnTokenId: number;
|
|
412
412
|
}> {
|
|
413
413
|
try {
|
|
414
414
|
await access(this.ggufPath);
|
|
@@ -433,18 +433,21 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
433
433
|
);
|
|
434
434
|
}
|
|
435
435
|
|
|
436
|
-
// Resolve the
|
|
436
|
+
// Resolve the <end_of_turn> token id from the GGUF's BPE tokenizer.
|
|
437
437
|
// Passing `specialTokens=true` tells the tokenizer to recognize
|
|
438
438
|
// the literal as the corresponding special token.
|
|
439
|
-
const
|
|
440
|
-
|
|
439
|
+
const endOfTurnTokens = llamaModel.tokenize(
|
|
440
|
+
LIVEKIT_END_OF_TURN_TOKEN,
|
|
441
|
+
true,
|
|
442
|
+
);
|
|
443
|
+
if (endOfTurnTokens.length !== 1) {
|
|
441
444
|
await llamaModel.dispose().catch(() => undefined);
|
|
442
445
|
throw new EotGgmlUnavailableError(
|
|
443
|
-
"tokenizer-missing-
|
|
444
|
-
`[eot-ggml] tokenizer produced ${
|
|
446
|
+
"tokenizer-missing-end-of-turn",
|
|
447
|
+
`[eot-ggml] tokenizer produced ${endOfTurnTokens.length} tokens for <end_of_turn>; expected exactly 1. The GGUF's special-token table is missing the expected entry.`,
|
|
445
448
|
);
|
|
446
449
|
}
|
|
447
|
-
const
|
|
450
|
+
const endOfTurnTokenId = endOfTurnTokens[0];
|
|
448
451
|
|
|
449
452
|
let context: NlcLlamaContext;
|
|
450
453
|
try {
|
|
@@ -461,7 +464,7 @@ export class LiveKitGgmlTurnDetector implements EotClassifier {
|
|
|
461
464
|
}
|
|
462
465
|
|
|
463
466
|
const sequence = context.getSequence();
|
|
464
|
-
return { llamaModel, context, sequence,
|
|
467
|
+
return { llamaModel, context, sequence, endOfTurnTokenId };
|
|
465
468
|
}
|
|
466
469
|
}
|
|
467
470
|
|
|
@@ -550,17 +553,17 @@ function normalizeTurnDetectorText(text: string): string {
|
|
|
550
553
|
}
|
|
551
554
|
|
|
552
555
|
/**
|
|
553
|
-
* Apply the single-turn user
|
|
554
|
-
*
|
|
556
|
+
* Apply the single-turn user Gemma chat template, omitting the trailing
|
|
557
|
+
* `<end_of_turn>` so the detector head scores it as the next token.
|
|
555
558
|
*
|
|
556
559
|
* Upstream `livekit/turn-detector` formats:
|
|
557
560
|
*
|
|
558
|
-
*
|
|
561
|
+
* <start_of_turn>user\n{transcript}<end_of_turn>\n
|
|
559
562
|
*
|
|
560
|
-
* The detector strips the trailing
|
|
561
|
-
* `P(
|
|
562
|
-
* stopping where the
|
|
563
|
+
* The detector strips the trailing `<end_of_turn>\n` and reads
|
|
564
|
+
* `P(<end_of_turn>)` after the user content. We emit the prefix exactly,
|
|
565
|
+
* stopping where the `<end_of_turn>` would go.
|
|
563
566
|
*/
|
|
564
|
-
export function
|
|
565
|
-
return
|
|
567
|
+
export function applyGemmaUserTemplate(transcript: string): string {
|
|
568
|
+
return `<start_of_turn>user\n${transcript}`;
|
|
566
569
|
}
|