@elizaos/plugin-local-inference 2.0.3-beta.2 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -10
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +39647 -0
- package/dist/index.js.map +217 -0
- package/{src → dist}/local-inference-routes.d.ts +9 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts.map +1 -0
- package/{src → dist}/routes/compat-helpers.d.ts +1 -1
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/{src → dist}/routes/index.d.ts +1 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/{src → dist}/routes/live-diarization-route.d.ts +7 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/{src → dist}/routes/transcripts-routes.d.ts +8 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/{src → dist}/runtime/ensure-local-inference-handler.d.ts +8 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/{src → dist}/runtime/index.d.ts +1 -1
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/{src → dist}/runtime/voice-entity-binding.d.ts +10 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/{src → dist}/services/active-model.d.ts +28 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/{src → dist}/services/assignments.d.ts +16 -3
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/{src → dist}/services/backend.d.ts +110 -16
- package/dist/services/backend.d.ts.map +1 -0
- package/{src → dist}/services/bionic-host-loader.d.ts +21 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/{src → dist}/services/desktop-fused-ffi-backend-runtime.d.ts +22 -6
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/{src → dist}/services/device-tier.d.ts +19 -1
- package/dist/services/device-tier.d.ts.map +1 -0
- package/{src → dist}/services/downloader.d.ts +16 -4
- package/dist/services/downloader.d.ts.map +1 -0
- package/{src → dist}/services/engine.d.ts +43 -4
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-backend.d.ts +28 -7
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/{src → dist}/services/ffi-streaming-runner.d.ts +24 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/{src → dist}/services/imagegen/sd-cpp.d.ts +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/{src → dist}/services/index.d.ts +3 -1
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/{src → dist}/services/manifest/schema.d.ts +196 -6
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/{src → dist}/services/manifest/types.d.ts +3 -1
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/{src → dist}/services/memory-arbiter.d.ts +33 -3
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/{src → dist}/services/memory-monitor.d.ts +6 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/{src → dist}/services/registry.d.ts +11 -13
- package/dist/services/registry.d.ts.map +1 -0
- package/{src → dist}/services/router-handler.d.ts +2 -2
- package/dist/services/router-handler.d.ts.map +1 -0
- package/{src → dist}/services/routing-policy.d.ts +32 -9
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/{src → dist}/services/service.d.ts +1 -1
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/{src → dist}/services/types.d.ts +1 -1
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/{src → dist}/services/vision/index.d.ts +1 -1
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/{src → dist}/services/vision/types.d.ts +13 -4
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/{src → dist}/services/vision-embedding-cache.d.ts +1 -1
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/audio-frame-consumer.d.ts +82 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/{src → dist}/services/voice/eliza1-eot-scorer.d.ts +8 -8
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/{src → dist}/services/voice/embedding.d.ts +2 -3
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/{src → dist}/services/voice/engine-bridge.d.ts +8 -5
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier-ggml.d.ts +22 -22
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/{src → dist}/services/voice/eot-classifier.d.ts +9 -12
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/{src → dist}/services/voice/errors.d.ts +1 -1
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/{src → dist}/services/voice/expressive-tags.d.ts +5 -5
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/{src → dist}/services/voice/ffi-bindings.d.ts +26 -4
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/{src → dist}/services/voice/fused-eot-scorer.d.ts +6 -6
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/{src → dist}/services/voice/index.d.ts +8 -3
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-backend.d.ts +15 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-engine-discovery.d.ts +1 -1
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/kokoro-ffi-runtime.d.ts +3 -3
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/{src → dist}/services/voice/kokoro/pick-runtime.d.ts +1 -1
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/{src → dist}/services/voice/mic-source.d.ts +1 -1
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/{src → dist}/services/voice/partial-stabilizer.d.ts +1 -1
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/{src → dist}/services/voice/shared-resources.d.ts +14 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcriber.d.ts +4 -4
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-service.d.ts +20 -1
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/{src → dist}/services/voice/transcript-store.d.ts +12 -1
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/{src → dist}/services/voice/types.d.ts +6 -6
- package/dist/services/voice/types.d.ts.map +1 -0
- package/{src → dist}/services/voice/vad.d.ts +6 -5
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/{src → dist}/services/voice/voice-preset-format.d.ts +2 -2
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/{src → dist}/services/voice/wake-word-ggml.d.ts +8 -9
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +28 -9
- package/registry-entry.json +137 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +1 -1
- package/src/adapters/capacitor-llama/index.ts +28 -4
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +2 -2
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +1 -1
- package/src/local-inference-routes.test.ts +57 -11
- package/src/local-inference-routes.ts +90 -8
- package/src/provider.ts +32 -3
- package/src/routes/compat-helpers.ts +2 -1
- package/src/routes/index.ts +1 -0
- package/src/routes/live-diarization-route.test.ts +134 -0
- package/src/routes/live-diarization-route.ts +79 -3
- package/src/routes/local-inference-asr-route.test.ts +43 -2
- package/src/routes/local-inference-asr-route.ts +7 -4
- package/src/routes/local-inference-asr-transcribe.test.ts +4 -4
- package/src/routes/local-inference-asr-transcribe.ts +1 -1
- package/src/routes/local-inference-compat-routes.test.ts +3 -3
- package/src/routes/local-inference-compat-routes.ts +23 -56
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcripts-routes.test.ts +51 -0
- package/src/routes/transcripts-routes.ts +35 -3
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +203 -5
- package/src/runtime/ensure-local-inference-handler.ts +203 -11
- package/src/runtime/index.ts +4 -1
- package/src/runtime/mobile-local-inference-gate.test.ts +85 -2
- package/src/runtime/mobile-local-inference-gate.ts +60 -5
- package/src/runtime/voice-entity-binding.transcript.test.ts +29 -0
- package/src/runtime/voice-entity-binding.ts +46 -6
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +2 -2
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model.ts +211 -8
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +26 -0
- package/src/services/assignments.ts +52 -4
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +198 -19
- package/src/services/bionic-host-loader.test.ts +94 -1
- package/src/services/bionic-host-loader.ts +72 -0
- package/src/services/cache-bridge.test.ts +7 -7
- package/src/services/catalog.test.ts +32 -11
- package/src/services/catalog.ts +6 -0
- package/src/services/cloud-fallback.ts +1 -1
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +99 -7
- package/src/services/device-tier.test.ts +89 -2
- package/src/services/device-tier.ts +103 -11
- package/src/services/downloader.test.ts +199 -58
- package/src/services/downloader.ts +141 -27
- package/src/services/engine-direct-bundle.test.ts +38 -6
- package/src/services/engine.ts +291 -104
- package/src/services/ensure-local-artifacts.ts +1 -1
- package/src/services/ffi-llm-streaming-abi.ts +6 -3
- package/src/services/ffi-streaming-backend.ts +44 -8
- package/src/services/ffi-streaming-runner.test.ts +163 -3
- package/src/services/ffi-streaming-runner.ts +54 -1
- package/src/services/ffi-unload-ordering.test.ts +5 -1
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/hardware.test.ts +7 -2
- package/src/services/hardware.ts +28 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/sd-cpp.ts +6 -9
- package/src/services/index.ts +18 -0
- package/src/services/ios-llama-streaming.ts +1 -1
- package/src/services/kv-spill.ts +6 -5
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +84 -2
- package/src/services/manifest/index.ts +6 -0
- package/src/services/manifest/manifest.test.ts +156 -54
- package/src/services/manifest/schema.ts +160 -52
- package/src/services/manifest/types.ts +6 -0
- package/src/services/manifest/validator.ts +91 -25
- package/src/services/memory-arbiter.test.ts +139 -0
- package/src/services/memory-arbiter.ts +81 -15
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +24 -0
- package/src/services/memory-monitor.ts +12 -0
- package/src/services/mtp-doctor.ts +10 -2
- package/src/services/network-policy.ts +5 -5
- package/src/services/ram-budget-cache.test.ts +2 -1
- package/src/services/ram-budget.ts +0 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/registry.ts +25 -19
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.ts +43 -24
- package/src/services/routing-policy.test.ts +211 -23
- package/src/services/routing-policy.ts +92 -22
- package/src/services/service.test.ts +3 -3
- package/src/services/service.ts +22 -7
- package/src/services/transcription-priority.test.ts +2 -2
- package/src/services/types.ts +4 -0
- package/src/services/verify-on-device.test.ts +2 -2
- package/src/services/vision/hash.ts +1 -1
- package/src/services/vision/index.ts +2 -2
- package/src/services/vision/llama-server.ts +1 -1
- package/src/services/vision/types.ts +13 -4
- package/src/services/vision-embedding-cache.ts +1 -1
- package/src/services/voice/VOICE_WORKBENCH.md +71 -26
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +72 -2
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +29 -29
- package/src/services/voice/__tests__/streaming-asr.test.ts +1 -1
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +6 -8
- package/src/services/voice/audio-frame-consumer.test.ts +327 -1
- package/src/services/voice/audio-frame-consumer.ts +165 -5
- package/src/services/voice/barge-in.ts +2 -3
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +2 -2
- package/src/services/voice/e2e-harness.ts +175 -16
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +22 -22
- package/src/services/voice/embedding.ts +2 -3
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.ts +151 -110
- package/src/services/voice/eot-classifier-ggml.ts +42 -39
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +11 -122
- package/src/services/voice/errors.ts +2 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +8 -8
- package/src/services/voice/ffi-bindings.test.ts +10 -3
- package/src/services/voice/ffi-bindings.ts +177 -15
- package/src/services/voice/fused-eot-scorer.ts +17 -13
- package/src/services/voice/index.ts +33 -12
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +112 -1
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +88 -3
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +37 -201
- package/src/services/voice/kokoro/kokoro-backend.ts +16 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +1 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +3 -3
- package/src/services/voice/kokoro/pick-runtime.ts +1 -1
- package/src/services/voice/kokoro/runtime-selection.ts +28 -201
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +335 -2
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.ts +1 -1
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/partial-stabilizer.ts +1 -1
- package/src/services/voice/pipeline.ts +3 -4
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +23 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +85 -22
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.ts +4 -4
- package/src/services/voice/transcript-service.test.ts +58 -0
- package/src/services/voice/transcript-service.ts +64 -0
- package/src/services/voice/transcript-store.test.ts +36 -0
- package/src/services/voice/transcript-store.ts +32 -0
- package/src/services/voice/types.ts +7 -7
- package/src/services/voice/vad.test.ts +33 -15
- package/src/services/voice/vad.ts +25 -20
- package/src/services/voice/voice-budget.test.ts +0 -3
- package/src/services/voice/voice-budget.ts +6 -6
- package/src/services/voice/voice-duet.test.ts +1 -1
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +17 -4
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +133 -7
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-workbench-report.ts +58 -17
- package/src/services/voice/wake-word-ggml.ts +12 -13
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice-prewarm.ts +1 -1
- package/src/voice-workbench.ts +71 -0
- package/src/actions/generate-media.d.ts.map +0 -1
- package/src/actions/identify-speaker.d.ts.map +0 -1
- package/src/actions/transcription-control.d.ts.map +0 -1
- package/src/index.d.ts.map +0 -1
- package/src/local-inference-routes.d.ts.map +0 -1
- package/src/provider.d.ts.map +0 -1
- package/src/routes/compat-helpers.d.ts.map +0 -1
- package/src/routes/family-member-route.d.ts.map +0 -1
- package/src/routes/index.d.ts.map +0 -1
- package/src/routes/live-diarization-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-route.d.ts.map +0 -1
- package/src/routes/local-inference-asr-transcribe.d.ts.map +0 -1
- package/src/routes/local-inference-compat-routes.d.ts.map +0 -1
- package/src/routes/local-inference-tts-route.d.ts.map +0 -1
- package/src/routes/transcript-audio-store.d.ts.map +0 -1
- package/src/routes/transcripts-routes.d.ts.map +0 -1
- package/src/routes/voice-first-run-routes.d.ts.map +0 -1
- package/src/routes/voice-models-routes.d.ts.map +0 -1
- package/src/routes/voice-profile-plugin-routes.d.ts.map +0 -1
- package/src/routes/voice-profiles-management-routes.d.ts.map +0 -1
- package/src/routes/voice-speaker-profile-routes.d.ts.map +0 -1
- package/src/runtime/embedding-manager-support.d.ts.map +0 -1
- package/src/runtime/embedding-presets.d.ts.map +0 -1
- package/src/runtime/embedding-warmup-policy.d.ts.map +0 -1
- package/src/runtime/ensure-local-inference-handler.d.ts.map +0 -1
- package/src/runtime/index.d.ts.map +0 -1
- package/src/runtime/mobile-local-inference-gate.d.ts +0 -31
- package/src/runtime/mobile-local-inference-gate.d.ts.map +0 -1
- package/src/runtime/voice-entity-binding.d.ts.map +0 -1
- package/src/services/active-model.d.ts.map +0 -1
- package/src/services/assignments.d.ts.map +0 -1
- package/src/services/backend.d.ts.map +0 -1
- package/src/services/bionic-host-loader.d.ts.map +0 -1
- package/src/services/bundled-models.d.ts.map +0 -1
- package/src/services/cache-bridge.d.ts.map +0 -1
- package/src/services/catalog.d.ts +0 -10
- package/src/services/catalog.d.ts.map +0 -1
- package/src/services/checkpoint-client.d.ts.map +0 -1
- package/src/services/cloud-fallback.d.ts.map +0 -1
- package/src/services/conversation-registry.d.ts.map +0 -1
- package/src/services/desktop-fused-ffi-backend-runtime.d.ts.map +0 -1
- package/src/services/device-bridge.d.ts.map +0 -1
- package/src/services/device-resource-metrics.d.ts.map +0 -1
- package/src/services/device-tier.d.ts.map +0 -1
- package/src/services/downloader.d.ts.map +0 -1
- package/src/services/engine.d.ts.map +0 -1
- package/src/services/external-scanner.d.ts.map +0 -1
- package/src/services/ffi-streaming-backend.d.ts.map +0 -1
- package/src/services/ffi-streaming-runner.d.ts.map +0 -1
- package/src/services/gpu-detect.d.ts.map +0 -1
- package/src/services/handler-registry.d.ts.map +0 -1
- package/src/services/hardware.d.ts.map +0 -1
- package/src/services/hf-search.d.ts +0 -26
- package/src/services/hf-search.d.ts.map +0 -1
- package/src/services/hf-search.test.ts +0 -69
- package/src/services/hf-search.ts +0 -420
- package/src/services/image-description-runtime.d.ts.map +0 -1
- package/src/services/imagegen/aosp-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/backend-selector.d.ts.map +0 -1
- package/src/services/imagegen/coreml-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/errors.d.ts.map +0 -1
- package/src/services/imagegen/index.d.ts.map +0 -1
- package/src/services/imagegen/mflux.d.ts.map +0 -1
- package/src/services/imagegen/sd-cpp.d.ts.map +0 -1
- package/src/services/imagegen/tensorrt-unavailable.d.ts.map +0 -1
- package/src/services/imagegen/types.d.ts.map +0 -1
- package/src/services/index.d.ts.map +0 -1
- package/src/services/inference-capabilities.d.ts.map +0 -1
- package/src/services/inference-telemetry.d.ts.map +0 -1
- package/src/services/kv-spill.d.ts.map +0 -1
- package/src/services/latency-trace.d.ts.map +0 -1
- package/src/services/llm-streaming-binding.d.ts.map +0 -1
- package/src/services/load-args.d.ts.map +0 -1
- package/src/services/manifest/index.d.ts +0 -4
- package/src/services/manifest/index.d.ts.map +0 -1
- package/src/services/manifest/schema.d.ts.map +0 -1
- package/src/services/manifest/types.d.ts.map +0 -1
- package/src/services/manifest/validator.d.ts.map +0 -1
- package/src/services/memory-arbiter.d.ts.map +0 -1
- package/src/services/memory-monitor.d.ts.map +0 -1
- package/src/services/memory-pressure.d.ts.map +0 -1
- package/src/services/mtp-doctor.d.ts.map +0 -1
- package/src/services/network-policy.d.ts.map +0 -1
- package/src/services/paths.d.ts.map +0 -1
- package/src/services/planner-skeleton.d.ts.map +0 -1
- package/src/services/providers.d.ts.map +0 -1
- package/src/services/ram-budget.d.ts.map +0 -1
- package/src/services/readiness.d.ts.map +0 -1
- package/src/services/recommendation.d.ts.map +0 -1
- package/src/services/registry.d.ts.map +0 -1
- package/src/services/router-handler.d.ts.map +0 -1
- package/src/services/routing-policy.d.ts.map +0 -1
- package/src/services/routing-preferences.d.ts.map +0 -1
- package/src/services/runtime-target.d.ts.map +0 -1
- package/src/services/service.d.ts.map +0 -1
- package/src/services/session-pool.d.ts.map +0 -1
- package/src/services/structured-output/deterministic-repair.d.ts.map +0 -1
- package/src/services/structured-output.d.ts.map +0 -1
- package/src/services/system-memory.d.ts.map +0 -1
- package/src/services/types.d.ts.map +0 -1
- package/src/services/verify-on-device.d.ts.map +0 -1
- package/src/services/verify.d.ts.map +0 -1
- package/src/services/vision/aosp-unavailable.d.ts.map +0 -1
- package/src/services/vision/capacitor-llama.d.ts.map +0 -1
- package/src/services/vision/cloud-fallback.d.ts.map +0 -1
- package/src/services/vision/hash.d.ts.map +0 -1
- package/src/services/vision/index.d.ts.map +0 -1
- package/src/services/vision/llama-server.d.ts.map +0 -1
- package/src/services/vision/types.d.ts.map +0 -1
- package/src/services/vision/vast-fallback.d.ts.map +0 -1
- package/src/services/vision-embedding-cache.d.ts.map +0 -1
- package/src/services/voice/audio-frame-consumer.d.ts.map +0 -1
- package/src/services/voice/barge-in.d.ts.map +0 -1
- package/src/services/voice/cancellation-coordinator.d.ts.map +0 -1
- package/src/services/voice/checkpoint-manager.d.ts.map +0 -1
- package/src/services/voice/eager-context-builder.d.ts.map +0 -1
- package/src/services/voice/eliza1-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/embedding.d.ts.map +0 -1
- package/src/services/voice/emotion-attribution.d.ts.map +0 -1
- package/src/services/voice/engine-bridge.d.ts.map +0 -1
- package/src/services/voice/eot-classifier-ggml.d.ts.map +0 -1
- package/src/services/voice/eot-classifier.d.ts.map +0 -1
- package/src/services/voice/errors.d.ts.map +0 -1
- package/src/services/voice/expressive-tags.d.ts.map +0 -1
- package/src/services/voice/ffi-bindings.d.ts.map +0 -1
- package/src/services/voice/first-line-cache.d.ts.map +0 -1
- package/src/services/voice/fused-eot-scorer.d.ts.map +0 -1
- package/src/services/voice/index.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-backend.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/kokoro-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/phonemizer.d.ts.map +0 -1
- package/src/services/voice/kokoro/pick-runtime.d.ts.map +0 -1
- package/src/services/voice/kokoro/runtime-selection.d.ts +0 -92
- package/src/services/voice/kokoro/runtime-selection.d.ts.map +0 -1
- package/src/services/voice/kokoro/types.d.ts.map +0 -1
- package/src/services/voice/kokoro/voice-presets.d.ts.map +0 -1
- package/src/services/voice/kokoro/voices.d.ts.map +0 -1
- package/src/services/voice/lifecycle.d.ts.map +0 -1
- package/src/services/voice/live-diarization-session.d.ts +0 -96
- package/src/services/voice/live-diarization-session.d.ts.map +0 -1
- package/src/services/voice/mic-source.d.ts.map +0 -1
- package/src/services/voice/optimistic-policy.d.ts.map +0 -1
- package/src/services/voice/partial-stabilizer.d.ts.map +0 -1
- package/src/services/voice/phoneme-tokenizer.d.ts.map +0 -1
- package/src/services/voice/phrase-cache.d.ts.map +0 -1
- package/src/services/voice/phrase-chunker.d.ts.map +0 -1
- package/src/services/voice/pipeline-impls.d.ts.map +0 -1
- package/src/services/voice/pipeline.d.ts.map +0 -1
- package/src/services/voice/prefill-client.d.ts.map +0 -1
- package/src/services/voice/prefix-preserving-queue.d.ts.map +0 -1
- package/src/services/voice/profile-store.d.ts.map +0 -1
- package/src/services/voice/ring-buffer.d.ts.map +0 -1
- package/src/services/voice/rollback-queue.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-placeholder.d.ts.map +0 -1
- package/src/services/voice/samantha-preset-regenerator.d.ts.map +0 -1
- package/src/services/voice/scheduler.d.ts.map +0 -1
- package/src/services/voice/shared-resources.d.ts.map +0 -1
- package/src/services/voice/speaker/attribution-pipeline.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/diarizer.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-fused.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder-ggml.d.ts.map +0 -1
- package/src/services/voice/speaker/encoder.d.ts.map +0 -1
- package/src/services/voice/speaker-imprint.d.ts.map +0 -1
- package/src/services/voice/speaker-preset-cache.d.ts.map +0 -1
- package/src/services/voice/system-audio-sink.d.ts.map +0 -1
- package/src/services/voice/transcriber.d.ts.map +0 -1
- package/src/services/voice/transcript-knowledge.d.ts.map +0 -1
- package/src/services/voice/transcript-service.d.ts.map +0 -1
- package/src/services/voice/transcript-store.d.ts.map +0 -1
- package/src/services/voice/turn-controller.d.ts.map +0 -1
- package/src/services/voice/types.d.ts.map +0 -1
- package/src/services/voice/vad.d.ts.map +0 -1
- package/src/services/voice/voice-budget.d.ts.map +0 -1
- package/src/services/voice/voice-emotion-classifier.d.ts.map +0 -1
- package/src/services/voice/voice-preset-format.d.ts.map +0 -1
- package/src/services/voice/voice-profile-artifact.d.ts.map +0 -1
- package/src/services/voice/voice-profile-routes.d.ts.map +0 -1
- package/src/services/voice/voice-settings.d.ts +0 -82
- package/src/services/voice/voice-settings.d.ts.map +0 -1
- package/src/services/voice/voice-settings.ts +0 -172
- package/src/services/voice/voice-state-machine.d.ts.map +0 -1
- package/src/services/voice/wake-word-ggml.d.ts.map +0 -1
- package/src/services/voice/wake-word.d.ts.map +0 -1
- package/src/services/voice/wrap-with-first-line-cache.d.ts.map +0 -1
- package/src/services/voice-model-updater.d.ts.map +0 -1
- package/src/services/voice-prewarm.d.ts.map +0 -1
- /package/{src → dist}/actions/generate-media.d.ts +0 -0
- /package/{src → dist}/actions/identify-speaker.d.ts +0 -0
- /package/{src → dist}/actions/transcription-control.d.ts +0 -0
- /package/{src → dist}/index.d.ts +0 -0
- /package/{src → dist}/provider.d.ts +0 -0
- /package/{src → dist}/routes/family-member-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-route.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-asr-transcribe.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-compat-routes.d.ts +0 -0
- /package/{src → dist}/routes/local-inference-tts-route.d.ts +0 -0
- /package/{src → dist}/routes/transcript-audio-store.d.ts +0 -0
- /package/{src → dist}/routes/voice-first-run-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-models-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profile-plugin-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-profiles-management-routes.d.ts +0 -0
- /package/{src → dist}/routes/voice-speaker-profile-routes.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-manager-support.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-presets.d.ts +0 -0
- /package/{src → dist}/runtime/embedding-warmup-policy.d.ts +0 -0
- /package/{src → dist}/services/bundled-models.d.ts +0 -0
- /package/{src → dist}/services/cache-bridge.d.ts +0 -0
- /package/{src → dist}/services/checkpoint-client.d.ts +0 -0
- /package/{src → dist}/services/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/conversation-registry.d.ts +0 -0
- /package/{src → dist}/services/device-bridge.d.ts +0 -0
- /package/{src → dist}/services/device-resource-metrics.d.ts +0 -0
- /package/{src → dist}/services/external-scanner.d.ts +0 -0
- /package/{src → dist}/services/gpu-detect.d.ts +0 -0
- /package/{src → dist}/services/handler-registry.d.ts +0 -0
- /package/{src → dist}/services/hardware.d.ts +0 -0
- /package/{src → dist}/services/image-description-runtime.d.ts +0 -0
- /package/{src → dist}/services/imagegen/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/backend-selector.d.ts +0 -0
- /package/{src → dist}/services/imagegen/coreml-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/errors.d.ts +0 -0
- /package/{src → dist}/services/imagegen/index.d.ts +0 -0
- /package/{src → dist}/services/imagegen/mflux.d.ts +0 -0
- /package/{src → dist}/services/imagegen/tensorrt-unavailable.d.ts +0 -0
- /package/{src → dist}/services/imagegen/types.d.ts +0 -0
- /package/{src → dist}/services/inference-capabilities.d.ts +0 -0
- /package/{src → dist}/services/inference-telemetry.d.ts +0 -0
- /package/{src → dist}/services/kv-spill.d.ts +0 -0
- /package/{src → dist}/services/latency-trace.d.ts +0 -0
- /package/{src → dist}/services/llm-streaming-binding.d.ts +0 -0
- /package/{src → dist}/services/load-args.d.ts +0 -0
- /package/{src → dist}/services/manifest/validator.d.ts +0 -0
- /package/{src → dist}/services/memory-pressure.d.ts +0 -0
- /package/{src → dist}/services/mtp-doctor.d.ts +0 -0
- /package/{src → dist}/services/network-policy.d.ts +0 -0
- /package/{src → dist}/services/paths.d.ts +0 -0
- /package/{src → dist}/services/planner-skeleton.d.ts +0 -0
- /package/{src → dist}/services/providers.d.ts +0 -0
- /package/{src → dist}/services/ram-budget.d.ts +0 -0
- /package/{src → dist}/services/readiness.d.ts +0 -0
- /package/{src → dist}/services/recommendation.d.ts +0 -0
- /package/{src → dist}/services/routing-preferences.d.ts +0 -0
- /package/{src → dist}/services/runtime-target.d.ts +0 -0
- /package/{src → dist}/services/session-pool.d.ts +0 -0
- /package/{src → dist}/services/structured-output/deterministic-repair.d.ts +0 -0
- /package/{src → dist}/services/structured-output.d.ts +0 -0
- /package/{src → dist}/services/system-memory.d.ts +0 -0
- /package/{src → dist}/services/verify-on-device.d.ts +0 -0
- /package/{src → dist}/services/verify.d.ts +0 -0
- /package/{src → dist}/services/vision/aosp-unavailable.d.ts +0 -0
- /package/{src → dist}/services/vision/capacitor-llama.d.ts +0 -0
- /package/{src → dist}/services/vision/cloud-fallback.d.ts +0 -0
- /package/{src → dist}/services/vision/hash.d.ts +0 -0
- /package/{src → dist}/services/vision/llama-server.d.ts +0 -0
- /package/{src → dist}/services/vision/vast-fallback.d.ts +0 -0
- /package/{src → dist}/services/voice/barge-in.d.ts +0 -0
- /package/{src → dist}/services/voice/cancellation-coordinator.d.ts +0 -0
- /package/{src → dist}/services/voice/checkpoint-manager.d.ts +0 -0
- /package/{src → dist}/services/voice/eager-context-builder.d.ts +0 -0
- /package/{src → dist}/services/voice/emotion-attribution.d.ts +0 -0
- /package/{src → dist}/services/voice/first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/kokoro-runtime.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/phonemizer.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/types.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voice-presets.d.ts +0 -0
- /package/{src → dist}/services/voice/kokoro/voices.d.ts +0 -0
- /package/{src → dist}/services/voice/lifecycle.d.ts +0 -0
- /package/{src → dist}/services/voice/optimistic-policy.d.ts +0 -0
- /package/{src → dist}/services/voice/phoneme-tokenizer.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/phrase-chunker.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline-impls.d.ts +0 -0
- /package/{src → dist}/services/voice/pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/prefill-client.d.ts +0 -0
- /package/{src → dist}/services/voice/prefix-preserving-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/profile-store.d.ts +0 -0
- /package/{src → dist}/services/voice/ring-buffer.d.ts +0 -0
- /package/{src → dist}/services/voice/rollback-queue.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-placeholder.d.ts +0 -0
- /package/{src → dist}/services/voice/samantha-preset-regenerator.d.ts +0 -0
- /package/{src → dist}/services/voice/scheduler.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/attribution-pipeline.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/diarizer.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-fused.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder-ggml.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker/encoder.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-imprint.d.ts +0 -0
- /package/{src → dist}/services/voice/speaker-preset-cache.d.ts +0 -0
- /package/{src → dist}/services/voice/system-audio-sink.d.ts +0 -0
- /package/{src → dist}/services/voice/transcript-knowledge.d.ts +0 -0
- /package/{src → dist}/services/voice/turn-controller.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-budget.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-emotion-classifier.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-artifact.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-profile-routes.d.ts +0 -0
- /package/{src → dist}/services/voice/voice-state-machine.d.ts +0 -0
- /package/{src → dist}/services/voice/wake-word.d.ts +0 -0
- /package/{src → dist}/services/voice/wrap-with-first-line-cache.d.ts +0 -0
- /package/{src → dist}/services/voice-model-updater.d.ts +0 -0
- /package/{src → dist}/services/voice-prewarm.d.ts +0 -0
|
@@ -441,7 +441,7 @@ describe("FfiBatchTranscriber — flush() returns committed final on speech-end"
|
|
|
441
441
|
|
|
442
442
|
// Guard: real model tests are skipped — no models >2B are loaded.
|
|
443
443
|
it.skipIf(true)(
|
|
444
|
-
"SKIP — real
|
|
444
|
+
"SKIP — real Gemma ASR model: flush() of a live stream returns the full utterance",
|
|
445
445
|
async () => {
|
|
446
446
|
// This test requires a real libelizainference build with a bundled
|
|
447
447
|
// ASR model. It is always skipped in CI to avoid loading large models.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import {
|
|
3
|
+
AGENT_VOICE_TIMBRE,
|
|
4
|
+
makeSpeechWithSilenceFixture,
|
|
5
|
+
type SpeakerTimbre,
|
|
6
|
+
speakerTimbreForIndex,
|
|
7
|
+
} from "./__test-helpers__/synthetic-speech";
|
|
8
|
+
import {
|
|
9
|
+
extractTimbreEmbedding,
|
|
10
|
+
OnlineSpeakerClusterer,
|
|
11
|
+
selfVoiceSimilarity,
|
|
12
|
+
} from "./acoustic-speaker-attribution";
|
|
13
|
+
import { scoreDiarizationTimeline } from "./e2e-harness";
|
|
14
|
+
import { cosineSimilarity } from "./speaker-imprint";
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* The acoustic speaker attributor (#9427). These tests prove the diarization
|
|
18
|
+
* gate is NO LONGER tautological: the predicted label comes from the AUDIO, so
|
|
19
|
+
* it is high only when two clips really sound alike and low when they don't, and
|
|
20
|
+
* the DER scorer trips on a genuine misattribution. Everything is deterministic
|
|
21
|
+
* synthetic speech — no model, no network.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
const SR = 16_000;
|
|
25
|
+
function clip(
|
|
26
|
+
timbre: SpeakerTimbre,
|
|
27
|
+
seed: number,
|
|
28
|
+
speechSec = 1,
|
|
29
|
+
): Float32Array {
|
|
30
|
+
return makeSpeechWithSilenceFixture({
|
|
31
|
+
sampleRate: SR,
|
|
32
|
+
leadSilenceSec: 0.05,
|
|
33
|
+
speechSec,
|
|
34
|
+
tailSilenceSec: 0.05,
|
|
35
|
+
seed,
|
|
36
|
+
timbre,
|
|
37
|
+
}).pcm;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
describe("extractTimbreEmbedding", () => {
|
|
41
|
+
it("is near-identical for two utterances of the SAME voice", () => {
|
|
42
|
+
const t = speakerTimbreForIndex(0, 2);
|
|
43
|
+
const a = extractTimbreEmbedding(clip(t, 1), SR);
|
|
44
|
+
const b = extractTimbreEmbedding(clip(t, 999, 1.4), SR);
|
|
45
|
+
expect(cosineSimilarity(a, b)).toBeGreaterThan(0.9);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it("clearly separates two DIFFERENT voices", () => {
|
|
49
|
+
const a = extractTimbreEmbedding(clip(speakerTimbreForIndex(0, 2), 1), SR);
|
|
50
|
+
const b = extractTimbreEmbedding(clip(speakerTimbreForIndex(1, 2), 1), SR);
|
|
51
|
+
// Distinct timbres land well below the cluster threshold.
|
|
52
|
+
expect(cosineSimilarity(a, b)).toBeLessThan(0.5);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("returns a zero vector for silence", () => {
|
|
56
|
+
const silent = new Float32Array(SR); // 1s of zeros
|
|
57
|
+
const emb = extractTimbreEmbedding(silent, SR);
|
|
58
|
+
expect(emb.every((v) => v === 0)).toBe(true);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("is deterministic (byte-stable across calls)", () => {
|
|
62
|
+
const t = speakerTimbreForIndex(1, 3);
|
|
63
|
+
expect(extractTimbreEmbedding(clip(t, 7), SR)).toEqual(
|
|
64
|
+
extractTimbreEmbedding(clip(t, 7), SR),
|
|
65
|
+
);
|
|
66
|
+
});
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
describe("OnlineSpeakerClusterer", () => {
|
|
70
|
+
it("gives two distinct voices two distinct cluster ids", () => {
|
|
71
|
+
const c = new OnlineSpeakerClusterer();
|
|
72
|
+
const a = c.assignAudio(clip(speakerTimbreForIndex(0, 2), 1), SR);
|
|
73
|
+
const b = c.assignAudio(clip(speakerTimbreForIndex(1, 2), 2), SR);
|
|
74
|
+
expect(a).not.toBe(b);
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
it("re-uses one cluster for repeated turns of the same voice", () => {
|
|
78
|
+
const c = new OnlineSpeakerClusterer();
|
|
79
|
+
const t = speakerTimbreForIndex(0, 2);
|
|
80
|
+
const first = c.assignAudio(clip(t, 1), SR);
|
|
81
|
+
const second = c.assignAudio(clip(t, 2, 1.3), SR);
|
|
82
|
+
const third = c.assignAudio(clip(t, 3, 0.8), SR);
|
|
83
|
+
expect([second, third]).toEqual([first, first]);
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
it("tracks an A/B/A conversation as spk0/spk1/spk0", () => {
|
|
87
|
+
const c = new OnlineSpeakerClusterer();
|
|
88
|
+
const a = speakerTimbreForIndex(0, 2);
|
|
89
|
+
const b = speakerTimbreForIndex(1, 2);
|
|
90
|
+
expect([
|
|
91
|
+
c.assignAudio(clip(a, 1), SR),
|
|
92
|
+
c.assignAudio(clip(b, 2), SR),
|
|
93
|
+
c.assignAudio(clip(a, 3), SR),
|
|
94
|
+
]).toEqual(["spk0", "spk1", "spk0"]);
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it("returns null for a silent turn (carries no speaker signal)", () => {
|
|
98
|
+
const c = new OnlineSpeakerClusterer();
|
|
99
|
+
expect(c.assignAudio(new Float32Array(SR), SR)).toBeNull();
|
|
100
|
+
});
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
describe("selfVoiceSimilarity", () => {
|
|
104
|
+
it("is high for the agent's own voice (an echo) and low for a person", () => {
|
|
105
|
+
const echo = clip(AGENT_VOICE_TIMBRE, 4242, 1.2);
|
|
106
|
+
const person = clip(speakerTimbreForIndex(0, 2), 1);
|
|
107
|
+
expect(selfVoiceSimilarity(echo, SR)).toBeGreaterThan(0.9);
|
|
108
|
+
expect(selfVoiceSimilarity(person, SR)).toBeLessThan(0.5);
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it("is 0 for silence", () => {
|
|
112
|
+
expect(selfVoiceSimilarity(new Float32Array(SR), SR)).toBe(0);
|
|
113
|
+
});
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
describe("DER gate is no longer tautological (#9427)", () => {
|
|
117
|
+
const a = speakerTimbreForIndex(0, 2);
|
|
118
|
+
const b = speakerTimbreForIndex(1, 2);
|
|
119
|
+
|
|
120
|
+
it("PASSES (DER 0) when the clusterer attributes two voices correctly", () => {
|
|
121
|
+
const c = new OnlineSpeakerClusterer();
|
|
122
|
+
const turns = [
|
|
123
|
+
{ expectedLabel: "alice", startMs: 0, endMs: 1000 },
|
|
124
|
+
{ expectedLabel: "bob", startMs: 1000, endMs: 2000 },
|
|
125
|
+
{ expectedLabel: "alice", startMs: 2000, endMs: 3000 },
|
|
126
|
+
];
|
|
127
|
+
const audio = [clip(a, 1), clip(b, 2), clip(a, 3)];
|
|
128
|
+
const scored = scoreDiarizationTimeline(
|
|
129
|
+
turns.map((t, i) => ({
|
|
130
|
+
...t,
|
|
131
|
+
predictedLabel: c.assignAudio(audio[i], SR),
|
|
132
|
+
})),
|
|
133
|
+
{ maxDer: 0.2 },
|
|
134
|
+
);
|
|
135
|
+
expect(scored.der).toBe(0);
|
|
136
|
+
expect(scored.passed).toBe(true);
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
it("FAILS when two distinct speakers are acoustically merged", () => {
|
|
140
|
+
// Ground truth says alice then bob, but the SECOND turn's audio is also
|
|
141
|
+
// alice's voice — the blind clusterer (correctly) merges them, so the DER
|
|
142
|
+
// gate trips. A tautological gate (predicted = ground-truth label) could
|
|
143
|
+
// never catch this; this is exactly the defect #9427 closes.
|
|
144
|
+
const c = new OnlineSpeakerClusterer();
|
|
145
|
+
const scored = scoreDiarizationTimeline(
|
|
146
|
+
[
|
|
147
|
+
{
|
|
148
|
+
expectedLabel: "alice",
|
|
149
|
+
predictedLabel: c.assignAudio(clip(a, 1), SR),
|
|
150
|
+
startMs: 0,
|
|
151
|
+
endMs: 1000,
|
|
152
|
+
},
|
|
153
|
+
{
|
|
154
|
+
expectedLabel: "bob",
|
|
155
|
+
predictedLabel: c.assignAudio(clip(a, 2), SR), // alice's voice again
|
|
156
|
+
startMs: 1000,
|
|
157
|
+
endMs: 2000,
|
|
158
|
+
},
|
|
159
|
+
],
|
|
160
|
+
{ maxDer: 0.2 },
|
|
161
|
+
);
|
|
162
|
+
expect(scored.der).toBeGreaterThan(0.2);
|
|
163
|
+
expect(scored.passed).toBe(false);
|
|
164
|
+
});
|
|
165
|
+
});
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model-free acoustic speaker attribution for the Voice Workbench (#9147, #9427).
|
|
3
|
+
*
|
|
4
|
+
* The headless decision-logic lane used to copy the ground-truth speaker label
|
|
5
|
+
* straight into `predictedSpeakerLabel`, so the DER gate compared ground truth
|
|
6
|
+
* to itself and could never fail (#9427). This module is the real thing it
|
|
7
|
+
* needed: it derives a speaker label from the AUDIO and nothing else.
|
|
8
|
+
*
|
|
9
|
+
* extractTimbreEmbedding — a deterministic mean-MFCC voice embedding (the
|
|
10
|
+
* speaker's timbre), via a dependency-free FFT +
|
|
11
|
+
* mel filterbank + DCT. No model, no network.
|
|
12
|
+
* OnlineSpeakerClusterer — blind clustering: each turn is matched to the
|
|
13
|
+
* nearest running speaker centroid by cosine and
|
|
14
|
+
* takes that cluster's id, or seeds a new cluster.
|
|
15
|
+
* It never sees ground truth, so a wrong cluster
|
|
16
|
+
* surfaces as real Diarization Error Rate.
|
|
17
|
+
* selfVoiceSimilarity — cosine of a turn against the agent's own TTS-voice
|
|
18
|
+
* imprint — the acoustic self-echo signal that used
|
|
19
|
+
* to be a hardcoded `0.9`.
|
|
20
|
+
*
|
|
21
|
+
* Pure + deterministic (no `Date.now`/`Math.random`), so the gate is byte-stable
|
|
22
|
+
* in CI yet genuinely able to fail on a misattribution.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import {
|
|
26
|
+
AGENT_VOICE_TIMBRE,
|
|
27
|
+
makeSpeechWithSilenceFixture,
|
|
28
|
+
} from "./__test-helpers__/synthetic-speech";
|
|
29
|
+
import {
|
|
30
|
+
cosineSimilarity,
|
|
31
|
+
normalizeVoiceEmbedding,
|
|
32
|
+
updateVoiceImprintCentroid,
|
|
33
|
+
} from "./speaker-imprint";
|
|
34
|
+
|
|
35
|
+
const FFT_SIZE = 512;
|
|
36
|
+
const FRAME_LEN = 400; // 25 ms @ 16 kHz
|
|
37
|
+
const HOP = 160; // 10 ms @ 16 kHz
|
|
38
|
+
const N_MELS = 26;
|
|
39
|
+
const MEL_FMIN_HZ = 80;
|
|
40
|
+
/** MFCC coefficients kept (1..N_CEPS); c0 is dropped — it is just frame energy. */
|
|
41
|
+
const N_CEPS = 13;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Cosine at/above which a turn joins an existing speaker cluster. Tuned so that
|
|
45
|
+
* the same synthetic speaker (even degraded by the corpus's room noise/reverb)
|
|
46
|
+
* stays in one cluster, while two distinct timbres split — see
|
|
47
|
+
* `acoustic-speaker-attribution.test.ts`.
|
|
48
|
+
*/
|
|
49
|
+
export const DEFAULT_SPEAKER_CLUSTER_THRESHOLD = 0.5;
|
|
50
|
+
|
|
51
|
+
/** In-place iterative radix-2 Cooley–Tukey FFT (size must be a power of two). */
|
|
52
|
+
function fftRadix2(re: Float64Array, im: Float64Array): void {
|
|
53
|
+
const n = re.length;
|
|
54
|
+
for (let i = 1, j = 0; i < n; i++) {
|
|
55
|
+
let bit = n >> 1;
|
|
56
|
+
for (; j & bit; bit >>= 1) j ^= bit;
|
|
57
|
+
j ^= bit;
|
|
58
|
+
if (i < j) {
|
|
59
|
+
const tr = re[i];
|
|
60
|
+
re[i] = re[j];
|
|
61
|
+
re[j] = tr;
|
|
62
|
+
const ti = im[i];
|
|
63
|
+
im[i] = im[j];
|
|
64
|
+
im[j] = ti;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
for (let len = 2; len <= n; len <<= 1) {
|
|
68
|
+
const ang = (-2 * Math.PI) / len;
|
|
69
|
+
const wRe = Math.cos(ang);
|
|
70
|
+
const wIm = Math.sin(ang);
|
|
71
|
+
const half = len >> 1;
|
|
72
|
+
for (let i = 0; i < n; i += len) {
|
|
73
|
+
let curRe = 1;
|
|
74
|
+
let curIm = 0;
|
|
75
|
+
for (let k = 0; k < half; k++) {
|
|
76
|
+
const aRe = re[i + k];
|
|
77
|
+
const aIm = im[i + k];
|
|
78
|
+
const idx = i + k + half;
|
|
79
|
+
const bRe = re[idx] * curRe - im[idx] * curIm;
|
|
80
|
+
const bIm = re[idx] * curIm + im[idx] * curRe;
|
|
81
|
+
re[i + k] = aRe + bRe;
|
|
82
|
+
im[i + k] = aIm + bIm;
|
|
83
|
+
re[idx] = aRe - bRe;
|
|
84
|
+
im[idx] = aIm - bIm;
|
|
85
|
+
const nextRe = curRe * wRe - curIm * wIm;
|
|
86
|
+
curIm = curRe * wIm + curIm * wRe;
|
|
87
|
+
curRe = nextRe;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
function hzToMel(hz: number): number {
|
|
94
|
+
return 2595 * Math.log10(1 + hz / 700);
|
|
95
|
+
}
|
|
96
|
+
/** Inverse of the mel mapping: 700 · (10^(mel / 2595) − 1), in Hz. */
function melToHz(mel: number): number {
  const ratio = 10 ** (mel / 2595);
  return 700 * (ratio - 1);
}
|
|
99
|
+
|
|
100
|
+
/** Hann window built on first use and reused afterwards. */
let hannCache: Float64Array | null = null;

/**
 * Returns the `FRAME_LEN`-sample Hann window
 * `w[n] = 0.5 − 0.5·cos(2πn / (FRAME_LEN − 1))`, computing it once and then
 * handing back the same cached array on every later call.
 */
function hannWindow(): Float64Array {
  const cached = hannCache;
  if (cached !== null) return cached;

  const taps = new Float64Array(FRAME_LEN);
  const lastIndex = FRAME_LEN - 1;
  for (let n = 0; n < FRAME_LEN; n += 1) {
    taps[n] = 0.5 - 0.5 * Math.cos((2 * Math.PI * n) / lastIndex);
  }
  hannCache = taps;
  return taps;
}
|
|
110
|
+
|
|
111
|
+
/** Last filterbank built, tagged with the sample rate it was built for. */
let melCache: { sampleRate: number; filters: Float64Array[] } | null = null;

/**
 * Triangular mel filterbank over the `FFT_SIZE/2 + 1` magnitude bins.
 *
 * Produces `N_MELS` weight vectors whose band edges are equally spaced on the
 * mel scale between `MEL_FMIN_HZ` and `sampleRate / 2`. The result is cached
 * for the most recently requested sample rate only.
 *
 * @param sampleRate Sample rate in Hz of the audio being analysed.
 * @returns One `Float64Array` of per-bin weights for each mel band.
 */
function melFilterbank(sampleRate: number): Float64Array[] {
  const cached = melCache;
  if (cached !== null && cached.sampleRate === sampleRate) {
    return cached.filters;
  }

  const binCount = FFT_SIZE / 2 + 1;
  const melLow = hzToMel(MEL_FMIN_HZ);
  const melHigh = hzToMel(sampleRate / 2);

  // N_MELS + 2 edge frequencies; band `b` uses edges b, b + 1 and b + 2 as
  // its lower foot, peak and upper foot.
  const edgeHz: number[] = [];
  for (let e = 0; e < N_MELS + 2; e += 1) {
    edgeHz.push(melToHz(melLow + ((melHigh - melLow) * e) / (N_MELS + 1)));
  }

  const bank: Float64Array[] = [];
  for (let band = 0; band < N_MELS; band += 1) {
    const left = edgeHz[band];
    const peak = edgeHz[band + 1];
    const right = edgeHz[band + 2];
    const weights = new Float64Array(binCount);
    for (let bin = 0; bin < binCount; bin += 1) {
      const freq = (bin * sampleRate) / FFT_SIZE;
      if (freq >= left && freq <= peak && peak > left) {
        // Rising slope of the triangle.
        weights[bin] = (freq - left) / (peak - left);
      } else if (freq > peak && freq <= right && right > peak) {
        // Falling slope of the triangle.
        weights[bin] = (right - freq) / (right - peak);
      }
    }
    bank.push(weights);
  }

  melCache = { sampleRate, filters: bank };
  return bank;
}
|
|
139
|
+
|
|
140
|
+
/** Length parameter of the sinusoidal cepstral lifter; 22 is the long-standing HTK default. */
const CEPSTRAL_LIFTER_L = 22;

/** Lifter weights, computed on first use. */
let lifterCache: number[] | null = null;

/**
 * Returns the `N_CEPS` lifter weights
 * `1 + (L / 2)·sin(πk / L)` for `k = 1..N_CEPS`, memoized after the first call.
 */
function cepstralLifter(): number[] {
  const cached = lifterCache;
  if (cached !== null) return cached;

  const weights = Array.from({ length: N_CEPS }, (_unused, idx) => {
    const k = idx + 1;
    return (
      1 + (CEPSTRAL_LIFTER_L / 2) * Math.sin((Math.PI * k) / CEPSTRAL_LIFTER_L)
    );
  });
  lifterCache = weights;
  return weights;
}
|
|
154
|
+
|
|
155
|
+
/** DCT basis rows, computed on first use. */
let dctCache: Float64Array[] | null = null;

/**
 * DCT-II basis for cepstral coefficients 1..N_CEPS (c0 is left out).
 * Row `k − 1` holds `cos(πk(n + 0.5) / N_MELS)` for each mel band `n`.
 * The table is memoized after the first call.
 */
function dctRows(): Float64Array[] {
  const cached = dctCache;
  if (cached !== null) return cached;

  const basis = Array.from({ length: N_CEPS }, (_unused, idx) => {
    const k = idx + 1;
    const row = new Float64Array(N_MELS);
    for (let band = 0; band < N_MELS; band += 1) {
      row[band] = Math.cos((Math.PI * k * (band + 0.5)) / N_MELS);
    }
    return row;
  });
  dctCache = basis;
  return basis;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Mean-MFCC voice embedding of a mono PCM clip: the average cepstrum over its
 * voiced frames, liftered and passed through `normalizeVoiceEmbedding`.
 * Captures vocal-tract timbre (the speaker), not the words.
 *
 * Steps: (1) slice the clip into `FRAME_LEN`-sample frames every `HOP` samples
 * and measure each frame's energy; (2) keep frames whose energy is at least
 * 10% of the loudest frame; (3) for each kept frame, apply the Hann window,
 * FFT, mel filterbank, log, and DCT-II; (4) average the cepstra, apply the
 * sinusoidal lifter, and normalize.
 *
 * @param pcm Mono PCM samples.
 * @param sampleRate Sample rate of `pcm` in Hz (selects the mel filterbank).
 * @returns An `N_CEPS`-element embedding, or an all-zero vector for silence,
 *   audio shorter than one frame, or audio with no usable (finite) frame.
 */
export function extractTimbreEmbedding(
  pcm: Float32Array,
  sampleRate: number,
): number[] {
  const filters = melFilterbank(sampleRate);
  const dct = dctRows();
  const hann = hannWindow();
  const nBins = FFT_SIZE / 2 + 1;

  // Pass 1: per-frame energy, to keep only voiced frames (relative to the peak).
  const starts: number[] = [];
  const energies: number[] = [];
  let maxEnergy = 0;
  for (let s = 0; s + FRAME_LEN <= pcm.length; s += HOP) {
    let e = 0;
    for (let i = 0; i < FRAME_LEN; i++) {
      const v = pcm[s + i];
      e += v * v;
    }
    // A frame holding NaN/±Infinity samples has a non-finite energy. It would
    // slip past the `< floor` test below (comparisons with NaN are false) and
    // turn the whole mean cepstrum into NaN, so discard it here.
    if (!Number.isFinite(e)) continue;
    starts.push(s);
    energies.push(e);
    if (e > maxEnergy) maxEnergy = e;
  }
  if (maxEnergy <= 1e-9) return new Array<number>(N_CEPS).fill(0);
  const floor = maxEnergy * 0.1;

  // Pass 2: accumulate the cepstrum of every voiced frame. The scratch
  // buffers are allocated once and fully overwritten for each frame.
  const re = new Float64Array(FFT_SIZE);
  const im = new Float64Array(FFT_SIZE);
  const logMel = new Float64Array(N_MELS);
  const accum = new Float64Array(N_CEPS);
  let voiced = 0;
  for (let fi = 0; fi < starts.length; fi++) {
    if (energies[fi] < floor) continue;
    const s = starts[fi];
    re.fill(0);
    im.fill(0);
    for (let i = 0; i < FRAME_LEN; i++) re[i] = pcm[s + i] * hann[i];
    fftRadix2(re, im);
    // Log mel-band energies (small offset keeps log() finite for empty bands).
    for (let m = 0; m < N_MELS; m++) {
      const filt = filters[m];
      let acc = 0;
      for (let b = 0; b < nBins; b++) {
        const power = re[b] * re[b] + im[b] * im[b];
        acc += power * filt[b];
      }
      logMel[m] = Math.log(acc + 1e-10);
    }
    // DCT-II → cepstrum (coefficients 1..N_CEPS).
    for (let k = 0; k < N_CEPS; k++) {
      const row = dct[k];
      let c = 0;
      for (let n = 0; n < N_MELS; n++) c += logMel[n] * row[n];
      accum[k] += c;
    }
    voiced += 1;
  }
  if (voiced === 0) return new Array<number>(N_CEPS).fill(0);

  const lifter = cepstralLifter();
  const mean = new Array<number>(N_CEPS);
  // Sinusoidal liftering de-emphasizes the dominant low-order cepstral tilt (the
  // glottal source slope, shared by every voice) and emphasizes the mid/high
  // coefficients that carry the formant pattern — i.e. the speaker signal.
  for (let k = 0; k < N_CEPS; k++) mean[k] = (accum[k] / voiced) * lifter[k];
  return normalizeVoiceEmbedding(mean);
}
|
|
242
|
+
|
|
243
|
+
/** True when every element of `v` is exactly zero (an empty vector counts as zero). */
function isZeroVector(v: ArrayLike<number>): boolean {
  const size = v.length;
  let idx = 0;
  while (idx < size) {
    if (v[idx] !== 0) {
      return false;
    }
    idx += 1;
  }
  return true;
}
|
|
247
|
+
|
|
248
|
+
/** One running speaker cluster tracked by `OnlineSpeakerClusterer`. */
interface SpeakerCluster {
  /** Cluster label, assigned as `spk<N>` in creation order. */
  id: string;
  /** Running centroid embedding that new turns are compared against. */
  centroid: number[];
  /** Number of embeddings merged into the centroid so far. */
  count: number;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Blind online speaker clustering. Each turn embedding is matched to the nearest
 * running centroid; if the best cosine clears the threshold it joins that
 * cluster (and updates its centroid), otherwise it seeds a new one. The cluster
 * ids it returns (`spk0`, `spk1`, …) are label-agnostic — DER maps them onto the
 * ground-truth speakers optimally — so the clusterer never needs (and never
 * sees) the true labels.
 */
export class OnlineSpeakerClusterer {
  /** Clusters in creation order; entries are never removed, so ids stay unique. */
  private readonly clusters: SpeakerCluster[] = [];
  /** Minimum cosine similarity required to join an existing cluster. */
  private readonly threshold: number;

  /**
   * @param threshold Cosine at/above which a turn joins its best-matching
   *   cluster; defaults to `DEFAULT_SPEAKER_CLUSTER_THRESHOLD`.
   */
  constructor(threshold: number = DEFAULT_SPEAKER_CLUSTER_THRESHOLD) {
    this.threshold = threshold;
  }

  /**
   * Assign an embedding to a cluster id, or `null` if it carries no signal.
   *
   * @param embedding Turn embedding; it is normalized before comparison.
   * @returns The id of the joined or newly created cluster, or `null` when the
   *   embedding is empty, all zeros, or contains a non-finite component.
   */
  assign(embedding: ArrayLike<number>): string | null {
    if (embedding.length === 0 || isZeroVector(embedding)) return null;
    // A NaN/±Infinity component makes every cosine NaN, so the embedding could
    // never match a cluster and would seed a new junk cluster (with an
    // unmatchable centroid) on every call. Treat it as carrying no signal.
    for (let i = 0; i < embedding.length; i++) {
      if (!Number.isFinite(embedding[i])) return null;
    }

    const emb = normalizeVoiceEmbedding(embedding);

    // Find the existing cluster whose centroid is most similar.
    let best: SpeakerCluster | null = null;
    let bestSim = Number.NEGATIVE_INFINITY;
    for (const cluster of this.clusters) {
      const sim = cosineSimilarity(emb, cluster.centroid);
      if (sim > bestSim) {
        bestSim = sim;
        best = cluster;
      }
    }

    if (best && bestSim >= this.threshold) {
      // Close enough: fold the observation into the running centroid.
      const updated = updateVoiceImprintCentroid({
        centroidEmbedding: best.centroid,
        sampleCount: best.count,
        observationEmbedding: emb,
      });
      best.centroid = updated.centroidEmbedding;
      best.count = updated.sampleCount;
      return best.id;
    }

    // No sufficiently similar cluster: start a new one from this embedding.
    const id = `spk${this.clusters.length}`;
    this.clusters.push({ id, centroid: emb, count: 1 });
    return id;
  }

  /** Embed `pcm` and assign it to a cluster id (or `null` for silence). */
  assignAudio(pcm: Float32Array, sampleRate: number): string | null {
    return this.assign(extractTimbreEmbedding(pcm, sampleRate));
  }
}
|
|
303
|
+
|
|
304
|
+
/** Agent-voice embedding for the most recently requested sample rate. */
let agentVoiceCache: { sampleRate: number; embedding: number[] } | null = null;

/**
 * The agent's own TTS-voice imprint embedding.
 *
 * Synthesizes a reference clip with `makeSpeechWithSilenceFixture` using the
 * `AGENT_VOICE_TIMBRE` timbre and a fixed seed, embeds it with
 * `extractTimbreEmbedding`, and memoizes the result for the latest sample rate.
 *
 * @param sampleRate Sample rate in Hz at which to synthesize and embed.
 * @returns The embedding of the reference clip.
 */
function agentVoiceEmbedding(sampleRate: number): number[] {
  const cached = agentVoiceCache;
  if (cached !== null && cached.sampleRate === sampleRate) {
    return cached.embedding;
  }

  const reference = makeSpeechWithSilenceFixture({
    sampleRate,
    leadSilenceSec: 0.05,
    speechSec: 1.5,
    tailSilenceSec: 0.05,
    seed: 0xa6e7,
    timbre: AGENT_VOICE_TIMBRE,
  });
  const imprint = extractTimbreEmbedding(reference.pcm, sampleRate);
  agentVoiceCache = { sampleRate, embedding: imprint };
  return imprint;
}
|
|
322
|
+
|
|
323
|
+
/**
 * Cosine similarity (clamped to 0..1) of a turn's audio against the agent's own
 * synthetic-voice imprint. High ⇒ the agent is hearing ITSELF (its TTS bled back
 * into the mic). This is the acoustic self-echo signal the respond-gate consumes
 * as `selfVoiceSimilarity` — a real measurement, not a constant.
 *
 * @param pcm Mono PCM samples of the turn.
 * @param sampleRate Sample rate of `pcm` in Hz.
 * @returns A value in [0, 1]; 0 when the clip yields a zero embedding or the
 *   similarity is not a finite number.
 */
export function selfVoiceSimilarity(
  pcm: Float32Array,
  sampleRate: number,
): number {
  const emb = extractTimbreEmbedding(pcm, sampleRate);
  if (isZeroVector(emb)) return 0;
  const sim = cosineSimilarity(emb, agentVoiceEmbedding(sampleRate));
  // NaN would pass straight through Math.max/Math.min; report "no evidence".
  if (!Number.isFinite(sim)) return 0;
  // Floating-point rounding can nudge a cosine slightly past 1, so clamp both
  // ends to honour the documented 0..1 range.
  return Math.min(1, Math.max(0, sim));
}
|
|
@@ -13,18 +13,16 @@
|
|
|
13
13
|
* - the fused lib is not resolvable (`ELIZA_INFERENCE_LIBRARY` /
|
|
14
14
|
* `ELIZA_INFERENCE_LIB_DIR`, or a build under build-static-fused),
|
|
15
15
|
* - no Eliza-1 ASR bundle is provided (`ELIZA_ASR_BUNDLE`, or the default
|
|
16
|
-
* `~/.eliza/local-inference/models/eliza-1-
|
|
16
|
+
* `~/.eliza/local-inference/models/eliza-1-2b.bundle`),
|
|
17
17
|
* - the `freeman.wav` speech submodule isn't checked out,
|
|
18
18
|
* - or the loaded build predates v12 (`timedAsrSupported() === false`).
|
|
19
19
|
* Runs via `bun test` (the post-merge lane runner — bun:ffi + `globalThis.Bun`);
|
|
20
20
|
* `*.real.test.ts` is excluded from the default `vitest.config.ts` lane.
|
|
21
21
|
*
|
|
22
|
-
* Reproduce locally
|
|
23
|
-
* B=~/.eliza/local-inference/models/eliza-1-asr
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
* mv "$B/asr/Qwen3-ASR-0.6B-Q8_0.gguf" "$B/asr/eliza-1-asr.gguf"
|
|
27
|
-
* mv "$B/asr/mmproj-Qwen3-ASR-0.6B-Q8_0.gguf" "$B/asr/eliza-1-asr-mmproj.gguf"
|
|
22
|
+
* Reproduce locally once Gemma ASR artifacts are staged:
|
|
23
|
+
* B=~/.eliza/local-inference/models/eliza-1-gemma-asr.bundle
|
|
24
|
+
* mkdir -p "$B/asr"
|
|
25
|
+
* # Place the built or downloaded Gemma ASR runtime artifacts under "$B/asr".
|
|
28
26
|
* ELIZA_ASR_BUNDLE="$B" ELIZA_INFERENCE_LIBRARY=<built libelizainference.so> \
|
|
29
27
|
* bun test src/services/voice/asr-timed.real.test.ts
|
|
30
28
|
* The fused `eliza_pick_asr_files` resolves an ASR-only bundle from
|
|
@@ -62,7 +60,7 @@ const LIB_PATH =
|
|
|
62
60
|
|
|
63
61
|
const BUNDLE =
|
|
64
62
|
process.env.ELIZA_ASR_BUNDLE?.trim() ||
|
|
65
|
-
path.join(os.homedir(), ".eliza/local-inference/models/eliza-1-
|
|
63
|
+
path.join(os.homedir(), ".eliza/local-inference/models/eliza-1-2b.bundle");
|
|
66
64
|
const HAVE_BUNDLE = existsSync(BUNDLE);
|
|
67
65
|
|
|
68
66
|
const FREEMAN_WAV = fileURLToPath(
|