@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import type { VoiceInputSource, VoiceSegment, VoiceSpeaker } from "./types";
|
|
2
|
+
export declare const DEFAULT_VOICE_IMPRINT_MATCH_THRESHOLD = 0.78;
|
|
3
|
+
export interface VoiceImprintProfile {
|
|
4
|
+
id: string;
|
|
5
|
+
centroidEmbedding: ArrayLike<number> | null | undefined;
|
|
6
|
+
embeddingModel?: string | null;
|
|
7
|
+
sampleCount?: number | null;
|
|
8
|
+
confidence?: number | null;
|
|
9
|
+
label?: string | null;
|
|
10
|
+
displayName?: string | null;
|
|
11
|
+
entityId?: string | null;
|
|
12
|
+
sourceKind?: string | null;
|
|
13
|
+
sourceScopeId?: string | null;
|
|
14
|
+
metadata?: Record<string, unknown> | null;
|
|
15
|
+
}
|
|
16
|
+
export interface VoiceImprintMatch {
|
|
17
|
+
profile: VoiceImprintProfile;
|
|
18
|
+
similarity: number;
|
|
19
|
+
confidence: number;
|
|
20
|
+
}
|
|
21
|
+
export interface VoiceImprintCentroidUpdate {
|
|
22
|
+
centroidEmbedding: number[];
|
|
23
|
+
sampleCount: number;
|
|
24
|
+
confidence: number;
|
|
25
|
+
}
|
|
26
|
+
export interface VoiceImprintObservationInput {
|
|
27
|
+
id: string;
|
|
28
|
+
segmentId?: string;
|
|
29
|
+
text: string;
|
|
30
|
+
startMs: number;
|
|
31
|
+
endMs: number;
|
|
32
|
+
embedding: ArrayLike<number>;
|
|
33
|
+
embeddingModel?: string | null;
|
|
34
|
+
confidence?: number | null;
|
|
35
|
+
source?: VoiceInputSource;
|
|
36
|
+
metadata?: Record<string, unknown> | null;
|
|
37
|
+
}
|
|
38
|
+
export interface AttributedVoiceObservation {
|
|
39
|
+
observation: VoiceImprintObservationInput;
|
|
40
|
+
match: VoiceImprintMatch | null;
|
|
41
|
+
speaker: VoiceSpeaker | null;
|
|
42
|
+
segment: VoiceSegment;
|
|
43
|
+
}
|
|
44
|
+
export interface SpeakerAttributionResult {
|
|
45
|
+
observations: AttributedVoiceObservation[];
|
|
46
|
+
segments: VoiceSegment[];
|
|
47
|
+
primarySpeaker?: VoiceSpeaker;
|
|
48
|
+
summary: {
|
|
49
|
+
totalObservations: number;
|
|
50
|
+
matchedObservations: number;
|
|
51
|
+
unmatchedObservations: number;
|
|
52
|
+
meanConfidence: number;
|
|
53
|
+
meanSimilarity: number | null;
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
export declare function normalizeVoiceEmbedding(embedding: ArrayLike<number>): number[];
|
|
57
|
+
export declare function cosineSimilarity(left: ArrayLike<number>, right: ArrayLike<number>): number;
|
|
58
|
+
export declare function updateVoiceImprintCentroid(args: {
|
|
59
|
+
centroidEmbedding?: ArrayLike<number> | null;
|
|
60
|
+
sampleCount?: number | null;
|
|
61
|
+
confidence?: number | null;
|
|
62
|
+
observationEmbedding: ArrayLike<number>;
|
|
63
|
+
observationConfidence?: number | null;
|
|
64
|
+
maxObservationWeight?: number;
|
|
65
|
+
}): VoiceImprintCentroidUpdate;
|
|
66
|
+
export declare function matchVoiceImprint(args: {
|
|
67
|
+
embedding: ArrayLike<number>;
|
|
68
|
+
profiles: readonly VoiceImprintProfile[];
|
|
69
|
+
embeddingModel?: string | null;
|
|
70
|
+
threshold?: number;
|
|
71
|
+
}): VoiceImprintMatch | null;
|
|
72
|
+
export declare function voiceSpeakerFromImprintMatch(args: {
|
|
73
|
+
match: VoiceImprintMatch;
|
|
74
|
+
source?: VoiceInputSource;
|
|
75
|
+
observationId?: string;
|
|
76
|
+
}): VoiceSpeaker;
|
|
77
|
+
export declare function attributeVoiceImprintObservations(args: {
|
|
78
|
+
observations: readonly VoiceImprintObservationInput[];
|
|
79
|
+
profiles: readonly VoiceImprintProfile[];
|
|
80
|
+
threshold?: number;
|
|
81
|
+
defaultSource?: VoiceInputSource;
|
|
82
|
+
}): SpeakerAttributionResult;
|
|
83
|
+
//# sourceMappingURL=speaker-imprint.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speaker-imprint.d.ts","sourceRoot":"","sources":["../../../src/services/voice/speaker-imprint.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAE5E,eAAO,MAAM,qCAAqC,OAAO,CAAC;AAE1D,MAAM,WAAW,mBAAmB;IACnC,EAAE,EAAE,MAAM,CAAC;IACX,iBAAiB,EAAE,SAAS,CAAC,MAAM,CAAC,GAAG,IAAI,GAAG,SAAS,CAAC;IACxD,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,aAAa,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC1C;AAED,MAAM,WAAW,iBAAiB;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,0BAA0B;IAC1C,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,4BAA4B;IAC5C,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;IAC7B,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;CAC1C;AAED,MAAM,WAAW,0BAA0B;IAC1C,WAAW,EAAE,4BAA4B,CAAC;IAC1C,KAAK,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAChC,OAAO,EAAE,YAAY,GAAG,IAAI,CAAC;IAC7B,OAAO,EAAE,YAAY,CAAC;CACtB;AAED,MAAM,WAAW,wBAAwB;IACxC,YAAY,EAAE,0BAA0B,EAAE,CAAC;IAC3C,QAAQ,EAAE,YAAY,EAAE,CAAC;IACzB,cAAc,CAAC,EAAE,YAAY,CAAC;IAC9B,OAAO,EAAE;QACR,iBAAiB,EAAE,MAAM,CAAC;QAC1B,mBAAmB,EAAE,MAAM,CAAC;QAC5B,qBAAqB,EAAE,MAAM,CAAC;QAC9B,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;KAC9B,CAAC;CACF;AAOD,wBAAgB,uBAAuB,CACtC,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC,GAC1B,MAAM,EAAE,CAiBV;AAED,wBAAgB,gBAAgB,CAC/B,IAAI,EAAE,SAAS,CAAC,MAAM,CAAC,EACvB,KAAK,EAAE,SAAS,CAAC,MAAM,CAAC,GACtB,MAAM,CAWR;AAED,wBAA
gB,0BAA0B,CAAC,IAAI,EAAE;IAChD,iBAAiB,CAAC,EAAE,SAAS,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC7C,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,oBAAoB,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;IACxC,qBAAqB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC9B,GAAG,0BAA0B,CA+B7B;AAED,wBAAgB,iBAAiB,CAAC,IAAI,EAAE;IACvC,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;IAC7B,QAAQ,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACzC,cAAc,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/B,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB,GAAG,iBAAiB,GAAG,IAAI,CA+B3B;AAED,wBAAgB,4BAA4B,CAAC,IAAI,EAAE;IAClD,KAAK,EAAE,iBAAiB,CAAC;IACzB,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,aAAa,CAAC,EAAE,MAAM,CAAC;CACvB,GAAG,YAAY,CAqBf;AAED,wBAAgB,iCAAiC,CAAC,IAAI,EAAE;IACvD,YAAY,EAAE,SAAS,4BAA4B,EAAE,CAAC;IACtD,QAAQ,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACzC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,gBAAgB,CAAC;CACjC,GAAG,wBAAwB,CA2E3B"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { SpeakerPreset } from "./types";
|
|
2
|
+
import { type VoicePresetSeedPhrase } from "./voice-preset-format";
|
|
3
|
+
export interface PresetBundlePaths {
|
|
4
|
+
bundleRoot: string;
|
|
5
|
+
/** Relative path to the *default* voice preset inside the bundle. */
|
|
6
|
+
cacheRelPath?: string;
|
|
7
|
+
}
|
|
8
|
+
export interface LoadedPresetBundle {
|
|
9
|
+
preset: SpeakerPreset;
|
|
10
|
+
/** Phrase-cache seed entries parsed alongside the embedding. The engine
|
|
11
|
+
* bridge feeds these into a `PhraseCache.seed(...)` call before the
|
|
12
|
+
* scheduler is constructed. */
|
|
13
|
+
phrases: ReadonlyArray<VoicePresetSeedPhrase>;
|
|
14
|
+
}
|
|
15
|
+
export interface SpeakerPresetCacheOptions {
|
|
16
|
+
/**
|
|
17
|
+
* Maximum number of distinct voices held in RAM at once. Speaker presets
|
|
18
|
+
* are KB-scale (an embedding vector + the seed-phrase PCM), so the default
|
|
19
|
+
* is generous; the bound exists so a connector that switches voices per
|
|
20
|
+
* speaker (e.g. a Discord room with many users) does not grow unbounded.
|
|
21
|
+
* On insert past the bound the least-recently-used voice is evicted.
|
|
22
|
+
*
|
|
23
|
+
* Defaults to 8 hot voices (R6 §1: "LRU 8 hot, mmap-backed").
|
|
24
|
+
*/
|
|
25
|
+
maxVoices?: number;
|
|
26
|
+
}
|
|
27
|
+
export declare const DEFAULT_VOICE_ID = "default";
|
|
28
|
+
export declare const DEFAULT_VOICE_PRESET_REL_PATH: string;
|
|
29
|
+
/**
|
|
30
|
+
* Resolve the on-disk path of a voice preset inside a bundle. The default
|
|
31
|
+
* voice lives at `cache/voice-preset-default.bin`; additional voices ship as
|
|
32
|
+
* `cache/voice-preset-<voiceId>.bin`. Throws on a `voiceId` that is not a safe
|
|
33
|
+
* single path segment (no `/`, no `..`).
|
|
34
|
+
*/
|
|
35
|
+
export declare function voicePresetPath(bundleRoot: string, voiceId: string): string;
|
|
36
|
+
/**
|
|
37
|
+
* LRU cache of parsed speaker presets keyed by `voiceId`. Holds the speaker
|
|
38
|
+
* embedding, the raw preset bytes (for FFI handoff), and the phrase-cache seed
|
|
39
|
+
* list parsed from the preset file. Multi-voice: `load(bundleRoot, voiceId)`
|
|
40
|
+
* reads `cache/voice-preset-<voiceId>.bin` from the bundle on a miss.
|
|
41
|
+
*
|
|
42
|
+
* v2 preset fields (`refAudioTokens`, `refText`, `instruct`) are surfaced
|
|
43
|
+
* on the `SpeakerPreset` shape so the FFI bridge can pass them through to
|
|
44
|
+
* `ov_tts_params` without going through the legacy "instruct == voiceId"
|
|
45
|
+
* misreading.
|
|
46
|
+
*/
|
|
47
|
+
export declare class SpeakerPresetCache {
|
|
48
|
+
private readonly entries;
|
|
49
|
+
private readonly maxVoices;
|
|
50
|
+
constructor(opts?: SpeakerPresetCacheOptions);
|
|
51
|
+
/**
|
|
52
|
+
* Load the bundle's default voice preset (`cache/voice-preset-default.bin`,
|
|
53
|
+
* or `paths.cacheRelPath` if overridden) and return both the speaker
|
|
54
|
+
* embedding and the phrase-cache seed entries. Cached for subsequent
|
|
55
|
+
* `get("default")` lookups (and marked most-recently-used).
|
|
56
|
+
*/
|
|
57
|
+
loadFromBundle(paths: PresetBundlePaths, voiceId?: string): LoadedPresetBundle;
|
|
58
|
+
/**
|
|
59
|
+
* Load an arbitrary voice by id from a bundle root, reading
|
|
60
|
+
* `cache/voice-preset-<voiceId>.bin` (or `cache/voice-preset-default.bin`
|
|
61
|
+
* for `"default"`). Returns the cached entry on a hit (marked MRU).
|
|
62
|
+
*/
|
|
63
|
+
load(bundleRoot: string, voiceId: string): LoadedPresetBundle;
|
|
64
|
+
/** True if `voiceId` is currently resident in the cache. */
|
|
65
|
+
has(voiceId: string): boolean;
|
|
66
|
+
put(preset: SpeakerPreset): void;
|
|
67
|
+
get(voiceId: string): SpeakerPreset | undefined;
|
|
68
|
+
/** Seed entries previously loaded for a voice, if any (does not touch LRU order). */
|
|
69
|
+
getSeed(voiceId: string): ReadonlyArray<VoicePresetSeedPhrase>;
|
|
70
|
+
/** Number of voices currently resident. */
|
|
71
|
+
size(): number;
|
|
72
|
+
/** Drop every cached preset. */
|
|
73
|
+
clear(): void;
|
|
74
|
+
private loadFile;
|
|
75
|
+
private evictOverflow;
|
|
76
|
+
}
|
|
77
|
+
//# sourceMappingURL=speaker-preset-cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"speaker-preset-cache.d.ts","sourceRoot":"","sources":["../../../src/services/voice/speaker-preset-cache.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC7C,OAAO,EAGN,KAAK,qBAAqB,EAC1B,MAAM,uBAAuB,CAAC;AAE/B,MAAM,WAAW,iBAAiB;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,YAAY,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,kBAAkB;IAClC,MAAM,EAAE,aAAa,CAAC;IACtB;;oCAEgC;IAChC,OAAO,EAAE,aAAa,CAAC,qBAAqB,CAAC,CAAC;CAC9C;AAED,MAAM,WAAW,yBAAyB;IACzC;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,eAAO,MAAM,gBAAgB,YAAY,CAAC;AAG1C,eAAO,MAAM,6BAA6B,QAGzC,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAU3E;AAOD;;;;;;;;;;GAUG;AACH,qBAAa,kBAAkB;IAG9B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiC;IACzD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,IAAI,GAAE,yBAA8B;IAOhD;;;;;OAKG;IACH,cAAc,CACb,KAAK,EAAE,iBAAiB,EACxB,OAAO,SAAmB,GACxB,kBAAkB;IASrB;;;;OAIG;IACH,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,kBAAkB;IAI7D,4DAA4D;IAC5D,GAAG,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO;IAI7B,GAAG,CAAC,MAAM,EAAE,aAAa,GAAG,IAAI;IAUhC,GAAG,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,GAAG,SAAS;IAQ/C,qFAAqF;IACrF,OAAO,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,CAAC,qBAAqB,CAAC;IAI9D,2CAA2C;IAC3C,IAAI,IAAI,MAAM;IAId,gCAAgC;IAChC,KAAK,IAAI,IAAI;IAIb,OAAO,CAAC,QAAQ;IA+BhB,OAAO,CAAC,aAAa;CAOrB"}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming-ASR pipeline adapter (item A1 / W7).
|
|
3
|
+
*
|
|
4
|
+
* The base `VoicePipeline` in `voice/pipeline.ts` drives a `StreamingTranscriber`
|
|
5
|
+
* as a batch: it pushes the WHOLE VAD-gated utterance buffer in a single
|
|
6
|
+
* `feed()` call, awaits `flush()`, and only then splits the final transcript
|
|
7
|
+
* into tokens for the drafter/verifier loop. That works for the fused
|
|
8
|
+
* batch decoder, but it leaves the biggest
|
|
9
|
+
* H2 UX seam (incremental partials → planner / barge-in word-confirm /
|
|
10
|
+
* speculative-on-pause) untapped when the fused build is in streaming mode.
|
|
11
|
+
*
|
|
12
|
+
* Rather than rewrite `pipeline.ts` (the Phase 0/1 agent owns it), this
|
|
13
|
+
* module is a small WRAPPER the engine bridge can use to deliver PCM
|
|
14
|
+
* chunks to a `StreamingTranscriber` as they arrive from the mic / VAD,
|
|
15
|
+
* surface every `partial` event to the turn controller via `onPartial`,
|
|
16
|
+
* and only finalize on `speech-end`. Behind a flag — when the fused
|
|
17
|
+
* library advertises `asrStreamSupported() === false` or the flag is off
|
|
18
|
+
* the caller keeps using `VoicePipeline.transcribeAll` exactly as today.
|
|
19
|
+
*
|
|
20
|
+
* Integration point (documented for the Phase 0/1 agent so they can wire
|
|
21
|
+
* it without merge friction):
|
|
22
|
+
*
|
|
23
|
+
* 1. `EngineVoiceBridge` decides whether streaming is available via
|
|
24
|
+
*      `pickStreamingMode({ ffiSupportsStreaming, asrBundlePresent, enableStreaming })`. When the
|
|
25
|
+
* mode is `"streaming"`, the bridge constructs `StreamingAsrFeeder`
|
|
26
|
+
* once per turn (passing the same transcriber that would have been
|
|
27
|
+
* handed to `VoicePipeline`) and routes mic PCM frames through
|
|
28
|
+
* `feeder.feedFrame(frame)` instead of buffering them.
|
|
29
|
+
* 2. The feeder forwards every transcriber `partial` event to
|
|
30
|
+
* `onPartial(update)`. When VAD reports `speech-end` the caller
|
|
31
|
+
* calls `await feeder.finalize()`; the returned `TranscriptUpdate`
|
|
32
|
+
* is the final and is used to seed the drafter/verifier loop exactly
|
|
33
|
+
* as before (`splitTranscriptToTokens(final.partial, 0, final.tokens)`).
|
|
34
|
+
* 3. The batch path (`VoicePipeline.transcribeAll`) is unchanged for
|
|
35
|
+
* every other adapter — there is no fork in `pipeline.ts` itself.
|
|
36
|
+
*
|
|
37
|
+
* This file is intentionally small and side-effect-free so it can land
|
|
38
|
+
* during the merge window without touching files other agents own.
|
|
39
|
+
*/
|
|
40
|
+
import type { PcmFrame, StreamingTranscriber, TextToken, TranscriptUpdate } from "../types";
|
|
41
|
+
/**
|
|
42
|
+
* LocalAgreement-n word-level partial stabilizer.
|
|
43
|
+
*
|
|
44
|
+
* Usage:
|
|
45
|
+
* const buf = new LocalAgreementBuffer();
|
|
46
|
+
* const stable = buf.stable(["hello", "there", "world"]);
|
|
47
|
+
* // → [] on first call (need n=2 consecutive identical prefix)
|
|
48
|
+
* const stable2 = buf.stable(["hello", "there", "how"]);
|
|
49
|
+
* // → ["hello", "there"] (matched across two consecutive hypotheses)
|
|
50
|
+
*/
|
|
51
|
+
export declare class LocalAgreementBuffer {
|
|
52
|
+
private readonly n;
|
|
53
|
+
/** Rolling window of the last `n` hypotheses, oldest first. */
|
|
54
|
+
private window;
|
|
55
|
+
/** Monotonically growing committed word list. */
|
|
56
|
+
private committed;
|
|
57
|
+
constructor(n?: number);
|
|
58
|
+
/**
|
|
59
|
+
* Feed the latest word-level hypothesis. Returns the stable committed
|
|
60
|
+
* prefix — the longest leading word sequence that has appeared
|
|
61
|
+
* identically in `n` consecutive calls. Monotonically non-decreasing.
|
|
62
|
+
*
|
|
63
|
+
* A rolling window of the last `n` hypotheses is maintained. Once the
|
|
64
|
+
* window is full, the agreed prefix is the intersection across all `n`
|
|
65
|
+
* entries — word i is in the agreed prefix only if it is identical in
|
|
66
|
+
* every hypothesis in the window.
|
|
67
|
+
*/
|
|
68
|
+
stable(current: string[]): string[];
|
|
69
|
+
/** Clear all state. Call at utterance boundaries. */
|
|
70
|
+
reset(): void;
|
|
71
|
+
/** The current committed stable word list (read-only view). */
|
|
72
|
+
getCommitted(): string[];
|
|
73
|
+
}
|
|
74
|
+
/** Available transcription drive modes. */
|
|
75
|
+
export type StreamingPipelineMode = "streaming" | "batch";
|
|
76
|
+
export interface PickStreamingModeArgs {
|
|
77
|
+
/** True only when the loaded fused library advertises a working streaming decoder. */
|
|
78
|
+
ffiSupportsStreaming: boolean;
|
|
79
|
+
/** True only when the bundled ASR model is present on disk. */
|
|
80
|
+
asrBundlePresent: boolean;
|
|
81
|
+
/**
|
|
82
|
+
* Feature flag — the engine bridge's default is FALSE so the streaming path stays opt-in
|
|
83
|
+
* until the Phase 0/1 partial-stabilizer wiring lands. Once that lands
|
|
84
|
+
* the engine bridge flips this default to true.
|
|
85
|
+
*/
|
|
86
|
+
enableStreaming: boolean;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Choose the transcription drive mode. Streaming is selected only when:
|
|
90
|
+
* - the loaded fused library advertises a working streaming decoder
|
|
91
|
+
* (`asr_stream_supported() === 1`), AND
|
|
92
|
+
* - the bundled ASR model is present, AND
|
|
93
|
+
* - the engine bridge has opted in via `enableStreaming`.
|
|
94
|
+
*
|
|
95
|
+
* Any other combination falls back to the existing batch path
|
|
96
|
+
* (`VoicePipeline.transcribeAll`).
|
|
97
|
+
*/
|
|
98
|
+
export declare function pickStreamingMode(args: PickStreamingModeArgs): StreamingPipelineMode;
|
|
99
|
+
export interface StreamingAsrFeederEvents {
|
|
100
|
+
/**
|
|
101
|
+
* Called for every transcriber `partial` event the feeder observes
|
|
102
|
+
* BEFORE the segment is finalized. Includes the running `partial`
|
|
103
|
+
* text, `isFinal: false`, and (when the fused build supplied them)
|
|
104
|
+
* the shared text-model token ids.
|
|
105
|
+
*/
|
|
106
|
+
onPartial?(update: TranscriptUpdate): void;
|
|
107
|
+
/**
|
|
108
|
+
* Called the first time ≥1 real word is recognized in the segment.
|
|
109
|
+
* Wired into the turn controller's word-confirm gate so the agent
|
|
110
|
+
* only barge-in-cancels on real speech, not blips.
|
|
111
|
+
*/
|
|
112
|
+
onWords?(words: ReadonlyArray<string>): void;
|
|
113
|
+
/**
|
|
114
|
+
* Called once, after `finalize()` returns, with the final transcript
|
|
115
|
+
* split into contiguous text tokens (`splitTranscriptToTokens`). The
|
|
116
|
+
* batch path delivers the same shape via `transcribeAll`, so the
|
|
117
|
+
* downstream drafter/verifier loop sees an identical signal.
|
|
118
|
+
*/
|
|
119
|
+
onFinalTokens?(tokens: ReadonlyArray<TextToken>, final: TranscriptUpdate): void;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* Drives a `StreamingTranscriber` chunk-by-chunk on behalf of the engine
|
|
123
|
+
* bridge / turn controller. One instance per active speech segment;
|
|
124
|
+
* `finalize()` returns the final transcript and the feeder is disposed.
|
|
125
|
+
*
|
|
126
|
+
* Construction takes a `StreamingTranscriber` (already constructed via
|
|
127
|
+
* `createStreamingTranscriber` with the same options used for batch).
|
|
128
|
+
* The feeder does NOT own the transcriber's lifecycle — disposal still
|
|
129
|
+
* runs through the engine bridge so the same path is used when the
|
|
130
|
+
* batch fallback is taken.
|
|
131
|
+
*/
|
|
132
|
+
export declare class StreamingAsrFeeder {
|
|
133
|
+
private readonly transcriber;
|
|
134
|
+
private readonly events;
|
|
135
|
+
private latestPartial;
|
|
136
|
+
private finalized;
|
|
137
|
+
private unsubscribe;
|
|
138
|
+
constructor(args: {
|
|
139
|
+
transcriber: StreamingTranscriber;
|
|
140
|
+
events?: StreamingAsrFeederEvents;
|
|
141
|
+
});
|
|
142
|
+
/**
|
|
143
|
+
* Feed one PCM frame as it arrives from the mic / connector. Drops
|
|
144
|
+
* frames received after `finalize()` (the segment is over).
|
|
145
|
+
*/
|
|
146
|
+
feedFrame(frame: PcmFrame): void;
|
|
147
|
+
/**
|
|
148
|
+
* Force-finalize on `speech-end`. Resolves with the final transcript
|
|
149
|
+
* and emits `onFinalTokens` so the caller can seed the drafter /
|
|
150
|
+
* verifier loop without re-running the surface split itself.
|
|
151
|
+
*
|
|
152
|
+
* Calling `finalize()` twice is a hard error — the segment is over.
|
|
153
|
+
*/
|
|
154
|
+
finalize(): Promise<TranscriptUpdate>;
|
|
155
|
+
/** The most recent `partial` snapshot observed, or `null` until the first decode lands. */
|
|
156
|
+
getLatestPartial(): TranscriptUpdate | null;
|
|
157
|
+
/** Detach the transcriber subscription. Does NOT dispose the transcriber itself. */
|
|
158
|
+
dispose(): void;
|
|
159
|
+
}
|
|
160
|
+
//# sourceMappingURL=streaming-pipeline-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"streaming-pipeline-adapter.d.ts","sourceRoot":"","sources":["../../../../src/services/voice/streaming-asr/streaming-pipeline-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAGH,OAAO,KAAK,EACX,QAAQ,EACR,oBAAoB,EACpB,SAAS,EACT,gBAAgB,EAChB,MAAM,UAAU,CAAC;AAkBlB;;;;;;;;;GASG;AACH,qBAAa,oBAAoB;IAChC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAS;IAC3B,+DAA+D;IAC/D,OAAO,CAAC,MAAM,CAAkB;IAChC,iDAAiD;IACjD,OAAO,CAAC,SAAS,CAAgB;gBAErB,CAAC,SAAI;IASjB;;;;;;;;;OASG;IACH,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAqCnC,qDAAqD;IACrD,KAAK,IAAI,IAAI;IAKb,+DAA+D;IAC/D,YAAY,IAAI,MAAM,EAAE;CAGxB;AAED,2CAA2C;AAC3C,MAAM,MAAM,qBAAqB,GAAG,WAAW,GAAG,OAAO,CAAC;AAE1D,MAAM,WAAW,qBAAqB;IACrC,sFAAsF;IACtF,oBAAoB,EAAE,OAAO,CAAC;IAC9B,+DAA+D;IAC/D,gBAAgB,EAAE,OAAO,CAAC;IAC1B;;;;OAIG;IACH,eAAe,EAAE,OAAO,CAAC;CACzB;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAChC,IAAI,EAAE,qBAAqB,GACzB,qBAAqB,CAKvB;AAED,MAAM,WAAW,wBAAwB;IACxC;;;;;OAKG;IACH,SAAS,CAAC,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC3C;;;;OAIG;IACH,OAAO,CAAC,CAAC,KAAK,EAAE,aAAa,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC7C;;;;;OAKG;IACH,aAAa,CAAC,CACb,MAAM,EAAE,aAAa,CAAC,SAAS,CAAC,EAChC,KAAK,EAAE,gBAAgB,GACrB,IAAI,CAAC;CACR;AAED;;;;;;;;;;GAUG;AACH,qBAAa,kBAAkB;IAC9B,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAuB;IACnD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAA2B;IAClD,OAAO,CAAC,aAAa,CAAiC;IACtD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,WAAW,CAA6B;gBAEpC,IAAI,EAAE;QACjB,WAAW,EAAE,oBAAoB,CAAC;QAClC,MAAM,CAAC,EAAE,wBAAwB,CAAC;KAClC;IAqBD;;;OAGG;IACH,SAAS,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;IAKhC;;;;;;OAMG;IACG,QAAQ,IAAI,OAAO,CAAC,gBAAgB,CAAC;IAa3C,2FAA2F;IAC3F,gBAAgB,IAAI,gBAAgB,GAAG,IAAI;IAI3C,oFAAoF;IACpF,OAAO,IAAI,IAAI;CAIf"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System audio sinks for the interactive voice harness.
|
|
3
|
+
*
|
|
4
|
+
* The voice scheduler writes synthesized PCM (`Float32Array` mono in
|
|
5
|
+
* [-1, 1] at the bridge sample rate) into an {@link AudioSink}. For tests
|
|
6
|
+
* and headless runs `InMemoryAudioSink` (in `./ring-buffer`) captures the
|
|
7
|
+
* samples; for an interactive session the harness needs the audio to
|
|
8
|
+
* actually come out of the speakers.
|
|
9
|
+
*
|
|
10
|
+
* `SystemAudioSink` shells out to a long-lived player that reads raw 16-bit
|
|
11
|
+
* signed-LE PCM on stdin. Per-platform selection (priority order):
|
|
12
|
+
* - Linux: `aplay` (alsa-utils), else `paplay` (PulseAudio), else
|
|
13
|
+
* `play`/`sox` (sox), else `ffplay` (ffmpeg).
|
|
14
|
+
* - macOS: `play`/`sox` (sox), else `ffplay` (ffmpeg). `afplay` needs a
|
|
15
|
+
* file (no stdin) so it cannot be used for streaming.
|
|
16
|
+
* - Windows: `ffplay` (ffmpeg), else `play`/`sox` if installed. PowerShell's
|
|
17
|
+
* `Media.SoundPlayer` also needs a file, not a stream, so it's not
|
|
18
|
+
* used here — the renderer's `AudioContext` path (feeding nothing
|
|
19
|
+
* here; the renderer plays directly) is the no-ffmpeg route.
|
|
20
|
+
* If no player is on `PATH`, `available()` returns false and the harness
|
|
21
|
+
* falls back to `WavFileAudioSink` (writes a single WAV on `finalize()`) — never silence.
|
|
22
|
+
*
|
|
23
|
+
* `WavFileAudioSink` accumulates everything written and serializes a
|
|
24
|
+
* single mono PCM16 WAV on `finalize()` — used by `--no-audio` and as the
|
|
25
|
+
* no-player fallback.
|
|
26
|
+
*/
|
|
27
|
+
import type { AudioSink } from "./types";
|
|
28
|
+
/**
|
|
29
|
+
* Exported view of {@link resolvePlayer} for the cross-platform preflight
|
|
30
|
+
* (`voice:interactive --platform-report`). Returns the bare program name
|
|
31
|
+
* (no args) the host would stream synthesized audio to, or `null`.
|
|
32
|
+
*/
|
|
33
|
+
export declare function resolveSystemPlayerName(sampleRate?: number): string | null;
|
|
34
|
+
export interface SystemAudioSinkOptions {
|
|
35
|
+
sampleRate: number;
|
|
36
|
+
}
|
|
37
|
+
export declare class SystemAudioSink implements AudioSink {
|
|
38
|
+
private readonly sampleRate;
|
|
39
|
+
private readonly playerSpec;
|
|
40
|
+
private proc;
|
|
41
|
+
private buffered;
|
|
42
|
+
constructor(opts: SystemAudioSinkOptions);
|
|
43
|
+
available(): boolean;
|
|
44
|
+
player(): string;
|
|
45
|
+
private ensureProc;
|
|
46
|
+
write(pcm: Float32Array, _sampleRate: number): void;
|
|
47
|
+
drain(): void;
|
|
48
|
+
bufferedSamples(): number;
|
|
49
|
+
/** Flush + close the player. Idempotent. */
|
|
50
|
+
dispose(): Promise<void>;
|
|
51
|
+
}
|
|
52
|
+
export interface WavFileAudioSinkOptions {
|
|
53
|
+
sampleRate: number;
|
|
54
|
+
filePath: string;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Accumulates all written PCM and serializes a single mono PCM16 WAV on
|
|
58
|
+
* {@link finalize}. Used by `--no-audio` and as the no-player fallback so
|
|
59
|
+
* a headless run still produces an inspectable artifact (never silence).
|
|
60
|
+
*/
|
|
61
|
+
export declare class WavFileAudioSink implements AudioSink {
|
|
62
|
+
private readonly sampleRate;
|
|
63
|
+
private readonly filePath;
|
|
64
|
+
private readonly chunks;
|
|
65
|
+
private buffered;
|
|
66
|
+
constructor(opts: WavFileAudioSinkOptions);
|
|
67
|
+
write(pcm: Float32Array, _sampleRate: number): void;
|
|
68
|
+
drain(): void;
|
|
69
|
+
bufferedSamples(): number;
|
|
70
|
+
totalSamples(): number;
|
|
71
|
+
finalize(): Promise<string>;
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=system-audio-sink.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"system-audio-sink.d.ts","sourceRoot":"","sources":["../../../src/services/voice/system-audio-sink.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAQH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAoIzC;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,UAAU,SAAS,GAAG,MAAM,GAAG,IAAI,CAG1E;AAED,MAAM,WAAW,sBAAsB;IACtC,UAAU,EAAE,MAAM,CAAC;CACnB;AAWD,qBAAa,eAAgB,YAAW,SAAS;IAChD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAyC;IACpE,OAAO,CAAC,IAAI,CAAiC;IAC7C,OAAO,CAAC,QAAQ,CAAK;gBAET,IAAI,EAAE,sBAAsB;IAKxC,SAAS,IAAI,OAAO;IAIpB,MAAM,IAAI,MAAM;IAIhB,OAAO,CAAC,UAAU;IAmBlB,KAAK,CAAC,GAAG,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,GAAG,IAAI;IAUnD,KAAK,IAAI,IAAI;IAmBb,eAAe,IAAI,MAAM;IAIzB,4CAA4C;IACtC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CA6B9B;AAED,MAAM,WAAW,uBAAuB;IACvC,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED;;;;GAIG;AACH,qBAAa,gBAAiB,YAAW,SAAS;IACjD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAsB;IAC7C,OAAO,CAAC,QAAQ,CAAK;gBAET,IAAI,EAAE,uBAAuB;IAKzC,KAAK,CAAC,GAAG,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,GAAG,IAAI;IAKnD,KAAK,IAAI,IAAI;IAMb,eAAe,IAAI,MAAM;IAIzB,YAAY,IAAI,MAAM;IAMhB,QAAQ,IAAI,OAAO,CAAC,MAAM,CAAC;CAgCjC"}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming ASR adapters for the local voice pipeline.
|
|
3
|
+
*
|
|
4
|
+
* Implements the `StreamingTranscriber` contract from `voice/types.ts`:
|
|
5
|
+
* PCM frames in (`feed`), running partial-transcript events out, `flush()`
|
|
6
|
+
* to force-finalize on `speech-end`. Two adapters, resolved in priority
|
|
7
|
+
* order by `createStreamingTranscriber()` — both backed by the single fused
|
|
8
|
+
* `libelizainference` build (the SOLE on-device ASR runtime):
|
|
9
|
+
*
|
|
10
|
+
* 1. `FfiStreamingTranscriber` — the FINAL path. Drives the fused
|
|
11
|
+
* `libelizainference` streaming ASR ABI (`eliza_inference_asr_stream_*`,
|
|
12
|
+
* ABI v2 — declared in `packages/app-core/scripts/omnivoice-fuse/ffi.h`,
|
|
13
|
+
* bound in `voice/ffi-bindings.ts`). The C side is W7's job; until the
|
|
14
|
+
* real fused build advertises streaming ASR the binding's `mmap`/`asr`
|
|
15
|
+
* calls return `ELIZA_ERR_NOT_IMPLEMENTED`, which surfaces as a thrown
|
|
16
|
+
* error here. Selected only when `ffi.asrStreamSupported()` is true.
|
|
17
|
+
*
|
|
18
|
+
* 2. `FfiBatchTranscriber` — the contract-clean INTERIM path. Runs the
|
|
19
|
+
* fused build's *batch* decoder (`eliza_inference_asr_transcribe`, ABI
|
|
20
|
+
* v1) over a sliding window with overlap, so each call covers ≤ ~6–7 s
|
|
21
|
+
* of audio — incremental, not "buffer the whole utterance, one giant
|
|
22
|
+
* decode". It lives inside the single shipped llama.cpp/GGML build and
|
|
23
|
+
* emits Gemma text-vocabulary tokens, so it does not vendor a second
|
|
24
|
+
* ggml or introduce a tokenizer-family mismatch.
|
|
25
|
+
* Selected whenever a `libelizainference` handle + bundled ASR model are
|
|
26
|
+
* present (which is always true when the fused build is loaded).
|
|
27
|
+
*
|
|
28
|
+
* If no fused ASR backend can be resolved, `createStreamingTranscriber()`
|
|
29
|
+
* throws `AsrUnavailableError` — a real failure, never a silent
|
|
30
|
+
* empty-transcript degrade and never a fall back to a second ASR runtime
|
|
31
|
+
* (AGENTS.md §3 + §9).
|
|
32
|
+
*/
|
|
33
|
+
import type { ElizaInferenceContextHandle, ElizaInferenceFfi } from "./ffi-bindings";
|
|
34
|
+
import type { PcmFrame, StreamingTranscriber, TranscriberEventListener, TranscriptUpdate, VadEventSource, VoiceInputSource, VoiceSpeaker, VoiceTurnMetadata } from "./types";
|
|
35
|
+
/** The local voice runtime resamples mic input to 16 kHz mono for ASR. */
|
|
36
|
+
export declare const ASR_SAMPLE_RATE = 16000;
|
|
37
|
+
/**
|
|
38
|
+
* Raised when no ASR backend can be resolved. Distinct error class so the
|
|
39
|
+
* caller (engine, `TRANSCRIPTION` model handler) can surface "ASR is not
|
|
40
|
+
* installed" with an actionable message rather than treating an empty
|
|
41
|
+
* string as a successful transcription.
|
|
42
|
+
*/
|
|
43
|
+
export declare class AsrUnavailableError extends Error {
|
|
44
|
+
constructor(message: string);
|
|
45
|
+
}
|
|
46
|
+
export type AsrBackendPreference = "auto" | "fused" | "ffi-batch";
|
|
47
|
+
export declare function normalizeAsrBackendPreference(value: string | null | undefined): AsrBackendPreference | null;
|
|
48
|
+
export declare function readAsrBackendPreferenceFromEnv(env?: NodeJS.ProcessEnv): AsrBackendPreference | null;
|
|
49
|
+
/**
|
|
50
|
+
* Linear-interpolation resample of mono fp32 PCM. Used to coerce mic
|
|
51
|
+
* frames (commonly 16 / 24 / 48 kHz) to the ASR rate. Not a polyphase
|
|
52
|
+
* filter — adequate for speech ASR; the fused build does its own
|
|
53
|
+
* resampling so this is interim-batch only.
|
|
54
|
+
*/
|
|
55
|
+
export declare function resampleLinear(pcm: Float32Array, fromRate: number, toRate: number): Float32Array;
|
|
56
|
+
/**
|
|
57
|
+
* Base implementing the boilerplate every adapter shares: listener
|
|
58
|
+
* fan-out, the `words`-once-per-segment latch, and (optional) VAD-event
|
|
59
|
+
* gating. Subclasses implement `onFrame` / `onFlush` / `onDispose` and
|
|
60
|
+
* call `emitPartial` / `emitFinal`.
|
|
61
|
+
*/
|
|
62
|
+
export declare abstract class BaseStreamingTranscriber implements StreamingTranscriber {
|
|
63
|
+
private readonly listeners;
|
|
64
|
+
private metadata;
|
|
65
|
+
/** True between `speech-start`/first-frame and the next `flush()`. */
|
|
66
|
+
protected segmentOpen: boolean;
|
|
67
|
+
/** Latched once `words` is emitted for the current segment. */
|
|
68
|
+
private wordsEmitted;
|
|
69
|
+
/** When set, frames are only forwarded while the VAD is in an active speech window. */
|
|
70
|
+
private vadActive;
|
|
71
|
+
private readonly vadPrerollFrames;
|
|
72
|
+
private vadUnsub;
|
|
73
|
+
private disposed;
|
|
74
|
+
constructor(vad?: VadEventSource, metadata?: TranscriptMetadataDefaults);
|
|
75
|
+
on(listener: TranscriberEventListener): () => void;
|
|
76
|
+
/**
|
|
77
|
+
* Update the metadata defaults that `withMetadata()` merges into every
|
|
78
|
+
* partial / final emission. The voice pipeline calls this once the
|
|
79
|
+
* async speaker-ID / diarizer lookup resolves, so the speaker /
|
|
80
|
+
* segments are attached to the rest of the turn without buffering all
|
|
81
|
+
* partials for the lookup.
|
|
82
|
+
*/
|
|
83
|
+
setMetadataDefaults(metadata: TranscriptMetadataDefaults): void;
|
|
84
|
+
feed(frame: PcmFrame): void;
|
|
85
|
+
flush(): Promise<TranscriptUpdate>;
|
|
86
|
+
dispose(): void;
|
|
87
|
+
/** Subclass hook: a (VAD-gated) PCM frame for the current speech segment. */
|
|
88
|
+
protected abstract onFrame(frame: PcmFrame): void;
|
|
89
|
+
/** Subclass hook: drain buffered audio, run a final decode, return the final transcript. */
|
|
90
|
+
protected abstract onFlush(): Promise<TranscriptUpdate>;
|
|
91
|
+
/** Subclass hook: release native resources. */
|
|
92
|
+
protected abstract onDispose(): void;
|
|
93
|
+
private rememberVadPreroll;
|
|
94
|
+
private drainVadPreroll;
|
|
95
|
+
/** Emit a running-partial event and (the first time it has words) a `words` event. */
|
|
96
|
+
protected emitPartial(update: TranscriptUpdate): void;
|
|
97
|
+
private withMetadata;
|
|
98
|
+
private emit;
|
|
99
|
+
private onVadEvent;
|
|
100
|
+
}
|
|
101
|
+
export interface TranscriptMetadataDefaults {
|
|
102
|
+
source?: VoiceInputSource;
|
|
103
|
+
speaker?: VoiceSpeaker;
|
|
104
|
+
turn?: VoiceTurnMetadata;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* True when the loaded fused library has a working streaming ASR decoder
|
|
108
|
+
* (not just the v2 symbols — an ABI-only build exports them but `asrStreamSupported`
|
|
109
|
+
* returns false). This is the gate `createStreamingTranscriber` uses to
|
|
110
|
+
* pick the fused streaming path over the fused-batch interim adapter.
|
|
111
|
+
*/
|
|
112
|
+
export declare function ffiSupportsStreamingAsr(ffi: ElizaInferenceFfi | null | undefined): boolean;
|
|
113
|
+
/**
|
|
114
|
+
* `StreamingTranscriber` over the fused `libelizainference` streaming ASR
|
|
115
|
+
* ABI. Each `feed()` forwards the (resampled) PCM into `asrStreamFeed`;
|
|
116
|
+
* after a feed it reads the running partial via `asrStreamPartial`.
|
|
117
|
+
* `flush()` calls `asrStreamFinish` then re-opens a fresh stream for the
|
|
118
|
+
* next segment. Token ids, when the library returns them, are surfaced in
|
|
119
|
+
* `TranscriptUpdate.tokens` — the fused build shares the text vocabulary
|
|
120
|
+
* (AGENTS.md §1) so they feed STT-finish token injection directly.
|
|
121
|
+
*
|
|
122
|
+
* The C side is owned by W7; until the fused build implements these
|
|
123
|
+
* symbols every call throws (the binding maps `ELIZA_ERR_NOT_IMPLEMENTED`
|
|
124
|
+
* to a `VoiceLifecycleError`). That is intentional — no fake transcripts.
|
|
125
|
+
*/
|
|
126
|
+
export declare class FfiStreamingTranscriber extends BaseStreamingTranscriber {
|
|
127
|
+
private readonly ffi;
|
|
128
|
+
private readonly getContext;
|
|
129
|
+
/** Token count to ask the library for per partial; 0 = don't request tokens. */
|
|
130
|
+
private readonly maxTokens;
|
|
131
|
+
private stream;
|
|
132
|
+
constructor(args: {
|
|
133
|
+
ffi: ElizaInferenceFfi;
|
|
134
|
+
getContext: () => ElizaInferenceContextHandle;
|
|
135
|
+
vad?: VadEventSource;
|
|
136
|
+
metadata?: TranscriptMetadataDefaults;
|
|
137
|
+
source?: VoiceInputSource;
|
|
138
|
+
/** Cap on token ids read back per transcript snapshot. Default 256. */
|
|
139
|
+
maxTokens?: number;
|
|
140
|
+
});
|
|
141
|
+
private ensureStream;
|
|
142
|
+
protected onFrame(frame: PcmFrame): void;
|
|
143
|
+
protected onFlush(): Promise<TranscriptUpdate>;
|
|
144
|
+
protected onDispose(): void;
|
|
145
|
+
}
|
|
146
|
+
export interface FfiBatchTranscriberOptions {
|
|
147
|
+
ffi: ElizaInferenceFfi;
|
|
148
|
+
getContext: () => ElizaInferenceContextHandle;
|
|
149
|
+
vad?: VadEventSource;
|
|
150
|
+
metadata?: TranscriptMetadataDefaults;
|
|
151
|
+
source?: VoiceInputSource;
|
|
152
|
+
/** Sliding-window length, seconds. Each batch decode covers ≤ this + overlap. Default 6.0. */
|
|
153
|
+
windowSeconds?: number;
|
|
154
|
+
/** Trailing overlap kept when committing a prefix chunk, seconds. Default 1.0. */
|
|
155
|
+
overlapSeconds?: number;
|
|
156
|
+
/** Minimum new audio (seconds) accumulated before the next decode pass. Default 1.2. */
|
|
157
|
+
stepSeconds?: number;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Interim streaming-ASR adapter over the fused `libelizainference` **batch**
|
|
161
|
+
* decoder (`eliza_inference_asr_transcribe`, ABI v1). The fused build's true
|
|
162
|
+
* streaming decoder (`eliza_inference_asr_stream_*`, ABI v2) reports unsupported
|
|
163
|
+
* until its runtime lands; this adapter is the contract-clean interim — it runs
|
|
164
|
+
* inside the one shipped llama.cpp/GGML build and emits
|
|
165
|
+
* Gemma token-vocab text, so no second ggml is vendored and no
|
|
166
|
+
* tokenizer-family mismatch is introduced.
|
|
167
|
+
*
|
|
168
|
+
* It runs a *windowed re-transcription with overlap* strategy: a prefix older
|
|
169
|
+
* than `windowSeconds` is committed (decoded once, in window-sized chunks
|
|
170
|
+
* with `overlapSeconds` carry-over) and only the tail window is re-decoded
|
|
171
|
+
* each step. So each `asr_transcribe` call is bounded by `windowSeconds +
|
|
172
|
+
* overlap` of audio (≈6–7 s) — incremental, not "buffer the whole utterance,
|
|
173
|
+
* run one giant batch decode". Decodes run serially on the shared ASR mutex
|
|
174
|
+
* (the fused context's ASR region is single-threaded).
|
|
175
|
+
*
|
|
176
|
+
* Requires `ffi.mmapAcquire(ctx, "asr")` to have been called on `getContext()`
|
|
177
|
+
* — the `EngineVoiceBridge` lifecycle does this when voice input is armed.
|
|
178
|
+
*/
|
|
179
|
+
export declare class FfiBatchTranscriber extends BaseStreamingTranscriber {
|
|
180
|
+
private readonly ffi;
|
|
181
|
+
private readonly getContext;
|
|
182
|
+
private readonly windowSamples;
|
|
183
|
+
private readonly overlapSamples;
|
|
184
|
+
private readonly stepSamples;
|
|
185
|
+
/** All 16 kHz samples accumulated for the current speech segment. */
|
|
186
|
+
private buf;
|
|
187
|
+
/** Samples in `buf` already folded into `committed`. */
|
|
188
|
+
private committedSamples;
|
|
189
|
+
/** Text decoded from `buf[0 .. committedSamples)`. */
|
|
190
|
+
private committed;
|
|
191
|
+
/** `buf.length` at the last decode pass — throttles to `stepSamples`. */
|
|
192
|
+
private lastDecodeAt;
|
|
193
|
+
/** Decode chain — `asr_transcribe` calls serialize on the native ASR mutex anyway. */
|
|
194
|
+
private decodeChain;
|
|
195
|
+
constructor(opts: FfiBatchTranscriberOptions);
|
|
196
|
+
private decodeWindow;
|
|
197
|
+
protected onFrame(frame: PcmFrame): void;
|
|
198
|
+
protected onFlush(): Promise<TranscriptUpdate>;
|
|
199
|
+
protected onDispose(): void;
|
|
200
|
+
private resetSegment;
|
|
201
|
+
private scheduleDecode;
|
|
202
|
+
private runDecode;
|
|
203
|
+
}
|
|
204
|
+
export interface CreateStreamingTranscriberOptions {
|
|
205
|
+
/** Fused FFI handle (when a `libelizainference` build is loaded), else null. */
|
|
206
|
+
ffi?: ElizaInferenceFfi | null;
|
|
207
|
+
/** Provider for the fused context pointer (the bridge owns the lazy create). */
|
|
208
|
+
getContext?: () => ElizaInferenceContextHandle;
|
|
209
|
+
/**
|
|
210
|
+
* Whether a bundled ASR model directory is present. The fused streaming path is
|
|
211
|
+
* only chosen when this is true AND the library advertises streaming
|
|
212
|
+
* ASR.
|
|
213
|
+
*/
|
|
214
|
+
asrBundlePresent?: boolean;
|
|
215
|
+
/** VAD event stream to gate decoding (W1). */
|
|
216
|
+
vad?: VadEventSource;
|
|
217
|
+
/** Optional attribution metadata stamped onto emitted transcript updates. */
|
|
218
|
+
metadata?: TranscriptMetadataDefaults;
|
|
219
|
+
/** Convenience shorthand for `metadata.source`. */
|
|
220
|
+
source?: VoiceInputSource;
|
|
221
|
+
/** Fused-batch-interim window/step overrides (see `FfiBatchTranscriber`). */
|
|
222
|
+
ffiBatch?: Omit<FfiBatchTranscriberOptions, "ffi" | "getContext">;
|
|
223
|
+
/**
|
|
224
|
+
* Force a specific fused backend.
|
|
225
|
+
* `"fused"` → fused streaming ASR only (throws if unavailable),
|
|
226
|
+
* `"ffi-batch"` → fused batch (interim) only (throws if unavailable),
|
|
227
|
+
* `"auto"` (default) → fused streaming → fused batch → throw.
|
|
228
|
+
*/
|
|
229
|
+
prefer?: AsrBackendPreference;
|
|
230
|
+
}
|
|
231
|
+
/**
|
|
232
|
+
* Resolve the fused ASR adapter chain:
|
|
233
|
+
* 1. fused streaming ASR (`eliza_inference_asr_stream_*`, ABI v2 — the FINAL
|
|
234
|
+
* path, W7),
|
|
235
|
+
* 2. fused batch (interim) — windowed `eliza_inference_asr_transcribe` (ABI
|
|
236
|
+
* v1); contract-clean (one ggml, shared text vocab) and available now.
|
|
237
|
+
*
|
|
238
|
+
* The fused `libelizainference` build is the SOLE on-device ASR runtime. There
|
|
239
|
+
* is no whisper.cpp (or other second-runtime) fallback: if no fused decoder is
|
|
240
|
+
* available the caller gets a hard, actionable failure (AGENTS.md §3 + §9) —
|
|
241
|
+
* never a silent empty transcript.
|
|
242
|
+
*/
|
|
243
|
+
export declare function createStreamingTranscriber(opts?: CreateStreamingTranscriberOptions): StreamingTranscriber;
|
|
244
|
+
//# sourceMappingURL=transcriber.d.ts.map
|