@elizaos/plugin-local-inference 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/dist/actions/generate-media.d.ts +59 -0
- package/dist/actions/generate-media.d.ts.map +1 -0
- package/dist/actions/identify-speaker.d.ts +23 -0
- package/dist/actions/identify-speaker.d.ts.map +1 -0
- package/dist/actions/transcription-control.d.ts +29 -0
- package/dist/actions/transcription-control.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/environment.d.ts +12 -0
- package/dist/adapters/capacitor-llama/environment.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts +9 -0
- package/dist/adapters/capacitor-llama/index.browser.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/index.d.ts +18 -0
- package/dist/adapters/capacitor-llama/index.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/loader.d.ts +35 -0
- package/dist/adapters/capacitor-llama/loader.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts +70 -0
- package/dist/adapters/capacitor-llama/native-voice-capture.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts +62 -0
- package/dist/adapters/capacitor-llama/structured-output.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts +24 -0
- package/dist/adapters/capacitor-llama/text-streaming.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/types.d.ts +338 -0
- package/dist/adapters/capacitor-llama/types.d.ts.map +1 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts +86 -0
- package/dist/adapters/capacitor-llama/voice-turn.d.ts.map +1 -0
- package/dist/backends/apple-foundation.d.ts +56 -0
- package/dist/backends/apple-foundation.d.ts.map +1 -0
- package/dist/index.d.ts +8 -37
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +38979 -430
- package/dist/index.js.map +217 -0
- package/dist/local-inference-routes.d.ts +47 -0
- package/dist/local-inference-routes.d.ts.map +1 -0
- package/dist/provider.d.ts +21 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/routes/compat-helpers.d.ts +18 -0
- package/dist/routes/compat-helpers.d.ts.map +1 -0
- package/dist/routes/family-member-route.d.ts +62 -0
- package/dist/routes/family-member-route.d.ts.map +1 -0
- package/dist/routes/index.d.ts +20 -0
- package/dist/routes/index.d.ts.map +1 -0
- package/dist/routes/index.js +42040 -0
- package/dist/routes/index.js.map +236 -0
- package/dist/routes/live-diarization-route.d.ts +33 -0
- package/dist/routes/live-diarization-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-route.d.ts +4 -0
- package/dist/routes/local-inference-asr-route.d.ts.map +1 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts +20 -0
- package/dist/routes/local-inference-asr-transcribe.d.ts.map +1 -0
- package/dist/routes/local-inference-compat-routes.d.ts +16 -0
- package/dist/routes/local-inference-compat-routes.d.ts.map +1 -0
- package/dist/routes/local-inference-tts-route.d.ts +7 -0
- package/dist/routes/local-inference-tts-route.d.ts.map +1 -0
- package/dist/routes/native-pcm-turn-route.d.ts +3 -0
- package/dist/routes/native-pcm-turn-route.d.ts.map +1 -0
- package/dist/routes/transcript-audio-store.d.ts +15 -0
- package/dist/routes/transcript-audio-store.d.ts.map +1 -0
- package/dist/routes/transcripts-routes.d.ts +44 -0
- package/dist/routes/transcripts-routes.d.ts.map +1 -0
- package/dist/routes/voice-first-run-routes.d.ts +62 -0
- package/dist/routes/voice-first-run-routes.d.ts.map +1 -0
- package/dist/routes/voice-models-routes.d.ts +62 -0
- package/dist/routes/voice-models-routes.d.ts.map +1 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts +19 -0
- package/dist/routes/voice-profile-plugin-routes.d.ts.map +1 -0
- package/dist/routes/voice-profiles-management-routes.d.ts +52 -0
- package/dist/routes/voice-profiles-management-routes.d.ts.map +1 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts +57 -0
- package/dist/routes/voice-speaker-profile-routes.d.ts.map +1 -0
- package/dist/runtime/embedding-manager-support.d.ts +77 -0
- package/dist/runtime/embedding-manager-support.d.ts.map +1 -0
- package/dist/runtime/embedding-presets.d.ts +16 -0
- package/dist/runtime/embedding-presets.d.ts.map +1 -0
- package/dist/runtime/embedding-warmup-policy.d.ts +14 -0
- package/dist/runtime/embedding-warmup-policy.d.ts.map +1 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts +70 -0
- package/dist/runtime/ensure-local-inference-handler.d.ts.map +1 -0
- package/dist/runtime/index.d.ts +15 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +38768 -0
- package/dist/runtime/index.js.map +217 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts +63 -0
- package/dist/runtime/mobile-local-inference-gate.d.ts.map +1 -0
- package/dist/runtime/voice-entity-binding.d.ts +113 -0
- package/dist/runtime/voice-entity-binding.d.ts.map +1 -0
- package/dist/services/active-model.d.ts +310 -0
- package/dist/services/active-model.d.ts.map +1 -0
- package/dist/services/asr-provenance.d.ts +5 -0
- package/dist/services/asr-provenance.d.ts.map +1 -0
- package/dist/services/assignments.d.ts +84 -0
- package/dist/services/assignments.d.ts.map +1 -0
- package/dist/services/backend-selector.d.ts +55 -0
- package/dist/services/backend-selector.d.ts.map +1 -0
- package/dist/services/backend.d.ts +440 -0
- package/dist/services/backend.d.ts.map +1 -0
- package/dist/services/bionic-host-loader.d.ts +67 -0
- package/dist/services/bionic-host-loader.d.ts.map +1 -0
- package/dist/services/bundled-models.d.ts +34 -0
- package/dist/services/bundled-models.d.ts.map +1 -0
- package/dist/services/cache-bridge.d.ts +206 -0
- package/dist/services/cache-bridge.d.ts.map +1 -0
- package/dist/services/catalog.d.ts +10 -0
- package/dist/services/catalog.d.ts.map +1 -0
- package/dist/services/checkpoint-client.d.ts +109 -0
- package/dist/services/checkpoint-client.d.ts.map +1 -0
- package/dist/services/checkpoint-manager.d.ts +217 -0
- package/dist/services/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/cloud-fallback.d.ts +102 -0
- package/dist/services/cloud-fallback.d.ts.map +1 -0
- package/dist/services/context-fit.d.ts +36 -0
- package/dist/services/context-fit.d.ts.map +1 -0
- package/dist/services/conversation-registry.d.ts +142 -0
- package/dist/services/conversation-registry.d.ts.map +1 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts +111 -0
- package/dist/services/desktop-fused-ffi-backend-runtime.d.ts.map +1 -0
- package/dist/services/device-bridge.d.ts +188 -0
- package/dist/services/device-bridge.d.ts.map +1 -0
- package/dist/services/device-resource-metrics.d.ts +149 -0
- package/dist/services/device-resource-metrics.d.ts.map +1 -0
- package/dist/services/device-tier.d.ts +133 -0
- package/dist/services/device-tier.d.ts.map +1 -0
- package/dist/services/downloader.d.ts +94 -0
- package/dist/services/downloader.d.ts.map +1 -0
- package/dist/services/engine.d.ts +579 -0
- package/dist/services/engine.d.ts.map +1 -0
- package/dist/services/ensure-local-artifacts.d.ts +82 -0
- package/dist/services/ensure-local-artifacts.d.ts.map +1 -0
- package/dist/services/external-scanner.d.ts +17 -0
- package/dist/services/external-scanner.d.ts.map +1 -0
- package/dist/services/ffi-llm-mock.d.ts +90 -0
- package/dist/services/ffi-llm-mock.d.ts.map +1 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts +318 -0
- package/dist/services/ffi-llm-streaming-abi.d.ts.map +1 -0
- package/dist/services/ffi-streaming-backend.d.ts +201 -0
- package/dist/services/ffi-streaming-backend.d.ts.map +1 -0
- package/dist/services/ffi-streaming-runner.d.ts +146 -0
- package/dist/services/ffi-streaming-runner.d.ts.map +1 -0
- package/dist/services/gpu-autotune.d.ts +150 -0
- package/dist/services/gpu-autotune.d.ts.map +1 -0
- package/dist/services/gpu-detect.d.ts +56 -0
- package/dist/services/gpu-detect.d.ts.map +1 -0
- package/dist/services/handler-registry.d.ts +72 -0
- package/dist/services/handler-registry.d.ts.map +1 -0
- package/dist/services/hardware.d.ts +63 -0
- package/dist/services/hardware.d.ts.map +1 -0
- package/dist/services/image-description-runtime.d.ts +14 -0
- package/dist/services/image-description-runtime.d.ts.map +1 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts +134 -0
- package/dist/services/imagegen/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/backend-selector.d.ts +118 -0
- package/dist/services/imagegen/backend-selector.d.ts.map +1 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts +105 -0
- package/dist/services/imagegen/coreml-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/errors.d.ts +16 -0
- package/dist/services/imagegen/errors.d.ts.map +1 -0
- package/dist/services/imagegen/index.d.ts +58 -0
- package/dist/services/imagegen/index.d.ts.map +1 -0
- package/dist/services/imagegen/mflux.d.ts +74 -0
- package/dist/services/imagegen/mflux.d.ts.map +1 -0
- package/dist/services/imagegen/sd-cpp.d.ts +181 -0
- package/dist/services/imagegen/sd-cpp.d.ts.map +1 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts +83 -0
- package/dist/services/imagegen/tensorrt-unavailable.d.ts.map +1 -0
- package/dist/services/imagegen/types.d.ts +181 -0
- package/dist/services/imagegen/types.d.ts.map +1 -0
- package/dist/services/index.d.ts +31 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/index.js +39453 -0
- package/dist/services/index.js.map +227 -0
- package/dist/services/inference-capabilities.d.ts +132 -0
- package/dist/services/inference-capabilities.d.ts.map +1 -0
- package/dist/services/inference-telemetry.d.ts +59 -0
- package/dist/services/inference-telemetry.d.ts.map +1 -0
- package/dist/services/ios-llama-streaming.d.ts +119 -0
- package/dist/services/ios-llama-streaming.d.ts.map +1 -0
- package/dist/services/kv-spill.d.ts +189 -0
- package/dist/services/kv-spill.d.ts.map +1 -0
- package/dist/services/latency-trace.d.ts +346 -0
- package/dist/services/latency-trace.d.ts.map +1 -0
- package/dist/services/lib-target.d.ts +55 -0
- package/dist/services/lib-target.d.ts.map +1 -0
- package/dist/services/live-signals.d.ts +86 -0
- package/dist/services/live-signals.d.ts.map +1 -0
- package/dist/services/llama-server-metrics.d.ts +114 -0
- package/dist/services/llama-server-metrics.d.ts.map +1 -0
- package/dist/services/llm-streaming-binding.d.ts +96 -0
- package/dist/services/llm-streaming-binding.d.ts.map +1 -0
- package/dist/services/load-args.d.ts +82 -0
- package/dist/services/load-args.d.ts.map +1 -0
- package/dist/services/manifest/index.d.ts +4 -0
- package/dist/services/manifest/index.d.ts.map +1 -0
- package/dist/services/manifest/schema.d.ts +903 -0
- package/dist/services/manifest/schema.d.ts.map +1 -0
- package/dist/services/manifest/types.d.ts +32 -0
- package/dist/services/manifest/types.d.ts.map +1 -0
- package/dist/services/manifest/validator.d.ts +66 -0
- package/dist/services/manifest/validator.d.ts.map +1 -0
- package/dist/services/memory-arbiter.d.ts +348 -0
- package/dist/services/memory-arbiter.d.ts.map +1 -0
- package/dist/services/memory-benchmark.d.ts +76 -0
- package/dist/services/memory-benchmark.d.ts.map +1 -0
- package/dist/services/memory-monitor.d.ts +128 -0
- package/dist/services/memory-monitor.d.ts.map +1 -0
- package/dist/services/memory-pressure.d.ts +130 -0
- package/dist/services/memory-pressure.d.ts.map +1 -0
- package/dist/services/mtp-doctor.d.ts +13 -0
- package/dist/services/mtp-doctor.d.ts.map +1 -0
- package/dist/services/network-policy.d.ts +127 -0
- package/dist/services/network-policy.d.ts.map +1 -0
- package/dist/services/paths.d.ts +6 -0
- package/dist/services/paths.d.ts.map +1 -0
- package/dist/services/planner-skeleton.d.ts +124 -0
- package/dist/services/planner-skeleton.d.ts.map +1 -0
- package/dist/services/providers.d.ts +38 -0
- package/dist/services/providers.d.ts.map +1 -0
- package/dist/services/ram-budget.d.ts +110 -0
- package/dist/services/ram-budget.d.ts.map +1 -0
- package/dist/services/readiness.d.ts +9 -0
- package/dist/services/readiness.d.ts.map +1 -0
- package/dist/services/recommendation.d.ts +111 -0
- package/dist/services/recommendation.d.ts.map +1 -0
- package/dist/services/registry.d.ts +33 -0
- package/dist/services/registry.d.ts.map +1 -0
- package/dist/services/router-handler.d.ts +92 -0
- package/dist/services/router-handler.d.ts.map +1 -0
- package/dist/services/routing-policy.d.ts +92 -0
- package/dist/services/routing-policy.d.ts.map +1 -0
- package/dist/services/routing-preferences.d.ts +8 -0
- package/dist/services/routing-preferences.d.ts.map +1 -0
- package/dist/services/runtime-target.d.ts +98 -0
- package/dist/services/runtime-target.d.ts.map +1 -0
- package/dist/services/service.d.ts +128 -0
- package/dist/services/service.d.ts.map +1 -0
- package/dist/services/session-pool.d.ts +72 -0
- package/dist/services/session-pool.d.ts.map +1 -0
- package/dist/services/structured-output/deterministic-repair.d.ts +23 -0
- package/dist/services/structured-output/deterministic-repair.d.ts.map +1 -0
- package/dist/services/structured-output/index.d.ts +2 -0
- package/dist/services/structured-output/index.d.ts.map +1 -0
- package/dist/services/structured-output.d.ts +311 -0
- package/dist/services/structured-output.d.ts.map +1 -0
- package/dist/services/system-memory.d.ts +33 -0
- package/dist/services/system-memory.d.ts.map +1 -0
- package/dist/services/types.d.ts +19 -0
- package/dist/services/types.d.ts.map +1 -0
- package/dist/services/verify-on-device.d.ts +34 -0
- package/dist/services/verify-on-device.d.ts.map +1 -0
- package/dist/services/verify.d.ts +8 -0
- package/dist/services/verify.d.ts.map +1 -0
- package/dist/services/vision/aosp-unavailable.d.ts +115 -0
- package/dist/services/vision/aosp-unavailable.d.ts.map +1 -0
- package/dist/services/vision/capacitor-llama.d.ts +99 -0
- package/dist/services/vision/capacitor-llama.d.ts.map +1 -0
- package/dist/services/vision/cloud-fallback.d.ts +47 -0
- package/dist/services/vision/cloud-fallback.d.ts.map +1 -0
- package/dist/services/vision/hash.d.ts +71 -0
- package/dist/services/vision/hash.d.ts.map +1 -0
- package/dist/services/vision/index.d.ts +95 -0
- package/dist/services/vision/index.d.ts.map +1 -0
- package/dist/services/vision/llama-server.d.ts +73 -0
- package/dist/services/vision/llama-server.d.ts.map +1 -0
- package/dist/services/vision/types.d.ts +162 -0
- package/dist/services/vision/types.d.ts.map +1 -0
- package/dist/services/vision/vast-fallback.d.ts +18 -0
- package/dist/services/vision/vast-fallback.d.ts.map +1 -0
- package/dist/services/vision-embedding-cache.d.ts +98 -0
- package/dist/services/vision-embedding-cache.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts +27 -0
- package/dist/services/voice/__test-helpers__/fake-ffi.d.ts.map +1 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts +66 -0
- package/dist/services/voice/__test-helpers__/synthetic-speech.d.ts.map +1 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts +61 -0
- package/dist/services/voice/acoustic-speaker-attribution.d.ts.map +1 -0
- package/dist/services/voice/audio-frame-consumer.d.ts +294 -0
- package/dist/services/voice/audio-frame-consumer.d.ts.map +1 -0
- package/dist/services/voice/barge-in.d.ts +112 -0
- package/dist/services/voice/barge-in.d.ts.map +1 -0
- package/dist/services/voice/cancellation-coordinator.d.ts +127 -0
- package/dist/services/voice/cancellation-coordinator.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-manager.d.ts +199 -0
- package/dist/services/voice/checkpoint-manager.d.ts.map +1 -0
- package/dist/services/voice/checkpoint-policy.d.ts +178 -0
- package/dist/services/voice/checkpoint-policy.d.ts.map +1 -0
- package/dist/services/voice/corpus-augment.d.ts +111 -0
- package/dist/services/voice/corpus-augment.d.ts.map +1 -0
- package/dist/services/voice/corpus-generator.d.ts +134 -0
- package/dist/services/voice/corpus-generator.d.ts.map +1 -0
- package/dist/services/voice/diarization-error-rate.d.ts +40 -0
- package/dist/services/voice/diarization-error-rate.d.ts.map +1 -0
- package/dist/services/voice/e2e-harness.d.ts +297 -0
- package/dist/services/voice/e2e-harness.d.ts.map +1 -0
- package/dist/services/voice/eager-context-builder.d.ts +170 -0
- package/dist/services/voice/eager-context-builder.d.ts.map +1 -0
- package/dist/services/voice/echo-delay.d.ts +67 -0
- package/dist/services/voice/echo-delay.d.ts.map +1 -0
- package/dist/services/voice/echo-metrics.d.ts +7 -0
- package/dist/services/voice/echo-metrics.d.ts.map +1 -0
- package/dist/services/voice/echo-reference-buffer.d.ts +65 -0
- package/dist/services/voice/echo-reference-buffer.d.ts.map +1 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts +124 -0
- package/dist/services/voice/eliza1-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/embedding-server.d.ts +37 -0
- package/dist/services/voice/embedding-server.d.ts.map +1 -0
- package/dist/services/voice/embedding.d.ts +132 -0
- package/dist/services/voice/embedding.d.ts.map +1 -0
- package/dist/services/voice/emotion-attribution.d.ts +68 -0
- package/dist/services/voice/emotion-attribution.d.ts.map +1 -0
- package/dist/services/voice/engine-bridge.d.ts +762 -0
- package/dist/services/voice/engine-bridge.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts +179 -0
- package/dist/services/voice/eot-classifier-ggml.d.ts.map +1 -0
- package/dist/services/voice/eot-classifier.d.ts +211 -0
- package/dist/services/voice/eot-classifier.d.ts.map +1 -0
- package/dist/services/voice/errors.d.ts +20 -0
- package/dist/services/voice/errors.d.ts.map +1 -0
- package/dist/services/voice/expressive-tags.d.ts +158 -0
- package/dist/services/voice/expressive-tags.d.ts.map +1 -0
- package/dist/services/voice/ffi-bindings.d.ts +696 -0
- package/dist/services/voice/ffi-bindings.d.ts.map +1 -0
- package/dist/services/voice/first-line-cache.d.ts +181 -0
- package/dist/services/voice/first-line-cache.d.ts.map +1 -0
- package/dist/services/voice/fused-eot-scorer.d.ts +51 -0
- package/dist/services/voice/fused-eot-scorer.d.ts.map +1 -0
- package/dist/services/voice/index.d.ts +96 -0
- package/dist/services/voice/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/index.d.ts +24 -0
- package/dist/services/voice/kokoro/index.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts +87 -0
- package/dist/services/voice/kokoro/kokoro-backend.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts +58 -0
- package/dist/services/voice/kokoro/kokoro-engine-discovery.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts +75 -0
- package/dist/services/voice/kokoro/kokoro-ffi-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts +100 -0
- package/dist/services/voice/kokoro/kokoro-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts +51 -0
- package/dist/services/voice/kokoro/phoneme-stream.d.ts.map +1 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts +50 -0
- package/dist/services/voice/kokoro/phonemizer.d.ts.map +1 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts +61 -0
- package/dist/services/voice/kokoro/pick-runtime.d.ts.map +1 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts +31 -0
- package/dist/services/voice/kokoro/runtime-selection.d.ts.map +1 -0
- package/dist/services/voice/kokoro/types.d.ts +82 -0
- package/dist/services/voice/kokoro/types.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts +23 -0
- package/dist/services/voice/kokoro/voice-presets.d.ts.map +1 -0
- package/dist/services/voice/kokoro/voices.d.ts +30 -0
- package/dist/services/voice/kokoro/voices.d.ts.map +1 -0
- package/dist/services/voice/lifecycle.d.ts +135 -0
- package/dist/services/voice/lifecycle.d.ts.map +1 -0
- package/dist/services/voice/live-diarization-session.d.ts +196 -0
- package/dist/services/voice/live-diarization-session.d.ts.map +1 -0
- package/dist/services/voice/metric-math.d.ts +10 -0
- package/dist/services/voice/metric-math.d.ts.map +1 -0
- package/dist/services/voice/mic-source.d.ts +136 -0
- package/dist/services/voice/mic-source.d.ts.map +1 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts +137 -0
- package/dist/services/voice/nlms-echo-canceller.d.ts.map +1 -0
- package/dist/services/voice/optimistic-policy.d.ts +109 -0
- package/dist/services/voice/optimistic-policy.d.ts.map +1 -0
- package/dist/services/voice/optimistic-rollback.d.ts +151 -0
- package/dist/services/voice/optimistic-rollback.d.ts.map +1 -0
- package/dist/services/voice/partial-stabilizer.d.ts +73 -0
- package/dist/services/voice/partial-stabilizer.d.ts.map +1 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts +49 -0
- package/dist/services/voice/phoneme-tokenizer.d.ts.map +1 -0
- package/dist/services/voice/phrase-cache.d.ts +76 -0
- package/dist/services/voice/phrase-cache.d.ts.map +1 -0
- package/dist/services/voice/phrase-chunker.d.ts +62 -0
- package/dist/services/voice/phrase-chunker.d.ts.map +1 -0
- package/dist/services/voice/pipeline-impls.d.ts +151 -0
- package/dist/services/voice/pipeline-impls.d.ts.map +1 -0
- package/dist/services/voice/pipeline.d.ts +216 -0
- package/dist/services/voice/pipeline.d.ts.map +1 -0
- package/dist/services/voice/prefill-client.d.ts +123 -0
- package/dist/services/voice/prefill-client.d.ts.map +1 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts +113 -0
- package/dist/services/voice/prefix-preserving-queue.d.ts.map +1 -0
- package/dist/services/voice/profile-store.d.ts +248 -0
- package/dist/services/voice/profile-store.d.ts.map +1 -0
- package/dist/services/voice/ring-buffer.d.ts +40 -0
- package/dist/services/voice/ring-buffer.d.ts.map +1 -0
- package/dist/services/voice/rollback-queue.d.ts +24 -0
- package/dist/services/voice/rollback-queue.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts +67 -0
- package/dist/services/voice/samantha-preset-placeholder.d.ts.map +1 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts +87 -0
- package/dist/services/voice/samantha-preset-regenerator.d.ts.map +1 -0
- package/dist/services/voice/scheduler.d.ts +146 -0
- package/dist/services/voice/scheduler.d.ts.map +1 -0
- package/dist/services/voice/self-voice-imprint.d.ts +33 -0
- package/dist/services/voice/self-voice-imprint.d.ts.map +1 -0
- package/dist/services/voice/shared-resources.d.ts +204 -0
- package/dist/services/voice/shared-resources.d.ts.map +1 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts +74 -0
- package/dist/services/voice/speaker/attribution-pipeline.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts +59 -0
- package/dist/services/voice/speaker/diarizer-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/diarizer.d.ts +75 -0
- package/dist/services/voice/speaker/diarizer.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts +60 -0
- package/dist/services/voice/speaker/encoder-fused.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts +33 -0
- package/dist/services/voice/speaker/encoder-ggml.d.ts.map +1 -0
- package/dist/services/voice/speaker/encoder.d.ts +37 -0
- package/dist/services/voice/speaker/encoder.d.ts.map +1 -0
- package/dist/services/voice/speaker-imprint.d.ts +83 -0
- package/dist/services/voice/speaker-imprint.d.ts.map +1 -0
- package/dist/services/voice/speaker-preset-cache.d.ts +77 -0
- package/dist/services/voice/speaker-preset-cache.d.ts.map +1 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts +160 -0
- package/dist/services/voice/streaming-asr/streaming-pipeline-adapter.d.ts.map +1 -0
- package/dist/services/voice/system-audio-sink.d.ts +73 -0
- package/dist/services/voice/system-audio-sink.d.ts.map +1 -0
- package/dist/services/voice/transcriber.d.ts +244 -0
- package/dist/services/voice/transcriber.d.ts.map +1 -0
- package/dist/services/voice/transcript-knowledge.d.ts +37 -0
- package/dist/services/voice/transcript-knowledge.d.ts.map +1 -0
- package/dist/services/voice/transcript-service.d.ts +60 -0
- package/dist/services/voice/transcript-service.d.ts.map +1 -0
- package/dist/services/voice/transcript-store.d.ts +64 -0
- package/dist/services/voice/transcript-store.d.ts.map +1 -0
- package/dist/services/voice/turn-controller.d.ts +183 -0
- package/dist/services/voice/turn-controller.d.ts.map +1 -0
- package/dist/services/voice/types.d.ts +643 -0
- package/dist/services/voice/types.d.ts.map +1 -0
- package/dist/services/voice/vad.d.ts +283 -0
- package/dist/services/voice/vad.d.ts.map +1 -0
- package/dist/services/voice/voice-budget.d.ts +241 -0
- package/dist/services/voice/voice-budget.d.ts.map +1 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts +95 -0
- package/dist/services/voice/voice-emotion-classifier.d.ts.map +1 -0
- package/dist/services/voice/voice-preload-predictor.d.ts +76 -0
- package/dist/services/voice/voice-preload-predictor.d.ts.map +1 -0
- package/dist/services/voice/voice-preset-format.d.ts +158 -0
- package/dist/services/voice/voice-preset-format.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-artifact.d.ts +116 -0
- package/dist/services/voice/voice-profile-artifact.d.ts.map +1 -0
- package/dist/services/voice/voice-profile-routes.d.ts +83 -0
- package/dist/services/voice/voice-profile-routes.d.ts.map +1 -0
- package/dist/services/voice/voice-scenario.d.ts +131 -0
- package/dist/services/voice/voice-scenario.d.ts.map +1 -0
- package/dist/services/voice/voice-state-machine.d.ts +364 -0
- package/dist/services/voice/voice-state-machine.d.ts.map +1 -0
- package/dist/services/voice/voice-workbench-report.d.ts +117 -0
- package/dist/services/voice/voice-workbench-report.d.ts.map +1 -0
- package/dist/services/voice/wake-word-ggml.d.ts +100 -0
- package/dist/services/voice/wake-word-ggml.d.ts.map +1 -0
- package/dist/services/voice/wake-word.d.ts +255 -0
- package/dist/services/voice/wake-word.d.ts.map +1 -0
- package/dist/services/voice/wav-codec.d.ts +11 -0
- package/dist/services/voice/wav-codec.d.ts.map +1 -0
- package/dist/services/voice/workbench-entrypoint.d.ts +42 -0
- package/dist/services/voice/workbench-entrypoint.d.ts.map +1 -0
- package/dist/services/voice/workbench-headless-runner.d.ts +102 -0
- package/dist/services/voice/workbench-headless-runner.d.ts.map +1 -0
- package/dist/services/voice/workbench-logic-services.d.ts +36 -0
- package/dist/services/voice/workbench-logic-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-real-services.d.ts +17 -0
- package/dist/services/voice/workbench-real-services.d.ts.map +1 -0
- package/dist/services/voice/workbench-scenarios.d.ts +24 -0
- package/dist/services/voice/workbench-scenarios.d.ts.map +1 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts +70 -0
- package/dist/services/voice/wrap-with-first-line-cache.d.ts.map +1 -0
- package/dist/services/voice-model-updater.d.ts +240 -0
- package/dist/services/voice-model-updater.d.ts.map +1 -0
- package/dist/services/voice-prewarm.d.ts +3 -0
- package/dist/services/voice-prewarm.d.ts.map +1 -0
- package/dist/voice-workbench.d.ts +18 -0
- package/dist/voice-workbench.d.ts.map +1 -0
- package/dist/voice-workbench.js +5259 -0
- package/dist/voice-workbench.js.map +34 -0
- package/package.json +101 -15
- package/registry-entry.json +137 -0
- package/src/actions/generate-media.ts +647 -0
- package/src/actions/identify-speaker.ts +171 -0
- package/src/actions/transcription-control.test.ts +100 -0
- package/src/actions/transcription-control.ts +127 -0
- package/src/adapters/capacitor-llama/__tests__/compat-behavior.test.ts +218 -0
- package/src/adapters/capacitor-llama/__tests__/index.test.ts +68 -0
- package/src/adapters/capacitor-llama/__tests__/structured-output.test.ts +215 -0
- package/src/adapters/capacitor-llama/__tests__/text-streaming.test.ts +174 -0
- package/src/adapters/capacitor-llama/__tests__/voice-turn.test.ts +293 -0
- package/src/adapters/capacitor-llama/environment.ts +71 -0
- package/src/adapters/capacitor-llama/index.browser.ts +83 -0
- package/src/adapters/capacitor-llama/index.ts +831 -0
- package/src/adapters/capacitor-llama/loader.ts +109 -0
- package/src/adapters/capacitor-llama/native-voice-capture.ts +140 -0
- package/src/adapters/capacitor-llama/structured-output.ts +165 -0
- package/src/adapters/capacitor-llama/text-streaming.ts +227 -0
- package/src/adapters/capacitor-llama/types.ts +374 -0
- package/src/adapters/capacitor-llama/voice-turn.ts +178 -0
- package/src/backends/apple-foundation.ts +127 -0
- package/src/index.ts +62 -0
- package/src/local-inference-routes.test.ts +390 -0
- package/src/local-inference-routes.ts +1625 -0
- package/src/provider.ts +1111 -0
- package/src/routes/compat-helpers.ts +275 -0
- package/src/routes/family-member-route.ts +353 -0
- package/src/routes/index.ts +61 -0
- package/src/routes/live-diarization-route.test.ts +347 -0
- package/src/routes/live-diarization-route.ts +198 -0
- package/src/routes/local-inference-asr-route.test.ts +246 -0
- package/src/routes/local-inference-asr-route.ts +166 -0
- package/src/routes/local-inference-asr-transcribe.test.ts +118 -0
- package/src/routes/local-inference-asr-transcribe.ts +97 -0
- package/src/routes/local-inference-compat-routes.test.ts +485 -0
- package/src/routes/local-inference-compat-routes.ts +775 -0
- package/src/routes/local-inference-tts-route.test.ts +179 -0
- package/src/routes/local-inference-tts-route.ts +230 -0
- package/src/routes/native-pcm-turn-route.test.ts +136 -0
- package/src/routes/native-pcm-turn-route.ts +121 -0
- package/src/routes/transcript-audio-store.ts +27 -0
- package/src/routes/transcripts-routes.test.ts +195 -0
- package/src/routes/transcripts-routes.ts +191 -0
- package/src/routes/voice-first-run-routes.ts +524 -0
- package/src/routes/voice-models-routes.ts +554 -0
- package/src/routes/voice-profile-plugin-routes.ts +138 -0
- package/src/routes/voice-profiles-management-routes.ts +476 -0
- package/src/routes/voice-speaker-profile-routes.ts +199 -0
- package/src/runtime/aosp-llama-loader-selection.test.ts +80 -0
- package/src/runtime/bionic-wire-encoding.test.ts +147 -0
- package/src/runtime/capacitor-llama.d.ts +25 -0
- package/src/runtime/embedding-manager-support.ts +497 -0
- package/src/runtime/embedding-presets.ts +81 -0
- package/src/runtime/embedding-warmup-policy.test.ts +53 -0
- package/src/runtime/embedding-warmup-policy.ts +48 -0
- package/src/runtime/ensure-local-inference-handler.test.ts +726 -0
- package/src/runtime/ensure-local-inference-handler.ts +1640 -0
- package/src/runtime/index.ts +36 -0
- package/src/runtime/mobile-local-inference-gate.test.ts +152 -0
- package/src/runtime/mobile-local-inference-gate.ts +99 -0
- package/src/runtime/voice-entity-binding.transcript.test.ts +98 -0
- package/src/runtime/voice-entity-binding.ts +368 -0
- package/src/runtime/voice-speaker-entity-contract.test.ts +149 -0
- package/src/services/README.md +71 -0
- package/src/services/__tests__/backend-selector.precedence.test.ts +333 -0
- package/src/services/__tests__/backend-selector.test.ts +101 -0
- package/src/services/__tests__/checkpoint-manager.test.ts +376 -0
- package/src/services/__tests__/gpu-autotune.test.ts +400 -0
- package/src/services/__tests__/llm-streaming-binding.test.ts +85 -0
- package/src/services/__tests__/planner-grammar.test.ts +372 -0
- package/src/services/__tests__/runtime-target.test.ts +176 -0
- package/src/services/active-model-context-fit.test.ts +125 -0
- package/src/services/active-model-switch-rollback.test.ts +183 -0
- package/src/services/active-model.ts +1416 -0
- package/src/services/asr-provenance.ts +68 -0
- package/src/services/assignment-validation.test.ts +118 -0
- package/src/services/assignments.test.ts +106 -0
- package/src/services/assignments.ts +278 -0
- package/src/services/backend-selector.ts +95 -0
- package/src/services/backend.test.ts +84 -0
- package/src/services/backend.ts +791 -0
- package/src/services/bionic-host-loader.test.ts +226 -0
- package/src/services/bionic-host-loader.ts +252 -0
- package/src/services/bundled-models.ts +129 -0
- package/src/services/cache-bridge.test.ts +516 -0
- package/src/services/cache-bridge.ts +423 -0
- package/src/services/catalog.test.ts +259 -0
- package/src/services/catalog.ts +33 -0
- package/src/services/checkpoint-client.ts +258 -0
- package/src/services/checkpoint-manager.ts +474 -0
- package/src/services/cloud-fallback.ts +230 -0
- package/src/services/context-fit.test.ts +121 -0
- package/src/services/context-fit.ts +113 -0
- package/src/services/conversation-registry.test.ts +235 -0
- package/src/services/conversation-registry.ts +264 -0
- package/src/services/desktop-fused-ffi-backend-runtime.ts +431 -0
- package/src/services/device-bridge.ts +1237 -0
- package/src/services/device-resource-metrics.test.ts +98 -0
- package/src/services/device-resource-metrics.ts +346 -0
- package/src/services/device-tier.test.ts +458 -0
- package/src/services/device-tier.ts +502 -0
- package/src/services/downloader.test.ts +888 -0
- package/src/services/downloader.ts +1039 -0
- package/src/services/engine-direct-bundle.test.ts +90 -0
- package/src/services/engine-streaming.test.ts +80 -0
- package/src/services/engine.ts +2096 -0
- package/src/services/ensure-local-artifacts.integration.test.ts +273 -0
- package/src/services/ensure-local-artifacts.test.ts +368 -0
- package/src/services/ensure-local-artifacts.ts +351 -0
- package/src/services/external-scanner.ts +312 -0
- package/src/services/ffi-llm-mock.ts +354 -0
- package/src/services/ffi-llm-streaming-abi.ts +445 -0
- package/src/services/ffi-streaming-backend.ts +418 -0
- package/src/services/ffi-streaming-runner.test.ts +220 -0
- package/src/services/ffi-streaming-runner.ts +407 -0
- package/src/services/ffi-unload-ordering.test.ts +166 -0
- package/src/services/fused-eliza1-no-regression.test.ts +144 -0
- package/src/services/gpu-autotune.ts +534 -0
- package/src/services/gpu-detect.ts +139 -0
- package/src/services/handler-registry.ts +240 -0
- package/src/services/hardware.test.ts +236 -0
- package/src/services/hardware.ts +438 -0
- package/src/services/image-description-runtime.test.ts +61 -0
- package/src/services/image-description-runtime.ts +118 -0
- package/src/services/imagegen/aosp-unavailable.ts +229 -0
- package/src/services/imagegen/backend-selector.test.ts +190 -0
- package/src/services/imagegen/backend-selector.ts +277 -0
- package/src/services/imagegen/coreml-unavailable.ts +237 -0
- package/src/services/imagegen/errors.ts +40 -0
- package/src/services/imagegen/index.ts +144 -0
- package/src/services/imagegen/mflux.ts +313 -0
- package/src/services/imagegen/sd-cpp.ts +715 -0
- package/src/services/imagegen/tensorrt-unavailable.ts +295 -0
- package/src/services/imagegen/types.ts +193 -0
- package/src/services/index.ts +229 -0
- package/src/services/inference-capabilities.test.ts +75 -0
- package/src/services/inference-capabilities.ts +204 -0
- package/src/services/inference-telemetry.ts +143 -0
- package/src/services/ios-llama-streaming.ts +248 -0
- package/src/services/kv-spill.test.ts +222 -0
- package/src/services/kv-spill.ts +357 -0
- package/src/services/latency-trace.test.ts +266 -0
- package/src/services/latency-trace.ts +844 -0
- package/src/services/lib-target.test.ts +145 -0
- package/src/services/lib-target.ts +102 -0
- package/src/services/live-signals.test.ts +132 -0
- package/src/services/live-signals.ts +177 -0
- package/src/services/llama-server-metrics.test.ts +168 -0
- package/src/services/llama-server-metrics.ts +304 -0
- package/src/services/llm-streaming-binding.ts +136 -0
- package/src/services/load-args.ts +81 -0
- package/src/services/manifest/eliza-1.manifest.v1.json +790 -0
- package/src/services/manifest/index.ts +72 -0
- package/src/services/manifest/manifest.test.ts +791 -0
- package/src/services/manifest/schema.ts +761 -0
- package/src/services/manifest/types.ts +61 -0
- package/src/services/manifest/validator.ts +633 -0
- package/src/services/memory-arbiter.test.ts +558 -0
- package/src/services/memory-arbiter.ts +991 -0
- package/src/services/memory-benchmark.test.ts +91 -0
- package/src/services/memory-benchmark.ts +354 -0
- package/src/services/memory-monitor.test.ts +232 -0
- package/src/services/memory-monitor.ts +309 -0
- package/src/services/memory-pressure.ts +414 -0
- package/src/services/mtp-doctor.ts +86 -0
- package/src/services/network-policy.ts +346 -0
- package/src/services/paths.ts +25 -0
- package/src/services/planner-skeleton.ts +175 -0
- package/src/services/providers.ts +507 -0
- package/src/services/ram-budget-cache.test.ts +164 -0
- package/src/services/ram-budget.ts +309 -0
- package/src/services/readiness.test.ts +87 -0
- package/src/services/readiness.ts +238 -0
- package/src/services/recommendation.test.ts +216 -0
- package/src/services/recommendation.ts +671 -0
- package/src/services/registry.ts +157 -0
- package/src/services/required-kernels-gate.test.ts +64 -0
- package/src/services/router-handler.test.ts +45 -0
- package/src/services/router-handler.ts +426 -0
- package/src/services/routing-policy.test.ts +352 -0
- package/src/services/routing-policy.ts +367 -0
- package/src/services/routing-preferences.ts +17 -0
- package/src/services/runtime-target.ts +154 -0
- package/src/services/service.test.ts +223 -0
- package/src/services/service.ts +750 -0
- package/src/services/session-pool.ts +153 -0
- package/src/services/structured-output/deterministic-repair.test.ts +169 -0
- package/src/services/structured-output/deterministic-repair.ts +443 -0
- package/src/services/structured-output/index.ts +4 -0
- package/src/services/structured-output.test.ts +483 -0
- package/src/services/structured-output.ts +712 -0
- package/src/services/system-memory.test.ts +47 -0
- package/src/services/system-memory.ts +67 -0
- package/src/services/transcription-priority.test.ts +211 -0
- package/src/services/types.ts +59 -0
- package/src/services/verify-on-device.test.ts +87 -0
- package/src/services/verify-on-device.ts +127 -0
- package/src/services/verify.ts +13 -0
- package/src/services/vision/aosp-unavailable.ts +163 -0
- package/src/services/vision/capacitor-llama.ts +255 -0
- package/src/services/vision/cloud-fallback.test.ts +243 -0
- package/src/services/vision/cloud-fallback.ts +268 -0
- package/src/services/vision/fallback-chain.test.ts +86 -0
- package/src/services/vision/hash.ts +157 -0
- package/src/services/vision/index.ts +251 -0
- package/src/services/vision/llama-server.ts +177 -0
- package/src/services/vision/types.ts +163 -0
- package/src/services/vision/vast-fallback.ts +127 -0
- package/src/services/vision-embedding-cache.ts +189 -0
- package/src/services/voice/VOICE_WORKBENCH.md +133 -0
- package/src/services/voice/__fixtures__/voice-workbench-logic-baseline.json +180 -0
- package/src/services/voice/__test-helpers__/fake-ffi.ts +94 -0
- package/src/services/voice/__test-helpers__/synthetic-speech.ts +194 -0
- package/src/services/voice/__tests__/checkpoint-manager.test.ts +241 -0
- package/src/services/voice/__tests__/checkpoint-policy.test.ts +270 -0
- package/src/services/voice/__tests__/eager-context-builder.test.ts +257 -0
- package/src/services/voice/__tests__/eliza1-eot-scorer.test.ts +288 -0
- package/src/services/voice/__tests__/eot-classifier.test.ts +431 -0
- package/src/services/voice/__tests__/optimistic-rollback.test.ts +312 -0
- package/src/services/voice/__tests__/prefill-client.test.ts +266 -0
- package/src/services/voice/__tests__/prefix-preserving-queue.test.ts +208 -0
- package/src/services/voice/__tests__/streaming-asr.test.ts +450 -0
- package/src/services/voice/__tests__/streaming-transcriber.test.ts +339 -0
- package/src/services/voice/__tests__/turn-detector-resolver.test.ts +195 -0
- package/src/services/voice/__tests__/voice-state-machine-prefill.test.ts +275 -0
- package/src/services/voice/__tests__/voice-state-machine.test.ts +354 -0
- package/src/services/voice/acoustic-speaker-attribution.test.ts +165 -0
- package/src/services/voice/acoustic-speaker-attribution.ts +336 -0
- package/src/services/voice/asr-timed.real.test.ts +139 -0
- package/src/services/voice/audio-frame-consumer.test.ts +669 -0
- package/src/services/voice/audio-frame-consumer.ts +651 -0
- package/src/services/voice/barge-in.test.ts +244 -0
- package/src/services/voice/barge-in.ts +335 -0
- package/src/services/voice/cancellation-coordinator.test.ts +196 -0
- package/src/services/voice/cancellation-coordinator.ts +269 -0
- package/src/services/voice/checkpoint-manager.ts +401 -0
- package/src/services/voice/checkpoint-policy.ts +336 -0
- package/src/services/voice/composite-eot-classifier.test.ts +59 -0
- package/src/services/voice/corpus-augment.test.ts +276 -0
- package/src/services/voice/corpus-augment.ts +451 -0
- package/src/services/voice/corpus-generator.test.ts +201 -0
- package/src/services/voice/corpus-generator.ts +413 -0
- package/src/services/voice/diarization-error-rate.greedy.test.ts +140 -0
- package/src/services/voice/diarization-error-rate.test.ts +100 -0
- package/src/services/voice/diarization-error-rate.ts +249 -0
- package/src/services/voice/e2e-harness.der.test.ts +94 -0
- package/src/services/voice/e2e-harness.respond-eot-entity.test.ts +277 -0
- package/src/services/voice/e2e-harness.security-echo.test.ts +103 -0
- package/src/services/voice/e2e-harness.test.ts +182 -0
- package/src/services/voice/e2e-harness.ts +902 -0
- package/src/services/voice/eager-context-builder.ts +262 -0
- package/src/services/voice/echo-delay.test.ts +118 -0
- package/src/services/voice/echo-delay.ts +135 -0
- package/src/services/voice/echo-metrics.test.ts +17 -0
- package/src/services/voice/echo-metrics.ts +20 -0
- package/src/services/voice/echo-reference-buffer.test.ts +86 -0
- package/src/services/voice/echo-reference-buffer.ts +165 -0
- package/src/services/voice/eliza1-eot-scorer.ts +242 -0
- package/src/services/voice/embedding-server.ts +200 -0
- package/src/services/voice/embedding.test.ts +131 -0
- package/src/services/voice/embedding.ts +242 -0
- package/src/services/voice/emotion-attribution.test.ts +129 -0
- package/src/services/voice/emotion-attribution.ts +361 -0
- package/src/services/voice/engine-bridge-cancellation.test.ts +422 -0
- package/src/services/voice/engine-bridge-transcript-join.test.ts +278 -0
- package/src/services/voice/engine-bridge.test.ts +384 -0
- package/src/services/voice/engine-bridge.ts +2343 -0
- package/src/services/voice/eot-classifier-ggml.ts +569 -0
- package/src/services/voice/eot-classifier.test.ts +98 -0
- package/src/services/voice/eot-classifier.ts +422 -0
- package/src/services/voice/errors.ts +34 -0
- package/src/services/voice/expressive-tags.asr.test.ts +77 -0
- package/src/services/voice/expressive-tags.test.ts +102 -0
- package/src/services/voice/expressive-tags.ts +405 -0
- package/src/services/voice/ffi-bindings.test.ts +735 -0
- package/src/services/voice/ffi-bindings.ts +3387 -0
- package/src/services/voice/first-line-cache.ts +725 -0
- package/src/services/voice/fused-eot-scorer.ts +139 -0
- package/src/services/voice/index.ts +502 -0
- package/src/services/voice/kokoro/__tests__/kokoro-backend.test.ts +262 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.real.test.ts +236 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-bridge.test.ts +60 -0
- package/src/services/voice/kokoro/__tests__/kokoro-engine-discovery.test.ts +277 -0
- package/src/services/voice/kokoro/__tests__/kokoro-ffi-runtime.test.ts +235 -0
- package/src/services/voice/kokoro/__tests__/kokoro-runtime.test.ts +95 -0
- package/src/services/voice/kokoro/__tests__/phonemizer.test.ts +53 -0
- package/src/services/voice/kokoro/__tests__/runtime-selection.test.ts +67 -0
- package/src/services/voice/kokoro/__tests__/voices.test.ts +57 -0
- package/src/services/voice/kokoro/index.ts +79 -0
- package/src/services/voice/kokoro/kokoro-backend.ts +223 -0
- package/src/services/voice/kokoro/kokoro-engine-discovery.ts +177 -0
- package/src/services/voice/kokoro/kokoro-ffi-runtime.ts +233 -0
- package/src/services/voice/kokoro/kokoro-runtime.ts +170 -0
- package/src/services/voice/kokoro/phoneme-stream.ts +123 -0
- package/src/services/voice/kokoro/phonemizer.ts +344 -0
- package/src/services/voice/kokoro/pick-runtime.test.ts +91 -0
- package/src/services/voice/kokoro/pick-runtime.ts +130 -0
- package/src/services/voice/kokoro/runtime-selection.ts +64 -0
- package/src/services/voice/kokoro/types.ts +95 -0
- package/src/services/voice/kokoro/voice-presets.ts +129 -0
- package/src/services/voice/kokoro/voices.ts +64 -0
- package/src/services/voice/lifecycle.test.ts +315 -0
- package/src/services/voice/lifecycle.ts +301 -0
- package/src/services/voice/live-diarization-session.echo.test.ts +232 -0
- package/src/services/voice/live-diarization-session.ts +622 -0
- package/src/services/voice/metric-math.test.ts +61 -0
- package/src/services/voice/metric-math.ts +25 -0
- package/src/services/voice/mic-source.test.ts +210 -0
- package/src/services/voice/mic-source.ts +503 -0
- package/src/services/voice/nlms-echo-canceller.test.ts +244 -0
- package/src/services/voice/nlms-echo-canceller.ts +317 -0
- package/src/services/voice/optimistic-policy.power-source.test.ts +36 -0
- package/src/services/voice/optimistic-policy.test.ts +101 -0
- package/src/services/voice/optimistic-policy.ts +192 -0
- package/src/services/voice/optimistic-rollback.ts +343 -0
- package/src/services/voice/partial-stabilizer.test.ts +68 -0
- package/src/services/voice/partial-stabilizer.ts +140 -0
- package/src/services/voice/phoneme-tokenizer.ts +158 -0
- package/src/services/voice/phrase-cache.test.ts +242 -0
- package/src/services/voice/phrase-cache.ts +186 -0
- package/src/services/voice/phrase-chunker.test.ts +239 -0
- package/src/services/voice/phrase-chunker.ts +281 -0
- package/src/services/voice/pipeline-impls.l6.test.ts +110 -0
- package/src/services/voice/pipeline-impls.test.ts +292 -0
- package/src/services/voice/pipeline-impls.ts +315 -0
- package/src/services/voice/pipeline.ts +504 -0
- package/src/services/voice/prefill-client.ts +316 -0
- package/src/services/voice/prefix-preserving-queue.ts +162 -0
- package/src/services/voice/profile-store.ts +887 -0
- package/src/services/voice/real-audio-decode.test.ts +148 -0
- package/src/services/voice/research/VOICE_8785_ASSESSMENT.md +141 -0
- package/src/services/voice/research/VOICE_PIPELINE_RESEARCH_2026.md +117 -0
- package/src/services/voice/research/VOICE_VALIDATION_RUNBOOK.md +135 -0
- package/src/services/voice/ring-buffer.test.ts +129 -0
- package/src/services/voice/ring-buffer.ts +123 -0
- package/src/services/voice/rollback-queue.ts +74 -0
- package/src/services/voice/samantha-preset-placeholder.test.ts +97 -0
- package/src/services/voice/samantha-preset-placeholder.ts +148 -0
- package/src/services/voice/samantha-preset-regenerator.ts +393 -0
- package/src/services/voice/samantha-preset-regenerator.wav.test.ts +90 -0
- package/src/services/voice/scheduler.t2.test.ts +141 -0
- package/src/services/voice/scheduler.ts +927 -0
- package/src/services/voice/self-voice-imprint.test.ts +59 -0
- package/src/services/voice/self-voice-imprint.ts +102 -0
- package/src/services/voice/shared-resources.ts +343 -0
- package/src/services/voice/speaker/attribution-pipeline.test.ts +221 -0
- package/src/services/voice/speaker/attribution-pipeline.ts +449 -0
- package/src/services/voice/speaker/diarizer-fused.real.test.ts +100 -0
- package/src/services/voice/speaker/diarizer-fused.ts +154 -0
- package/src/services/voice/speaker/diarizer.ts +218 -0
- package/src/services/voice/speaker/encoder-fused.real.test.ts +113 -0
- package/src/services/voice/speaker/encoder-fused.ts +138 -0
- package/src/services/voice/speaker/encoder-ggml.test.ts +59 -0
- package/src/services/voice/speaker/encoder-ggml.ts +79 -0
- package/src/services/voice/speaker/encoder.ts +105 -0
- package/src/services/voice/speaker-imprint.test.ts +185 -0
- package/src/services/voice/speaker-imprint.ts +312 -0
- package/src/services/voice/speaker-preset-cache.test.ts +154 -0
- package/src/services/voice/speaker-preset-cache.ts +195 -0
- package/src/services/voice/streaming-asr/streaming-pipeline-adapter.ts +292 -0
- package/src/services/voice/system-audio-sink.test.ts +29 -0
- package/src/services/voice/system-audio-sink.ts +366 -0
- package/src/services/voice/transcriber.asr-backend.test.ts +76 -0
- package/src/services/voice/transcriber.test.ts +392 -0
- package/src/services/voice/transcriber.ts +704 -0
- package/src/services/voice/transcript-knowledge.test.ts +68 -0
- package/src/services/voice/transcript-knowledge.ts +75 -0
- package/src/services/voice/transcript-service.test.ts +195 -0
- package/src/services/voice/transcript-service.ts +205 -0
- package/src/services/voice/transcript-store.test.ts +189 -0
- package/src/services/voice/transcript-store.ts +164 -0
- package/src/services/voice/turn-controller.test.ts +575 -0
- package/src/services/voice/turn-controller.ts +596 -0
- package/src/services/voice/types.ts +699 -0
- package/src/services/voice/vad.test.ts +498 -0
- package/src/services/voice/vad.ts +832 -0
- package/src/services/voice/vad.v1-v4.test.ts +222 -0
- package/src/services/voice/voice-budget.test.ts +415 -0
- package/src/services/voice/voice-budget.ts +635 -0
- package/src/services/voice/voice-duet.test.ts +375 -0
- package/src/services/voice/voice-emotion-classifier.test.ts +210 -0
- package/src/services/voice/voice-emotion-classifier.ts +273 -0
- package/src/services/voice/voice-hardening.fuzz.test.ts +116 -0
- package/src/services/voice/voice-preload-predictor.test.ts +130 -0
- package/src/services/voice/voice-preload-predictor.ts +113 -0
- package/src/services/voice/voice-preset-format.fuzz.test.ts +89 -0
- package/src/services/voice/voice-preset-format.test.ts +75 -0
- package/src/services/voice/voice-preset-format.ts +713 -0
- package/src/services/voice/voice-preset-generator.test.ts +89 -0
- package/src/services/voice/voice-profile-artifact.test.ts +138 -0
- package/src/services/voice/voice-profile-artifact.ts +518 -0
- package/src/services/voice/voice-profile-routes.test.ts +429 -0
- package/src/services/voice/voice-profile-routes.ts +425 -0
- package/src/services/voice/voice-scenario.test.ts +159 -0
- package/src/services/voice/voice-scenario.ts +280 -0
- package/src/services/voice/voice-scenario.turn-helpers.test.ts +77 -0
- package/src/services/voice/voice-state-machine.ts +727 -0
- package/src/services/voice/voice-workbench-report.test.ts +168 -0
- package/src/services/voice/voice-workbench-report.ts +367 -0
- package/src/services/voice/voice-workbench.test.ts +158 -0
- package/src/services/voice/voice.test.ts +1070 -0
- package/src/services/voice/wake-word-ggml.ts +319 -0
- package/src/services/voice/wake-word.test.ts +298 -0
- package/src/services/voice/wake-word.ts +554 -0
- package/src/services/voice/wav-codec.fuzz.test.ts +59 -0
- package/src/services/voice/wav-codec.test.ts +32 -0
- package/src/services/voice/wav-codec.ts +101 -0
- package/src/services/voice/workbench-entrypoint.test.ts +55 -0
- package/src/services/voice/workbench-entrypoint.ts +88 -0
- package/src/services/voice/workbench-headless-runner.test.ts +162 -0
- package/src/services/voice/workbench-headless-runner.ts +396 -0
- package/src/services/voice/workbench-logic-services.test.ts +225 -0
- package/src/services/voice/workbench-logic-services.ts +184 -0
- package/src/services/voice/workbench-real-services.ts +629 -0
- package/src/services/voice/workbench-scenarios.ts +407 -0
- package/src/services/voice/wrap-with-first-line-cache.ts +267 -0
- package/src/services/voice-model-updater.ts +724 -0
- package/src/services/voice-prewarm.ts +51 -0
- package/src/voice-workbench.ts +71 -0
|
@@ -0,0 +1,699 @@
|
|
|
1
|
+
export interface TextToken {
|
|
2
|
+
index: number;
|
|
3
|
+
text: string;
|
|
4
|
+
/**
|
|
5
|
+
* Text-model vocabulary token id, when the producer knows it. ASR
|
|
6
|
+
* (fused Gemma ASR) and the text backbone share the Gemma tokenizer, so an
|
|
7
|
+
* ASR-emitted token id is the same id the text model would assign — a
|
|
8
|
+
* downstream in-process handoff can inject `id` directly into the text KV
|
|
9
|
+
* cache without detokenize →
|
|
10
|
+
* retokenize. Absent for producers that only have surface text (the
|
|
11
|
+
* word-chunk approximation in `splitTranscriptToTokens`).
|
|
12
|
+
*/
|
|
13
|
+
id?: number;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export interface AcceptedToken extends TextToken {
|
|
17
|
+
acceptedAt: number;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface RejectedTokenRange {
|
|
21
|
+
fromIndex: number;
|
|
22
|
+
toIndex: number;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export interface Phrase {
|
|
26
|
+
id: number;
|
|
27
|
+
text: string;
|
|
28
|
+
fromIndex: number;
|
|
29
|
+
toIndex: number;
|
|
30
|
+
terminator: "punctuation" | "max-cap" | "phoneme-stream";
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
export interface AudioChunk {
|
|
34
|
+
phraseId: number;
|
|
35
|
+
fromIndex: number;
|
|
36
|
+
toIndex: number;
|
|
37
|
+
pcm: Float32Array;
|
|
38
|
+
sampleRate: number;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Reference-audio-token payload mirrored on `SpeakerPreset` for v2 presets.
|
|
43
|
+
* Empty (K=0, refT=0, tokens.length=0) when the preset has no reference
|
|
44
|
+
* audio attached (instruct-only voice or legacy v1 file).
|
|
45
|
+
*/
|
|
46
|
+
export interface SpeakerPresetRefAudioTokens {
|
|
47
|
+
K: number;
|
|
48
|
+
refT: number;
|
|
49
|
+
tokens: Int32Array;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export interface SpeakerPreset {
|
|
53
|
+
voiceId: string;
|
|
54
|
+
embedding: Float32Array;
|
|
55
|
+
bytes: Uint8Array;
|
|
56
|
+
/**
|
|
57
|
+
* Preset file format version. `1` for legacy Kokoro-style presets; `2`
|
|
58
|
+
* for the OmniVoice freeze format that also carries `refAudioTokens`,
|
|
59
|
+
* `refText`, and `instruct`. Defaulted to `1` for older readers that
|
|
60
|
+
* haven't been rebuilt yet.
|
|
61
|
+
*/
|
|
62
|
+
version?: number;
|
|
63
|
+
/**
|
|
64
|
+
* OmniVoice reference-audio-token packet (`[K, refT]` int32). Empty for
|
|
65
|
+
* v1 files and v2 files that intentionally ship instruct-only voices.
|
|
66
|
+
* The FFI bridge passes the tokens through to `params.ref_audio_tokens`
|
|
67
|
+
* + `params.ref_T`.
|
|
68
|
+
*/
|
|
69
|
+
refAudioTokens?: SpeakerPresetRefAudioTokens;
|
|
70
|
+
/**
|
|
71
|
+
* UTF-8 transcript of the reference clip that produced `refAudioTokens`.
|
|
72
|
+
* The FFI bridge passes this through to `params.ref_text`. Empty when
|
|
73
|
+
* the preset is instruct-only or v1.
|
|
74
|
+
*/
|
|
75
|
+
refText?: string;
|
|
76
|
+
/**
|
|
77
|
+
* Resolved VoiceDesign instruct string (e.g. `"female, young adult,
|
|
78
|
+
* american accent, moderate pitch"`). The FFI bridge passes this
|
|
79
|
+
* through to `params.instruct` instead of the historical "use the
|
|
80
|
+
* voiceId as the instruct string" misreading.
|
|
81
|
+
*/
|
|
82
|
+
instruct?: string;
|
|
83
|
+
/**
|
|
84
|
+
* Free-form metadata attached at freeze time (codec sha256, corpus hash,
|
|
85
|
+
* source bundle id, etc.). The runtime never relies on this for
|
|
86
|
+
* correctness.
|
|
87
|
+
*/
|
|
88
|
+
metadata?: Record<string, unknown>;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
export interface AudioSink {
|
|
92
|
+
write(pcm: Float32Array, sampleRate: number): void;
|
|
93
|
+
drain(): void;
|
|
94
|
+
bufferedSamples(): number;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export interface OmniVoiceBackend {
|
|
98
|
+
synthesize(args: {
|
|
99
|
+
phrase: Phrase;
|
|
100
|
+
preset: SpeakerPreset;
|
|
101
|
+
cancelSignal: { cancelled: boolean };
|
|
102
|
+
onKernelTick?: () => void;
|
|
103
|
+
}): Promise<AudioChunk>;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* One PCM segment delivered by a streaming OmniVoice runtime. This is the
|
|
108
|
+
* scheduler-facing TypeScript contract for the native streaming ABI extension:
|
|
109
|
+
* the current v1/batch ABI remains valid, and backends that implement this
|
|
110
|
+
* seam can additionally surface first-audio before a full phrase finishes.
|
|
111
|
+
*/
|
|
112
|
+
export interface TtsPcmChunk {
|
|
113
|
+
pcm: Float32Array;
|
|
114
|
+
sampleRate: number;
|
|
115
|
+
isFinal: boolean;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
export interface StreamingTtsBackend {
|
|
119
|
+
synthesizeStream(args: {
|
|
120
|
+
phrase: Phrase;
|
|
121
|
+
preset: SpeakerPreset;
|
|
122
|
+
cancelSignal: { cancelled: boolean };
|
|
123
|
+
onChunk: (chunk: TtsPcmChunk) => boolean | undefined;
|
|
124
|
+
onKernelTick?: () => void;
|
|
125
|
+
}): Promise<{ cancelled: boolean }>;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/** Opaque native handle for a streaming ASR session in the v2 ABI shape. */
|
|
129
|
+
export type StreamingAsrHandle = bigint;
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* TS-only v2 streaming ABI contract. Implementations can satisfy this beside
|
|
133
|
+
* the existing synchronous v1 methods; callers should test the support flags
|
|
134
|
+
* rather than probe-and-catch. Native bindings may carry context handles on
|
|
135
|
+
* top of this shape; the scheduler-facing stream semantics stay the same.
|
|
136
|
+
*/
|
|
137
|
+
export interface VoiceStreamingAbiV2 {
|
|
138
|
+
ttsStreamSupported(): boolean;
|
|
139
|
+
ttsSynthesizeStream(args: {
|
|
140
|
+
text: string;
|
|
141
|
+
speakerPresetId: string | null;
|
|
142
|
+
onChunk: (chunk: {
|
|
143
|
+
pcm: Float32Array;
|
|
144
|
+
isFinal: boolean;
|
|
145
|
+
}) => boolean | undefined;
|
|
146
|
+
}): { cancelled: boolean };
|
|
147
|
+
cancelTts(): void;
|
|
148
|
+
asrStreamSupported(): boolean;
|
|
149
|
+
asrStreamOpen(args: { sampleRateHz: number }): StreamingAsrHandle;
|
|
150
|
+
asrStreamFeed(args: { stream: StreamingAsrHandle; pcm: Float32Array }): void;
|
|
151
|
+
asrStreamPartial(args: {
|
|
152
|
+
stream: StreamingAsrHandle;
|
|
153
|
+
maxTextBytes?: number;
|
|
154
|
+
maxTokens?: number;
|
|
155
|
+
}): { partial: string; tokens?: number[] };
|
|
156
|
+
asrStreamFinish(args: {
|
|
157
|
+
stream: StreamingAsrHandle;
|
|
158
|
+
maxTextBytes?: number;
|
|
159
|
+
maxTokens?: number;
|
|
160
|
+
}): { partial: string; tokens?: number[] };
|
|
161
|
+
asrStreamClose(stream: StreamingAsrHandle): void;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
export interface TranscriptionAudio {
|
|
165
|
+
pcm: Float32Array;
|
|
166
|
+
sampleRate: number;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
export type VoiceInputKind =
|
|
170
|
+
| "local_mic"
|
|
171
|
+
| "discord"
|
|
172
|
+
| "telegram"
|
|
173
|
+
| "signal"
|
|
174
|
+
| "whatsapp"
|
|
175
|
+
| "phone"
|
|
176
|
+
| "browser"
|
|
177
|
+
| "file"
|
|
178
|
+
| "unknown";
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Where speech audio entered the voice loop. Keep this structural so local
|
|
182
|
+
* mic, Discord, phone, and connector captures can share the same
|
|
183
|
+
* turn-taking and attribution path without branching on prompt text.
|
|
184
|
+
*/
|
|
185
|
+
export interface VoiceInputSource {
|
|
186
|
+
kind: VoiceInputKind;
|
|
187
|
+
/** Connector account, device, guild/channel, call, or upload id. */
|
|
188
|
+
sourceId?: string;
|
|
189
|
+
roomId?: string;
|
|
190
|
+
conversationId?: string;
|
|
191
|
+
messageId?: string;
|
|
192
|
+
deviceId?: string;
|
|
193
|
+
connectorAccountId?: string;
|
|
194
|
+
channelId?: string;
|
|
195
|
+
guildId?: string;
|
|
196
|
+
callId?: string;
|
|
197
|
+
participantId?: string;
|
|
198
|
+
metadata?: Record<string, unknown>;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
/**
|
|
202
|
+
* Speaker attribution for diarized speech. `imprintClusterId` is evidence,
|
|
203
|
+
* not identity: callers that want to attach this to a LifeOps person must
|
|
204
|
+
* submit a normal `EntityStore.observeIdentity` observation with this
|
|
205
|
+
* cluster/observation id in its evidence list. Do not use voice imprints as
|
|
206
|
+
* a parallel identity graph or as authorization for voice synthesis.
|
|
207
|
+
*/
|
|
208
|
+
export interface VoiceSpeaker {
|
|
209
|
+
id: string;
|
|
210
|
+
label?: string;
|
|
211
|
+
displayName?: string;
|
|
212
|
+
source?: VoiceInputSource;
|
|
213
|
+
imprintClusterId?: string;
|
|
214
|
+
imprintObservationId?: string;
|
|
215
|
+
entityId?: string;
|
|
216
|
+
confidence?: number;
|
|
217
|
+
isLocalUser?: boolean;
|
|
218
|
+
metadata?: Record<string, unknown>;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/** One diarized span within a transcript snapshot or finalized voice turn. */
|
|
222
|
+
export interface VoiceSegment {
|
|
223
|
+
id?: string;
|
|
224
|
+
text: string;
|
|
225
|
+
startMs: number;
|
|
226
|
+
endMs: number;
|
|
227
|
+
speaker?: VoiceSpeaker;
|
|
228
|
+
speakerId?: string;
|
|
229
|
+
source?: VoiceInputSource;
|
|
230
|
+
confidence?: number;
|
|
231
|
+
tokens?: number[];
|
|
232
|
+
metadata?: Record<string, unknown>;
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
export interface VoiceDiarizationMetadata {
|
|
236
|
+
provider: "local" | "connector" | "cloud" | "unknown";
|
|
237
|
+
model?: string;
|
|
238
|
+
version?: string;
|
|
239
|
+
confidence?: number;
|
|
240
|
+
metadata?: Record<string, unknown>;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
export interface VoiceTurnMetadata {
|
|
244
|
+
turnId?: string;
|
|
245
|
+
source?: VoiceInputSource;
|
|
246
|
+
primarySpeaker?: VoiceSpeaker;
|
|
247
|
+
segments?: VoiceSegment[];
|
|
248
|
+
startedAtMs?: number;
|
|
249
|
+
endedAtMs?: number;
|
|
250
|
+
diarization?: VoiceDiarizationMetadata;
|
|
251
|
+
metadata?: Record<string, unknown>;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
/* -------------------------------------------------------------------- *
|
|
255
|
+
* Streaming ASR — frame-fed transcription with incremental partials.
|
|
256
|
+
*
|
|
257
|
+
* Owned jointly by the transcriber adapters (`voice/transcriber.ts`), the
|
|
258
|
+
* VAD gating + barge-in word-confirm (`voice/vad.ts`, `voice/barge-in.ts`),
|
|
259
|
+
* the turn controller / speculative-on-pause path, and the overlapped
|
|
260
|
+
* `VoicePipeline` (`voice/pipeline.ts`). The `StreamingTranscriber` below
|
|
261
|
+
* is the single ASR contract; the two fused adapters (fused Gemma ASR
|
|
262
|
+
* streaming and fused batch, both via libelizainference) implement it in
|
|
263
|
+
* `voice/transcriber.ts`. It consumes the canonical `PcmFrame` (defined
|
|
264
|
+
* below in the audio front-end section) off a `MicSource` and is gated by
|
|
265
|
+
* the `VadEvent` stream. The `VoicePipeline` drives the same contract as a
|
|
266
|
+
* batch (feed the whole utterance buffer, `flush()`, split the final
|
|
267
|
+
* transcript into contiguous text tokens) — there is no separate batch ASR
|
|
268
|
+
* interface.
|
|
269
|
+
* -------------------------------------------------------------------- */
|
|
270
|
+
|
|
271
|
+
/** A running or final transcript snapshot from a `StreamingTranscriber`. */
export interface TranscriptUpdate {
    /** The full running transcript (not a delta) at this point. */
    partial: string;
    /** True for the snapshot emitted by `flush()` / on `speech-end`. */
    isFinal: boolean;
    /** Channel/device/call metadata for attribution and storage. */
    source?: VoiceInputSource;
    /** Best speaker attribution for single-speaker snapshots. */
    speaker?: VoiceSpeaker;
    /** Diarized spans for multi-speaker snapshots, when available. */
    segments?: VoiceSegment[];
    /** Turn-level metadata carried through to generation and storage. */
    turn?: VoiceTurnMetadata;
    /**
     * Text-model token ids for `partial`, when the backend can supply them
     * cheaply (fused Gemma ASR shares the text vocabulary). Absent when the
     * decoder reports surface text only (re-tokenization is the LLM stage's
     * job there).
     */
    tokens?: number[];
    /**
     * Voice-side emotion attribution attached to `isFinal` snapshots only.
     * Running partials never carry this — the acoustic classifier wants a
     * stable utterance window and the lexicon read on partial text is noise.
     * Produced by `attributeVoiceEmotion()` after fusing the acoustic
     * classifier output (`VoiceEmotionClassifier`) with text-side evidence;
     * the fusion rule lives in `emotion-attribution.ts` so no consumer
     * re-implements it. See R3-emotion §3 + §5.
     */
    voiceEmotion?: import("./emotion-attribution").VoiceEmotionAttribution;
}
|
|
303
|
+
|
|
304
|
+
/**
 * Events a `StreamingTranscriber` emits while consuming PCM frames.
 * Discriminated on `kind`.
 */
export type TranscriberEvent =
    | { kind: "partial"; update: TranscriptUpdate }
    | { kind: "final"; update: TranscriptUpdate }
    /**
     * Fired the first instant ≥1 real word is recognized in the current
     * speech segment. Wired to W1's barge-in word-confirm gate
     * (`onWordsDetected`) so the agent hard-stops TTS + aborts in-flight
     * LLM/drafter generation only on real speech, not a blip.
     */
    | { kind: "words"; words: string[] };
|
|
315
|
+
|
|
316
|
+
/** Callback that receives each `TranscriberEvent`; its return value is ignored. */
export type TranscriberEventListener = (
    event: TranscriberEvent,
) => void;
|
|
317
|
+
|
|
318
|
+
/**
 * Live transcription. `feed()` is called per PCM frame off a `MicSource`.
 * The adapter runs windowed decode passes internally and emits `partial`
 * events as the running transcript grows; `flush()` force-finalizes (call
 * it when the VAD reports `speech-end`). Implementations gate on the VAD
 * event stream — they only decode while the VAD is in `speech-active`.
 *
 * No silent degrade: a transcriber whose backend is unavailable throws on
 * construction (or on first `feed`), it does not quietly produce empty
 * transcripts.
 */
export interface StreamingTranscriber {
    /**
     * Feed one PCM frame. Frames received while VAD is not active are
     * buffered/ignored per the VAD-gating policy.
     */
    feed(frame: PcmFrame): void;
    /**
     * Force-finalize: drain any buffered audio, run a final decode pass,
     * emit the `final` event, and resolve with the final transcript. Safe
     * to call when no audio is buffered (resolves with an empty final).
     * After `flush()` the transcriber is reset and ready for the next
     * speech segment.
     */
    flush(): Promise<TranscriptUpdate>;
    /** Subscribe to transcriber events. Returns an unsubscribe fn. */
    on(listener: TranscriberEventListener): () => void;
    /** Release any held native resources (FFI stream handle, temp files). Idempotent. */
    dispose(): void;
}
|
|
345
|
+
|
|
346
|
+
/** Tuning knobs for the phrase chunker. Every field is optional. */
export interface PhraseChunkerConfig {
    /**
     * Hard word cap before a phrase is force-flushed even without a
     * `, . ! ? ; :` boundary. Defaults to 30 (the brief's A6 "first 30 words").
     *
     * NOTE(review): the field name says "tokens" while this text says
     * "words" — confirm which unit the chunker actually counts.
     */
    maxTokensPerPhrase?: number;
    /**
     * Characters that close a phrase. Default `, . ! ? ; :` — punctuation
     * boundaries let the first clause reach TTS without waiting for a
     * sentence-final mark.
     */
    sentenceTerminators?: ReadonlySet<string>;
    /**
     * Where the chunker emits a phrase boundary.
     *   'punctuation'    — default. Wait for `, . ! ? ; :` or the max-token cap.
     *   'phoneme-stream' — additionally emit a sub-phrase chunk every
     *                      `phonemesPerChunk` phonemes. Cuts first-audio
     *                      latency by handing partial phrases to TTS at
     *                      phoneme boundaries.
     */
    chunkOn?: "punctuation" | "phoneme-stream";
    /** Phonemes per chunk in `phoneme-stream` mode. Default 8. */
    phonemesPerChunk?: number;
    /**
     * Maximum milliseconds a phrase may sit in the chunker before the
     * scheduler force-flushes it even without punctuation / phoneme / cap
     * boundaries. Default 700 ms. Set to 0 to disable.
     */
    maxAccumulationMs?: number;
    /**
     * Shorter budget applied ONLY to the first phrase of each reply, so first
     * audio (TTFA) plays sooner on punctuation-sparse openings while later
     * phrases keep `maxAccumulationMs` (no fragmentation). When omitted,
     * derives from `maxAccumulationMs` (half, capped at 350 ms) and honors the
     * `ELIZA_PHRASE_FLUSH_FIRST_MS` env override. Clamped to `maxAccumulationMs`.
     */
    firstPhraseMaxAccumulationMs?: number;
}
|
|
384
|
+
|
|
385
|
+
/** One accept/reject event from the verifier stream, with the tokens it covers. */
export interface VerifierStreamEvent {
    /** Whether `tokens` were accepted or rejected. */
    kind: "accept" | "reject";
    /** Tokens this event applies to. */
    tokens: TextToken[];
    /**
     * Optional per-event metadata. Today only the very first `accept` of a
     * streaming completion carries `firstTokenMs` (L5 — time from the fetch
     * being issued to the first SSE chunk arriving). Other consumers MAY
     * ignore this field; producers MUST omit it on non-first events.
     */
    meta?: {
        /** Milliseconds from request issue (`performance.now()`) to first chunk. */
        firstTokenMs?: number;
    };
}
|
|
399
|
+
|
|
400
|
+
// ---------------------------------------------------------------------------
// Audio front-end contract (mic capture · VAD · barge-in).
//
// Shared by W1 (this module), W2 (`StreamingTranscriber`), and W9 (the voice
// turn controller / scheduler). Two-tier design:
//
//   1. The cheap always-on RMS energy gate is the *fast* path. It only
//      decides "is there acoustic activity right now". A rising edge wakes
//      the response pipeline (KV-prefill, drafter preload, first-filler
//      pre-generation) speculatively.
//   2. The fused Silero VAD (via the `libelizainference` native VAD ABI) is
//      the *authoritative* speech/no-speech signal. It gates ASR (skip silent
//      frames) and drives turn-taking.
//
// Both run on every mic frame. The RMS gate never substitutes for Silero —
// if the native VAD runtime is unavailable that is a hard "VAD unavailable"
// error, never a silent downgrade (AGENTS.md §3).
// ---------------------------------------------------------------------------
|
|
418
|
+
|
|
419
|
+
/** A fixed-size block of mono PCM samples in [-1, 1] at a known sample rate. */
export interface PcmFrame {
    /** The samples of this frame. */
    pcm: Float32Array;
    /** Sample rate of `pcm`. NOTE(review): presumably Hz, as on `MicSource.sampleRate`. */
    sampleRate: number;
    /**
     * Monotonic timestamp (ms, `performance.now()` domain) of the *first*
     * sample in this frame. Used to age VAD events and barge-in latency.
     */
    timestampMs: number;
}
|
|
429
|
+
|
|
430
|
+
/**
 * Event emitted by `VadDetector` on the authoritative (Silero) timeline.
 * Discriminated on `type`; every variant carries `timestampMs`.
 *
 *   - `speech-start`  — speech onset (a run of speech frames crossed the
 *                       onset threshold). Carries the probability of the
 *                       triggering frame.
 *   - `speech-active` — a periodic heartbeat while speech is ongoing. The
 *                       barge-in controller uses this to pause TTS.
 *   - `speech-pause`  — speech has been quiet for `pauseDurationMs` but not
 *                       long enough to count as end-of-utterance. The turn
 *                       controller uses this to kick a speculative response
 *                       off the partial transcript.
 *   - `speech-end`    — end of utterance (silence held past the hangover
 *                       window). Carries the total speech duration.
 *   - `blip`          — a short burst of energy that the Silero VAD rejected
 *                       (or that was too short to be speech). The barge-in
 *                       controller treats this as "resume TTS".
 */
export type VadEvent =
    | { type: "speech-start"; timestampMs: number; probability: number }
    | {
          type: "speech-active";
          timestampMs: number;
          probability: number;
          speechDurationMs: number;
      }
    | { type: "speech-pause"; timestampMs: number; pauseDurationMs: number }
    | { type: "speech-end"; timestampMs: number; speechDurationMs: number }
    | { type: "blip"; timestampMs: number; durationMs: number; peakRms: number };
|
|
459
|
+
|
|
460
|
+
/**
 * Cheap RMS energy gate event — the fast pre-warm path. Distinct timeline
 * from `VadEvent`; this fires with sub-frame latency and never blocks on a
 * model forward pass. Discriminated on `type`: `energy-rise` carries `rms`,
 * `energy-fall` carries `quietMs`.
 */
export type EnergyGateEvent =
    | { type: "energy-rise"; timestampMs: number; rms: number }
    | { type: "energy-fall"; timestampMs: number; quietMs: number };
|
|
466
|
+
|
|
467
|
+
/** Callback that receives each `VadEvent`; its return value is ignored. */
export type VadEventListener = (
    event: VadEvent,
) => void;
|
|
468
|
+
/** Callback that receives each `EnergyGateEvent`; its return value is ignored. */
export type EnergyGateListener = (
    event: EnergyGateEvent,
) => void;
|
|
469
|
+
|
|
470
|
+
/**
 * Subscribable VAD event stream. `VadDetector` (`voice/vad.ts`) is the
 * concrete implementation; the streaming transcriber and the barge-in
 * controller take this structural view so they don't pull in the optional
 * `onnxruntime-node` surface.
 */
export interface VadEventSource {
    /**
     * Register a listener for VAD events.
     * NOTE(review): the returned function is presumably an unsubscribe
     * handle, as on the sibling subscribe-style APIs — confirm.
     */
    onVadEvent(listener: VadEventListener): () => void;
}
|
|
479
|
+
|
|
480
|
+
/**
 * Source of mic PCM. The desktop/Electrobun impl in `mic-source.ts` is the
 * first concrete implementation; Discord / Telegram / mobile connectors
 * implement the same interface so the rest of the voice loop is source-
 * agnostic. A `MicSource` produces fixed-size mono frames at a fixed sample
 * rate and tees them to any number of consumers (the VAD, the ring buffer
 * the ASR reads from, instrumentation taps).
 */
export interface MicSource {
    /** Nominal sample rate of every emitted frame (Hz). */
    readonly sampleRate: number;
    /** Samples per emitted frame. */
    readonly frameSamples: number;
    /** True once `start()` has resolved and frames are flowing. */
    readonly running: boolean;
    /**
     * Begin capture. Resolves when the underlying device is producing audio.
     * Throws (never silently no-ops) when no mic backend is available.
     */
    start(): Promise<void>;
    /** Stop capture and release the device. Idempotent. */
    stop(): Promise<void>;
    /** Subscribe to PCM frames. Returns an unsubscribe function. */
    onFrame(listener: (frame: PcmFrame) => void): () => void;
    /**
     * Subscribe to fatal capture errors (device lost, process died). The
     * source is no longer `running` after one of these.
     */
    onError(listener: (error: Error) => void): () => void;
}
|
|
506
|
+
|
|
507
|
+
/**
 * Cancellation token threaded from the barge-in controller down through the
 * voice scheduler (TTS) *and* the engine layer (in-flight LLM / MTP
 * drafter generation). `cancelled` is a plain boolean so the synthesis loop
 * and the SSE-consuming generate loop can both poll it cheaply at a kernel
 * boundary; `reason` records *why* for diagnostics; `signal` is the standard
 * `AbortSignal` the engine's HTTP/stream layer aborts on.
 *
 * (W1 owns the controller; W9 threads `signal` into `dispatcher.generate`.)
 */
export interface BargeInCancelToken {
    /** Pollable cancellation flag. */
    cancelled: boolean;
    /** Why cancellation happened. NOTE(review): `null` presumably means "not cancelled yet" — confirm. */
    reason: "barge-in-words" | "manual" | null;
    /** Standard `AbortSignal` for the engine's HTTP/stream layer. */
    readonly signal: AbortSignal;
}
|
|
522
|
+
|
|
523
|
+
/**
 * Signal emitted by `BargeInController` to the scheduler / engine.
 * Discriminated on `type`; only `hard-stop` carries a `BargeInCancelToken`.
 */
export type BargeInSignal =
    | { type: "pause-tts"; timestampMs: number }
    | { type: "resume-tts"; timestampMs: number }
    | { type: "hard-stop"; timestampMs: number; token: BargeInCancelToken };
|
|
528
|
+
|
|
529
|
+
/** Callback that receives each `BargeInSignal`; its return value is ignored. */
export type BargeInSignalListener = (
    signal: BargeInSignal,
) => void;
|
|
530
|
+
|
|
531
|
+
/**
 * Contract the ASR layer (W2's `StreamingTranscriber`) calls into the
 * barge-in controller with. When the transcriber has parsed at least one
 * real word from the user's barge-in audio, it calls `onWordsDetected` with
 * the running word count; the controller promotes a `pause-tts` into a
 * `hard-stop`. This is the *authoritative* blip-vs-words gate — the energy-
 * duration heuristic is only a fast provisional guess until ASR confirms.
 */
export interface WordsDetectedSink {
    /** Report that words have been recognized in the current barge-in segment. */
    onWordsDetected(args: {
        /** Number of parsed words observed so far in this barge-in segment. */
        wordCount: number;
        /** Best partial transcript so far (may be empty). */
        partialText: string;
        /** Event time in milliseconds. NOTE(review): clock domain is not stated here — confirm. */
        timestampMs: number;
    }): void;
}
|
|
548
|
+
|
|
549
|
+
/** Configuration for the voice scheduler. */
export interface SchedulerConfig {
    /** Phrase chunker settings. */
    chunkerConfig: PhraseChunkerConfig;
    /** Speaker preset to use. */
    preset: SpeakerPreset;
    /** Ring buffer capacity. NOTE(review): presumably in samples — confirm. */
    ringBufferCapacity: number;
    /** Sample rate. NOTE(review): presumably Hz for the output audio — confirm. */
    sampleRate: number;
    /**
     * Max concurrent TTS dispatches. When this many phrases are in flight,
     * `accept()` awaits the oldest before dispatching the next, propagating
     * backpressure upstream to the verifier loop. Default 4 — small enough
     * to bound memory under runaway producers without serialising the
     * common case (text gen leads TTS by a phrase or two).
     */
    maxInFlightPhrases?: number;
    /**
     * Enable the streaming-TTS path (`synthesizeStream`) for phrase
     * synthesis. When `true` (default), the scheduler uses the chunk-by-chunk
     * streaming ABI when the backend supports it, delivering first audio
     * before the full phrase finishes synthesizing and enabling per-chunk
     * prefix-preserving barge-in rollback.
     *
     * Previously this was implicitly gated by `ttsStreamSupported()` from the
     * native FFI layer. On macOS, a `ggml_conv_transpose_1d` stall in the
     * DAC codec region caused the Metal path to hang — that stall is now
     * fixed in the llama.cpp merge (native Metal kernels for
     * `ggml_conv_transpose_1d`; the CPU fallback causing the hang is gone).
     * The flag is therefore `true` by default. Set to `false` only when
     * testing against a non-streaming build or reproducing the pre-fix
     * behaviour.
     */
    streamingTtsActive?: boolean;
}
|
|
580
|
+
|
|
581
|
+
/**
 * Phrase summary embedded in `VoiceSchedulerTelemetryEvent` variants.
 *
 * NOTE(review): field semantics below are read from the names alone —
 * confirm against the scheduler that populates them.
 */
export interface VoiceSchedulerPhraseTelemetry {
    /** Phrase identifier. */
    id: number;
    /** Phrase text. */
    text: string;
    /** Start index of the phrase. NOTE(review): presumably a token index — confirm. */
    fromIndex: number;
    /** End index of the phrase. NOTE(review): inclusive vs exclusive is not stated — confirm. */
    toIndex: number;
    /** Terminator recorded on the originating `Phrase`. */
    terminator: Phrase["terminator"];
    /** Number of tokens in the phrase. */
    tokenCount: number;
    /** Size of `text` in bytes. NOTE(review): encoding is not stated here — confirm. */
    textBytes: number;
}
|
|
590
|
+
|
|
591
|
+
/** Origin tag for audio reported in scheduler telemetry: `"cache"` or `"synthesis"`. */
export type VoiceAudioSource =
    | "cache"
    | "synthesis";
|
|
592
|
+
|
|
593
|
+
/** Reason attached to a `tts-cancel` telemetry event. */
export type VoiceTtsCancelReason =
    | "barge-in"
    | "rollback"
    | "pending-tts"
    | "synthesis-cancelled";
|
|
598
|
+
|
|
599
|
+
/**
 * Telemetry events emitted by the voice scheduler. Discriminated on `type`;
 * every variant carries `atMs`.
 *
 * NOTE(review): the clock domain of `atMs` is not stated here — confirm.
 */
export type VoiceSchedulerTelemetryEvent =
    | {
          type: "phrase-dispatch";
          atMs: number;
          phrase: VoiceSchedulerPhraseTelemetry;
          inFlightPhrases: number;
      }
    | {
          type: "phrase-cache-hit" | "phrase-cache-miss";
          atMs: number;
          phrase: VoiceSchedulerPhraseTelemetry;
      }
    | {
          type: "tts-start";
          atMs: number;
          phrase: VoiceSchedulerPhraseTelemetry;
          inFlightPhrases: number;
      }
    | {
          type: "tts-first-audio";
          atMs: number;
          phrase: VoiceSchedulerPhraseTelemetry;
          source: VoiceAudioSource;
          samples: number;
          sampleRate: number;
      }
    | {
          type: "audio-committed";
          atMs: number;
          phrase: VoiceSchedulerPhraseTelemetry;
          source: VoiceAudioSource;
          samples: number;
          sampleRate: number;
          flushedSamples: number;
          paused: boolean;
          ringBufferSamples: number;
          sinkBufferedSamples: number;
      }
    | {
          type: "tts-cancel";
          atMs: number;
          phrase: VoiceSchedulerPhraseTelemetry;
          reason: VoiceTtsCancelReason;
      }
    | {
          type: "rollback";
          atMs: number;
          phraseId: number;
          range: RejectedTokenRange;
          reason: "rejected-tokens";
      }
    | {
          type: "barge-in";
          atMs: number;
          ringBufferSamplesDrained: number;
          sinkBufferedSamplesDrained: number;
          inFlightPhrasesCancelled: number;
          wasPaused: boolean;
      }
    | {
          /**
           * Fired when the prefix-preserving rollback queue partitions
           * in-flight audio chunks on barge-in. `retainedChunks` are replayed
           * into the sink; `droppedChunks` are discarded. Present only when
           * `PrefixPreservingQueue` is active (at least one chunk was tagged).
           */
          type: "barge-in-prefix-rollback";
          atMs: number;
          divergencePoint: number;
          retainedChunks: number;
          droppedChunks: number;
          straddledChunks: number;
          retainedDurationMs: number;
          droppedDurationMs: number;
      };
|
|
674
|
+
|
|
675
|
+
/** Callback that receives each `VoiceSchedulerTelemetryEvent`; its return value is ignored. */
export type VoiceSchedulerTelemetryListener = (
    event: VoiceSchedulerTelemetryEvent,
) => void;
|
|
678
|
+
|
|
679
|
+
// ---------------------------------------------------------------------------
// Shared interfaces extracted here to break circular dependencies between
// vad.ts and its consumers, and wake-word.ts ↔ wake-word-ggml.ts.
// ---------------------------------------------------------------------------
|
|
683
|
+
|
|
684
|
+
/**
 * Minimal VAD model contract consumed by the fused `GgmlSileroVad` and the
 * optional injected external adapter.
 */
export interface VadLike {
    /** Number of samples per analysis window. */
    readonly windowSamples: number;
    /** Sample rate the model works at. NOTE(review): presumably Hz — confirm. */
    readonly sampleRate: number;
    /**
     * Run the model on one window of samples.
     * NOTE(review): the resolved number is presumably a speech probability —
     * confirm against the implementation.
     */
    process(window: Float32Array): Promise<number>;
    /** Reset the model. NOTE(review): presumably clears state carried between windows — confirm. */
    reset(): void;
}
|
|
692
|
+
|
|
693
|
+
/** Minimal wake-word model contract consumed by OpenWakeWordGgmlModel. */
export interface WakeWordModel {
    /** Number of samples per scored frame. */
    readonly frameSamples: number;
    /** Sample rate the model works at. NOTE(review): presumably Hz — confirm. */
    readonly sampleRate: number;
    /**
     * Score one frame of samples.
     * NOTE(review): the resolved number is presumably a wake-word score or
     * probability — confirm range and meaning against the implementation.
     */
    scoreFrame(frame: Float32Array): Promise<number>;
    /** Reset the model. NOTE(review): presumably clears state carried between frames — confirm. */
    reset(): void;
}
|